1; Test stores of element-swapped vector elements.
2;
3; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
4
5; Test v16i8 stores.
6define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
; Reverses the order of the 16 byte elements of %val (result element i is
; source element 15 - i) and stores the result to %ptr. The expected output
; is a vstbrq of %v24 to 0(%r2) followed by the return.
; Reversing 16 one-byte elements is the same as reversing every byte of the
; vector, which is presumably why the expected mnemonic differs in form from
; the vster* ones used by the wider-element tests in this file.
7; CHECK-LABEL: f1:
8; CHECK: vstbrq %v24, 0(%r2)
9; CHECK: br %r14
; Every mask index is below 16, so only %val is selected from; the undef
; second operand never contributes an element.
10  %swap = shufflevector <16 x i8> %val, <16 x i8> undef,
11                        <16 x i32> <i32 15, i32 14, i32 13, i32 12,
12                                    i32 11, i32 10, i32 9, i32 8,
13                                    i32 7, i32 6, i32 5, i32 4,
14                                    i32 3, i32 2, i32 1, i32 0>
15  store <16 x i8> %swap, <16 x i8> *%ptr
16  ret void
17}
18
19; Test v8i16 stores.
20define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
; Reverses the order of the 8 halfword elements of %val (result element i is
; source element 7 - i) and stores the result to %ptr. The expected output is
; a vsterh of %v24 to 0(%r2) followed by the return.
21; CHECK-LABEL: f2:
22; CHECK: vsterh %v24, 0(%r2)
23; CHECK: br %r14
; Every mask index is below 8, so the undef second operand is never selected.
24  %swap = shufflevector <8 x i16> %val, <8 x i16> undef,
25                        <8 x i32> <i32 7, i32 6, i32 5, i32 4,
26                                   i32 3, i32 2, i32 1, i32 0>
27  store <8 x i16> %swap, <8 x i16> *%ptr
28  ret void
29}
30
31; Test v4i32 stores.
32define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
; Reverses the order of the 4 word elements of %val (result element i is
; source element 3 - i) and stores the result to %ptr. The expected output is
; a vsterf of %v24 to 0(%r2) followed by the return.
33; CHECK-LABEL: f3:
34; CHECK: vsterf %v24, 0(%r2)
35; CHECK: br %r14
; Every mask index is below 4, so the undef second operand is never selected.
36  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
37                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
38  store <4 x i32> %swap, <4 x i32> *%ptr
39  ret void
40}
41
42; Test v2i64 stores.
43define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
; Swaps the 2 doubleword elements of %val and stores the result to %ptr.
; The expected output is a vsterg of %v24 to 0(%r2) followed by the return.
44; CHECK-LABEL: f4:
45; CHECK: vsterg %v24, 0(%r2)
46; CHECK: br %r14
; Both mask indices are below 2, so the undef second operand is never
; selected.
47  %swap = shufflevector <2 x i64> %val, <2 x i64> undef,
48                        <2 x i32> <i32 1, i32 0>
49  store <2 x i64> %swap, <2 x i64> *%ptr
50  ret void
51}
52
53; Test v4f32 stores.
54define void @f5(<4 x float> %val, <4 x float> *%ptr) {
; Floating-point counterpart of the <4 x i32> test: reverses the order of the
; 4 float elements of %val and stores the result to %ptr. The expected
; output is the same vsterf form as for 32-bit integer elements.
55; CHECK-LABEL: f5:
56; CHECK: vsterf %v24, 0(%r2)
57; CHECK: br %r14
; Every mask index is below 4, so the undef second operand is never selected.
58  %swap = shufflevector <4 x float> %val, <4 x float> undef,
59                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
60  store <4 x float> %swap, <4 x float> *%ptr
61  ret void
62}
63
64; Test v2f64 stores.
65define void @f6(<2 x double> %val, <2 x double> *%ptr) {
; Floating-point counterpart of the <2 x i64> test: swaps the 2 double
; elements of %val and stores the result to %ptr. The expected output is the
; same vsterg form as for 64-bit integer elements.
66; CHECK-LABEL: f6:
67; CHECK: vsterg %v24, 0(%r2)
68; CHECK: br %r14
; Both mask indices are below 2, so the undef second operand is never
; selected.
69  %swap = shufflevector <2 x double> %val, <2 x double> undef,
70                        <2 x i32> <i32 1, i32 0>
71  store <2 x double> %swap, <2 x double> *%ptr
72  ret void
73}
74
75; Test the highest aligned in-range offset.
76define void @f7(<4 x i32> %val, <4 x i32> *%base) {
; Stores the element-reversed %val at %base plus 255 vectors. Each
; <4 x i32> is 16 bytes, so the byte offset is 255 * 16 = 4080, and the
; expected output carries that value directly as the store's displacement.
77; CHECK-LABEL: f7:
78; CHECK: vsterf %v24, 4080(%r2)
79; CHECK: br %r14
80  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
81  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
82                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
83  store <4 x i32> %swap, <4 x i32> *%ptr
84  ret void
85}
86
87; Test the highest unaligned in-range offset.
88define void @f8(<4 x i32> %val, i8 *%base) {
; Stores the element-reversed %val at byte offset 4095 from %base. The
; expected output carries 4095 directly as the store's displacement.
89; CHECK-LABEL: f8:
90; CHECK: vsterf %v24, 4095(%r2)
91; CHECK: br %r14
; The address is computed in bytes through an i8 pointer so that an offset
; that is not a multiple of the 16-byte vector size can be expressed.
92  %addr = getelementptr i8, i8 *%base, i64 4095
93  %ptr = bitcast i8 *%addr to <4 x i32> *
94  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
95                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; The store is marked align 1 because the address above is not
; vector-aligned.
96  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
97  ret void
98}
99
100; Test the next offset up, which requires separate address logic.
101define void @f9(<4 x i32> %val, <4 x i32> *%base) {
; Stores the element-reversed %val at %base plus 256 vectors, a byte offset
; of 256 * 16 = 4096. The expected output adds 4096 to %r2 with a separate
; aghi and then stores with a displacement of 0, rather than carrying the
; offset in the store itself.
102; CHECK-LABEL: f9:
103; CHECK: aghi %r2, 4096
104; CHECK: vsterf %v24, 0(%r2)
105; CHECK: br %r14
106  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
107  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
108                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
109  store <4 x i32> %swap, <4 x i32> *%ptr
110  ret void
111}
112
113; Test negative offsets, which also require separate address logic.
114define void @f10(<4 x i32> %val, <4 x i32> *%base) {
; Stores the element-reversed %val one vector before %base, a byte offset
; of -1 * 16 = -16. The expected output adds -16 to %r2 with a separate aghi
; and then stores with a displacement of 0.
115; CHECK-LABEL: f10:
116; CHECK: aghi %r2, -16
117; CHECK: vsterf %v24, 0(%r2)
118; CHECK: br %r14
119  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
120  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
121                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
122  store <4 x i32> %swap, <4 x i32> *%ptr
123  ret void
124}
125
126; Check that indexes are allowed.
127define void @f11(<4 x i32> %val, i8 *%base, i64 %index) {
; Stores the element-reversed %val at %base plus a run-time byte offset
; %index. The expected output uses both %r3 and %r2 in the store's address
; operand with a displacement of 0, with no separate add shown.
128; CHECK-LABEL: f11:
129; CHECK: vsterf %v24, 0(%r3,%r2)
130; CHECK: br %r14
; The variable offset is applied in bytes through an i8 pointer.
131  %addr = getelementptr i8, i8 *%base, i64 %index
132  %ptr = bitcast i8 *%addr to <4 x i32> *
133  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
134                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; The store is marked align 1 because %index may be any byte offset.
135  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
136  ret void
137}
138
139