; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
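; Tests for SLP vectorization of chains of scalar stores.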

;void Distance(float *p1, int p2, unsigned long p3[], float p4[]) {
;  long a = p3[0] = 5;
;  p1 += p2;
;  p4[3] += p1[a];
;  p3[0] >>= 5;
;  p3[1] >>= 5;
;  p3[2] >>= 5;
;  p3[3] >>= 5;
;  p1 += p2;
;  p4[0] += p1[p3[0] & a];
;}

define void @_Z8DistanceIlLi5EEvPfiPmS0_(float* %p1, i32 %p2, i64* %p3, float* %p4) {
; CHECK-LABEL: @_Z8DistanceIlLi5EEvPfiPmS0_(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    [[ADD_PTR11:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP6]], 5
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[ADD_PTR11]], i64 [[AND]]
; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX13]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[P4]], align 4
; CHECK-NEXT:    [[ADD15:%.*]] = fadd float [[TMP7]], [[TMP8]]
; CHECK-NEXT:    store float [[ADD15]], float* [[P4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  store i64 5, i64* %p3, align 8
  %idx.ext = sext i32 %p2 to i64
  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
  %0 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %0, %1
  store float %add, float* %arrayidx2, align 4
  %2 = load i64, i64* %p3, align 8
  %shr = lshr i64 %2, 5
  store i64 %shr, i64* %p3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
  %3 = load i64, i64* %arrayidx4, align 8
  %shr5 = lshr i64 %3, 5
  store i64 %shr5, i64* %arrayidx4, align 8
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx6, align 8
  %shr7 = lshr i64 %4, 5
  store i64 %shr7, i64* %arrayidx6, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
  %5 = load i64, i64* %arrayidx8, align 8
  %shr9 = lshr i64 %5, 5
  store i64 %shr9, i64* %arrayidx8, align 8
  %add.ptr11 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext
  %and = and i64 %shr, 5
  %arrayidx13 = getelementptr inbounds float, float* %add.ptr11, i64 %and
  %6 = load float, float* %arrayidx13, align 4
  %7 = load float, float* %p4, align 4
  %add15 = fadd float %6, %7
  store float %add15, float* %p4, align 4
  ret void
}

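; The shifted values are stored to p3[7..4] in the reverse of the order in
; which their operands are loaded from p3[0..3], so the vectorizer must emit
; a reverse shuffle before the <4 x i64> store. A rough C equivalent (a
; sketch inferred from the IR below, not taken from the original source):
;
;void store_reverse(unsigned long p3[]) {
;  p3[7] = p3[0] << p3[8];
;  p3[6] = p3[1] << p3[9];
;  p3[5] = p3[2] << p3[10];
;  p3[4] = p3[3] << p3[11];
;}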
define void @store_reverse(i64* %p3) {
; CHECK-LABEL: @store_reverse(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P3:%.*]], i64 8
; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[ARRAYIDX1]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %p3, align 8
  %arrayidx1 = getelementptr inbounds i64, i64* %p3, i64 8
  %1 = load i64, i64* %arrayidx1, align 8
  %shl = shl i64 %0, %1
  %arrayidx2 = getelementptr inbounds i64, i64* %p3, i64 7
  store i64 %shl, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %p3, i64 1
  %2 = load i64, i64* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 9
  %3 = load i64, i64* %arrayidx4, align 8
  %shl5 = shl i64 %2, %3
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 6
  store i64 %shl5, i64* %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx7, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 10
  %5 = load i64, i64* %arrayidx8, align 8
  %shl9 = shl i64 %4, %5
  %arrayidx10 = getelementptr inbounds i64, i64* %p3, i64 5
  store i64 %shl9, i64* %arrayidx10, align 8
  %arrayidx11 = getelementptr inbounds i64, i64* %p3, i64 3
  %6 = load i64, i64* %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds i64, i64* %p3, i64 11
  %7 = load i64, i64* %arrayidx12, align 8
  %shl13 = shl i64 %6, %7
  %arrayidx14 = getelementptr inbounds i64, i64* %p3, i64 4
  store i64 %shl13, i64* %arrayidx14, align 8
  ret void
}

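; Same load/lshr chain over p3[0..3] as in @_Z8DistanceIlLi5EEvPfiPmS0_, but
; with 15 identical scalar stores of 5 to p3[5] interposed before the last
; shifted value is stored back. The four shifts should still be combined into
; a single <4 x i64> lshr and vector store, while the stores to p3[5] stay
; scalar.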
define void @store15(float* %p1, i32 %p2, i64* %p3, float* %p4) {
; CHECK-LABEL: @store15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
entry:
  store i64 5, i64* %p3, align 8
  %idx.ext = sext i32 %p2 to i64
  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
  %0 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %0, %1
  store float %add, float* %arrayidx2, align 4
  %2 = load i64, i64* %p3, align 8
  %shr = lshr i64 %2, 5
  store i64 %shr, i64* %p3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
  %3 = load i64, i64* %arrayidx4, align 8
  %shr5 = lshr i64 %3, 5
  store i64 %shr5, i64* %arrayidx4, align 8
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx6, align 8
  %shr7 = lshr i64 %4, 5
  store i64 %shr7, i64* %arrayidx6, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
  %5 = load i64, i64* %arrayidx8, align 8
  %shr9 = lshr i64 %5, 5
  %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 %shr9, i64* %arrayidx8, align 8
  ret void
}

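; Identical to @store15 except that 16 stores to p3[5] are interposed rather
; than 15; together the two functions appear to probe a limit on how many
; intervening stores the vectorizer will look past, and the <4 x i64>
; vectorization is still expected here.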
define void @store16(float* %p1, i32 %p2, i64* %p3, float* %p4) {
; CHECK-LABEL: @store16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
entry:
  store i64 5, i64* %p3, align 8
  %idx.ext = sext i32 %p2 to i64
  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
  %0 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %0, %1
  store float %add, float* %arrayidx2, align 4
  %2 = load i64, i64* %p3, align 8
  %shr = lshr i64 %2, 5
  store i64 %shr, i64* %p3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
  %3 = load i64, i64* %arrayidx4, align 8
  %shr5 = lshr i64 %3, 5
  store i64 %shr5, i64* %arrayidx4, align 8
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx6, align 8
  %shr7 = lshr i64 %4, 5
  store i64 %shr7, i64* %arrayidx6, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
  %5 = load i64, i64* %arrayidx8, align 8
  %shr9 = lshr i64 %5, 5
  %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 %shr9, i64* %arrayidx8, align 8
  ret void
}