; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s

; Callees with differing attributes; may_inf_loop_* lack willreturn and so may
; never return, while may_throw lacks nounwind and so may unwind.
declare i64 @may_inf_loop_ro() nounwind readonly
declare i64 @may_inf_loop_rw() nounwind
declare i64 @may_throw() willreturn
; Base case with no interesting control dependencies
define void @test_no_control(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test_no_control(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Like the base case, except the second add operand comes from a readonly
; call that may never return, rather than from a load.
define void @test1(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %c2 = call i64 @may_inf_loop_ro()
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Like test1, but the load of %c and the call come before the loads of %a.
define void @test2(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %c1 = load i64, i64* %c
  %c2 = call i64 @may_inf_loop_ro()

  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Like test1, but each lane is computed in full sequence, with the call
; sitting between the first and second lane.
define void @test3(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %c1 = load i64, i64* %c
  %add1 = add i64 %v1, %c1

  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2
  %c2 = call i64 @may_inf_loop_ro()
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Like test3, but the call comes before the second load of %a.
define void @test4(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %c1 = load i64, i64* %c
  %add1 = add i64 %v1, %c1

  %c2 = call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Like test3, but the lanes appear in reverse order (lane 1 first), with the
; call in the lane-1 group.
define void @test5(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2
  %c2 = call i64 @may_inf_loop_ro()
  %add2 = add i64 %v2, %c2

  %v1 = load i64, i64* %a
  %c1 = load i64, i64* %c
  %add1 = add i64 %v1, %c1

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; The call sits between the two loads of %a; both load pairs still vectorize
; with the call scheduled before them.
define void @test6(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; In this case, we can't vectorize the load pair because there's no valid
; scheduling point which respects both memory and control dependence.  If
; we scheduled the second load before the store holding the first one in place,
; we'd have hoisted a potentially faulting load above a potentially infinite
; call and thus have introduced a possible fault into a program which didn't
; previously exist.
define void @test7(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  store i64 0, i64* %a
  call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Same as test7, but with a throwing call
define void @test8(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  store i64 0, i64* %a
  call i64 @may_throw() readonly
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Same as test8, but with a readwrite maythrow call
define void @test9(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test9(
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw()
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  store i64 0, i64* %a
  call i64 @may_throw()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; A variant of test7 which shows the same problem with a non-load instruction
define void @test10(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A:%.*]], align 4
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A]], i32 1
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[V1]]
; CHECK-NEXT:    store i64 [[U1]], i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[V2]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %u1 = udiv i64 200, %v1
  store i64 %u1, i64* %a
  call i64 @may_inf_loop_ro()
  %u2 = udiv i64 200, %v2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %u1, %c1
  %add2 = add i64 %u2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Variant of test10 with block-invariant operands to the udivs
; FIXME: This is wrong, we're hoisting a potentially faulting udiv above a
; potentially infinite call.
define void @test11(i64 %x, i64 %y, i64* %b, i64* %c) {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[X:%.*]]
; CHECK-NEXT:    store i64 [[U1]], i64* [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %u1 = udiv i64 200, %x
  store i64 %u1, i64* %b
  call i64 @may_inf_loop_ro()
  %u2 = udiv i64 200, %y

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %u1, %c1
  %add2 = add i64 %u2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}
