1; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
2
3target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
4
5%struct.anon = type { [100 x i32], i32, [100 x i32] }
6%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
7
8@Foo = common global %struct.anon zeroinitializer, align 4
9@Bar = common global %struct.anon.0 zeroinitializer, align 4
10
11@PB = external global i32*
12@PA = external global i32*
13
14
15;; === First, the tests that should always vectorize, wither statically or by adding run-time checks ===
16
17
18; /// Different objects, positive induction, constant distance
19; int noAlias01 (int a) {
20;   int i;
21;   for (i=0; i<SIZE; i++)
22;     Foo.A[i] = Foo.B[i] + a;
23;   return Foo.A[a];
24; }
25; CHECK-LABEL: define i32 @noAlias01(
26; CHECK: add nsw <4 x i32>
27; CHECK: ret
28
29define i32 @noAlias01(i32 %a) nounwind {
30entry:
31  %a.addr = alloca i32, align 4
32  %i = alloca i32, align 4
33  store i32 %a, i32* %a.addr, align 4
34  store i32 0, i32* %i, align 4
35  br label %for.cond
36
37for.cond:                                         ; preds = %for.inc, %entry
38  %0 = load i32* %i, align 4
39  %cmp = icmp slt i32 %0, 100
40  br i1 %cmp, label %for.body, label %for.end
41
42for.body:                                         ; preds = %for.cond
43  %1 = load i32* %i, align 4
44  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
45  %2 = load i32* %arrayidx, align 4
46  %3 = load i32* %a.addr, align 4
47  %add = add nsw i32 %2, %3
48  %4 = load i32* %i, align 4
49  %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
50  store i32 %add, i32* %arrayidx1, align 4
51  br label %for.inc
52
53for.inc:                                          ; preds = %for.body
54  %5 = load i32* %i, align 4
55  %inc = add nsw i32 %5, 1
56  store i32 %inc, i32* %i, align 4
57  br label %for.cond
58
59for.end:                                          ; preds = %for.cond
60  %6 = load i32* %a.addr, align 4
61  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
62  %7 = load i32* %arrayidx2, align 4
63  ret i32 %7
64}
65
66; /// Different objects, positive induction with widening slide
67; int noAlias02 (int a) {
68;   int i;
69;   for (i=0; i<SIZE-10; i++)
70;     Foo.A[i] = Foo.B[i+10] + a;
71;   return Foo.A[a];
72; }
73; CHECK-LABEL: define i32 @noAlias02(
74; CHECK: add nsw <4 x i32>
75; CHECK: ret
76
77define i32 @noAlias02(i32 %a) {
78entry:
79  %a.addr = alloca i32, align 4
80  %i = alloca i32, align 4
81  store i32 %a, i32* %a.addr, align 4
82  store i32 0, i32* %i, align 4
83  br label %for.cond
84
85for.cond:                                         ; preds = %for.inc, %entry
86  %0 = load i32* %i, align 4
87  %cmp = icmp slt i32 %0, 90
88  br i1 %cmp, label %for.body, label %for.end
89
90for.body:                                         ; preds = %for.cond
91  %1 = load i32* %i, align 4
92  %add = add nsw i32 %1, 10
93  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
94  %2 = load i32* %arrayidx, align 4
95  %3 = load i32* %a.addr, align 4
96  %add1 = add nsw i32 %2, %3
97  %4 = load i32* %i, align 4
98  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
99  store i32 %add1, i32* %arrayidx2, align 4
100  br label %for.inc
101
102for.inc:                                          ; preds = %for.body
103  %5 = load i32* %i, align 4
104  %inc = add nsw i32 %5, 1
105  store i32 %inc, i32* %i, align 4
106  br label %for.cond
107
108for.end:                                          ; preds = %for.cond
109  %6 = load i32* %a.addr, align 4
110  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
111  %7 = load i32* %arrayidx3, align 4
112  ret i32 %7
113}
114
115; /// Different objects, positive induction with shortening slide
116; int noAlias03 (int a) {
117;   int i;
118;   for (i=0; i<SIZE; i++)
119;     Foo.A[i+10] = Foo.B[i] + a;
120;   return Foo.A[a];
121; }
122; CHECK-LABEL: define i32 @noAlias03(
123; CHECK: add nsw <4 x i32>
124; CHECK: ret
125
126define i32 @noAlias03(i32 %a) {
127entry:
128  %a.addr = alloca i32, align 4
129  %i = alloca i32, align 4
130  store i32 %a, i32* %a.addr, align 4
131  store i32 0, i32* %i, align 4
132  br label %for.cond
133
134for.cond:                                         ; preds = %for.inc, %entry
135  %0 = load i32* %i, align 4
136  %cmp = icmp slt i32 %0, 100
137  br i1 %cmp, label %for.body, label %for.end
138
139for.body:                                         ; preds = %for.cond
140  %1 = load i32* %i, align 4
141  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
142  %2 = load i32* %arrayidx, align 4
143  %3 = load i32* %a.addr, align 4
144  %add = add nsw i32 %2, %3
145  %4 = load i32* %i, align 4
146  %add1 = add nsw i32 %4, 10
147  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
148  store i32 %add, i32* %arrayidx2, align 4
149  br label %for.inc
150
151for.inc:                                          ; preds = %for.body
152  %5 = load i32* %i, align 4
153  %inc = add nsw i32 %5, 1
154  store i32 %inc, i32* %i, align 4
155  br label %for.cond
156
157for.end:                                          ; preds = %for.cond
158  %6 = load i32* %a.addr, align 4
159  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
160  %7 = load i32* %arrayidx3, align 4
161  ret i32 %7
162}
163
164; /// Pointer access, positive stride, run-time check added
165; int noAlias04 (int a) {
166;   int i;
167;   for (i=0; i<SIZE; i++)
168;     *(PA+i) = *(PB+i) + a;
169;   return *(PA+a);
170; }
171; CHECK-LABEL: define i32 @noAlias04(
172; CHECK-NOT: add nsw <4 x i32>
173; CHECK: ret
174;
175; TODO: This test vectorizes (with run-time check) on real targets with -O3)
176; Check why it's not being vectorized even when forcing vectorization
177
178define i32 @noAlias04(i32 %a) #0 {
179entry:
180  %a.addr = alloca i32, align 4
181  %i = alloca i32, align 4
182  store i32 %a, i32* %a.addr, align 4
183  store i32 0, i32* %i, align 4
184  br label %for.cond
185
186for.cond:                                         ; preds = %for.inc, %entry
187  %0 = load i32* %i, align 4
188  %cmp = icmp slt i32 %0, 100
189  br i1 %cmp, label %for.body, label %for.end
190
191for.body:                                         ; preds = %for.cond
192  %1 = load i32** @PB, align 4
193  %2 = load i32* %i, align 4
194  %add.ptr = getelementptr inbounds i32* %1, i32 %2
195  %3 = load i32* %add.ptr, align 4
196  %4 = load i32* %a.addr, align 4
197  %add = add nsw i32 %3, %4
198  %5 = load i32** @PA, align 4
199  %6 = load i32* %i, align 4
200  %add.ptr1 = getelementptr inbounds i32* %5, i32 %6
201  store i32 %add, i32* %add.ptr1, align 4
202  br label %for.inc
203
204for.inc:                                          ; preds = %for.body
205  %7 = load i32* %i, align 4
206  %inc = add nsw i32 %7, 1
207  store i32 %inc, i32* %i, align 4
208  br label %for.cond
209
210for.end:                                          ; preds = %for.cond
211  %8 = load i32** @PA, align 4
212  %9 = load i32* %a.addr, align 4
213  %add.ptr2 = getelementptr inbounds i32* %8, i32 %9
214  %10 = load i32* %add.ptr2, align 4
215  ret i32 %10
216}
217
218; /// Different objects, positive induction, multi-array
219; int noAlias05 (int a) {
220;   int i, N=10;
221;   for (i=0; i<SIZE; i++)
222;     Bar.A[N][i] = Bar.B[N][i] + a;
223;   return Bar.A[N][a];
224; }
225; CHECK-LABEL: define i32 @noAlias05(
226; CHECK: add nsw <4 x i32>
227; CHECK: ret
228
229define i32 @noAlias05(i32 %a) #0 {
230entry:
231  %a.addr = alloca i32, align 4
232  %i = alloca i32, align 4
233  %N = alloca i32, align 4
234  store i32 %a, i32* %a.addr, align 4
235  store i32 10, i32* %N, align 4
236  store i32 0, i32* %i, align 4
237  br label %for.cond
238
239for.cond:                                         ; preds = %for.inc, %entry
240  %0 = load i32* %i, align 4
241  %cmp = icmp slt i32 %0, 100
242  br i1 %cmp, label %for.body, label %for.end
243
244for.body:                                         ; preds = %for.cond
245  %1 = load i32* %i, align 4
246  %2 = load i32* %N, align 4
247  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
248  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
249  %3 = load i32* %arrayidx1, align 4
250  %4 = load i32* %a.addr, align 4
251  %add = add nsw i32 %3, %4
252  %5 = load i32* %i, align 4
253  %6 = load i32* %N, align 4
254  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
255  %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5
256  store i32 %add, i32* %arrayidx3, align 4
257  br label %for.inc
258
259for.inc:                                          ; preds = %for.body
260  %7 = load i32* %i, align 4
261  %inc = add nsw i32 %7, 1
262  store i32 %inc, i32* %i, align 4
263  br label %for.cond
264
265for.end:                                          ; preds = %for.cond
266  %8 = load i32* %a.addr, align 4
267  %9 = load i32* %N, align 4
268  %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
269  %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8
270  %10 = load i32* %arrayidx5, align 4
271  ret i32 %10
272}
273
274; /// Same objects, positive induction, multi-array, different sub-elements
275; int noAlias06 (int a) {
276;   int i, N=10;
277;   for (i=0; i<SIZE; i++)
278;     Bar.A[N][i] = Bar.A[N+1][i] + a;
279;   return Bar.A[N][a];
280; }
281; CHECK-LABEL: define i32 @noAlias06(
282; CHECK: add nsw <4 x i32>
283; CHECK: ret
284
285define i32 @noAlias06(i32 %a) #0 {
286entry:
287  %a.addr = alloca i32, align 4
288  %i = alloca i32, align 4
289  %N = alloca i32, align 4
290  store i32 %a, i32* %a.addr, align 4
291  store i32 10, i32* %N, align 4
292  store i32 0, i32* %i, align 4
293  br label %for.cond
294
295for.cond:                                         ; preds = %for.inc, %entry
296  %0 = load i32* %i, align 4
297  %cmp = icmp slt i32 %0, 100
298  br i1 %cmp, label %for.body, label %for.end
299
300for.body:                                         ; preds = %for.cond
301  %1 = load i32* %i, align 4
302  %2 = load i32* %N, align 4
303  %add = add nsw i32 %2, 1
304  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
305  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
306  %3 = load i32* %arrayidx1, align 4
307  %4 = load i32* %a.addr, align 4
308  %add2 = add nsw i32 %3, %4
309  %5 = load i32* %i, align 4
310  %6 = load i32* %N, align 4
311  %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
312  %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5
313  store i32 %add2, i32* %arrayidx4, align 4
314  br label %for.inc
315
316for.inc:                                          ; preds = %for.body
317  %7 = load i32* %i, align 4
318  %inc = add nsw i32 %7, 1
319  store i32 %inc, i32* %i, align 4
320  br label %for.cond
321
322for.end:                                          ; preds = %for.cond
323  %8 = load i32* %a.addr, align 4
324  %9 = load i32* %N, align 4
325  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
326  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8
327  %10 = load i32* %arrayidx6, align 4
328  ret i32 %10
329}
330
331; /// Different objects, negative induction, constant distance
332; int noAlias07 (int a) {
333;   int i;
334;   for (i=0; i<SIZE; i++)
335;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
336;   return Foo.A[a];
337; }
338; CHECK-LABEL: define i32 @noAlias07(
339; CHECK: sub nsw <4 x i32>
340; CHECK: ret
341
342define i32 @noAlias07(i32 %a) #0 {
343entry:
344  %a.addr = alloca i32, align 4
345  %i = alloca i32, align 4
346  store i32 %a, i32* %a.addr, align 4
347  store i32 0, i32* %i, align 4
348  br label %for.cond
349
350for.cond:                                         ; preds = %for.inc, %entry
351  %0 = load i32* %i, align 4
352  %cmp = icmp slt i32 %0, 100
353  br i1 %cmp, label %for.body, label %for.end
354
355for.body:                                         ; preds = %for.cond
356  %1 = load i32* %i, align 4
357  %sub = sub nsw i32 100, %1
358  %sub1 = sub nsw i32 %sub, 1
359  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
360  %2 = load i32* %arrayidx, align 4
361  %3 = load i32* %a.addr, align 4
362  %add = add nsw i32 %2, %3
363  %4 = load i32* %i, align 4
364  %sub2 = sub nsw i32 100, %4
365  %sub3 = sub nsw i32 %sub2, 1
366  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
367  store i32 %add, i32* %arrayidx4, align 4
368  br label %for.inc
369
370for.inc:                                          ; preds = %for.body
371  %5 = load i32* %i, align 4
372  %inc = add nsw i32 %5, 1
373  store i32 %inc, i32* %i, align 4
374  br label %for.cond
375
376for.end:                                          ; preds = %for.cond
377  %6 = load i32* %a.addr, align 4
378  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
379  %7 = load i32* %arrayidx5, align 4
380  ret i32 %7
381}
382
383; /// Different objects, negative induction, shortening slide
384; int noAlias08 (int a) {
385;   int i;
386;   for (i=0; i<SIZE-10; i++)
387;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
388;   return Foo.A[a];
389; }
390; CHECK-LABEL: define i32 @noAlias08(
391; CHECK: sub nsw <4 x i32>
392; CHECK: ret
393
394define i32 @noAlias08(i32 %a) #0 {
395entry:
396  %a.addr = alloca i32, align 4
397  %i = alloca i32, align 4
398  store i32 %a, i32* %a.addr, align 4
399  store i32 0, i32* %i, align 4
400  br label %for.cond
401
402for.cond:                                         ; preds = %for.inc, %entry
403  %0 = load i32* %i, align 4
404  %cmp = icmp slt i32 %0, 90
405  br i1 %cmp, label %for.body, label %for.end
406
407for.body:                                         ; preds = %for.cond
408  %1 = load i32* %i, align 4
409  %sub = sub nsw i32 100, %1
410  %sub1 = sub nsw i32 %sub, 10
411  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
412  %2 = load i32* %arrayidx, align 4
413  %3 = load i32* %a.addr, align 4
414  %add = add nsw i32 %2, %3
415  %4 = load i32* %i, align 4
416  %sub2 = sub nsw i32 100, %4
417  %sub3 = sub nsw i32 %sub2, 1
418  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
419  store i32 %add, i32* %arrayidx4, align 4
420  br label %for.inc
421
422for.inc:                                          ; preds = %for.body
423  %5 = load i32* %i, align 4
424  %inc = add nsw i32 %5, 1
425  store i32 %inc, i32* %i, align 4
426  br label %for.cond
427
428for.end:                                          ; preds = %for.cond
429  %6 = load i32* %a.addr, align 4
430  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
431  %7 = load i32* %arrayidx5, align 4
432  ret i32 %7
433}
434
435; /// Different objects, negative induction, widening slide
436; int noAlias09 (int a) {
437;   int i;
438;   for (i=0; i<SIZE; i++)
439;     Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
440;   return Foo.A[a];
441; }
442; CHECK-LABEL: define i32 @noAlias09(
443; CHECK: sub nsw <4 x i32>
444; CHECK: ret
445
446define i32 @noAlias09(i32 %a) #0 {
447entry:
448  %a.addr = alloca i32, align 4
449  %i = alloca i32, align 4
450  store i32 %a, i32* %a.addr, align 4
451  store i32 0, i32* %i, align 4
452  br label %for.cond
453
454for.cond:                                         ; preds = %for.inc, %entry
455  %0 = load i32* %i, align 4
456  %cmp = icmp slt i32 %0, 100
457  br i1 %cmp, label %for.body, label %for.end
458
459for.body:                                         ; preds = %for.cond
460  %1 = load i32* %i, align 4
461  %sub = sub nsw i32 100, %1
462  %sub1 = sub nsw i32 %sub, 1
463  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
464  %2 = load i32* %arrayidx, align 4
465  %3 = load i32* %a.addr, align 4
466  %add = add nsw i32 %2, %3
467  %4 = load i32* %i, align 4
468  %sub2 = sub nsw i32 100, %4
469  %sub3 = sub nsw i32 %sub2, 10
470  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
471  store i32 %add, i32* %arrayidx4, align 4
472  br label %for.inc
473
474for.inc:                                          ; preds = %for.body
475  %5 = load i32* %i, align 4
476  %inc = add nsw i32 %5, 1
477  store i32 %inc, i32* %i, align 4
478  br label %for.cond
479
480for.end:                                          ; preds = %for.cond
481  %6 = load i32* %a.addr, align 4
482  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
483  %7 = load i32* %arrayidx5, align 4
484  ret i32 %7
485}
486
487; /// Pointer access, negative stride, run-time check added
488; int noAlias10 (int a) {
489;   int i;
490;   for (i=0; i<SIZE; i++)
491;     *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
492;   return *(PA+a);
493; }
494; CHECK-LABEL: define i32 @noAlias10(
495; CHECK-NOT: sub nsw <4 x i32>
496; CHECK: ret
497;
498; TODO: This test vectorizes (with run-time check) on real targets with -O3)
499; Check why it's not being vectorized even when forcing vectorization
500
501define i32 @noAlias10(i32 %a) #0 {
502entry:
503  %a.addr = alloca i32, align 4
504  %i = alloca i32, align 4
505  store i32 %a, i32* %a.addr, align 4
506  store i32 0, i32* %i, align 4
507  br label %for.cond
508
509for.cond:                                         ; preds = %for.inc, %entry
510  %0 = load i32* %i, align 4
511  %cmp = icmp slt i32 %0, 100
512  br i1 %cmp, label %for.body, label %for.end
513
514for.body:                                         ; preds = %for.cond
515  %1 = load i32** @PB, align 4
516  %add.ptr = getelementptr inbounds i32* %1, i32 100
517  %2 = load i32* %i, align 4
518  %idx.neg = sub i32 0, %2
519  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
520  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
521  %3 = load i32* %add.ptr2, align 4
522  %4 = load i32* %a.addr, align 4
523  %add = add nsw i32 %3, %4
524  %5 = load i32** @PA, align 4
525  %add.ptr3 = getelementptr inbounds i32* %5, i32 100
526  %6 = load i32* %i, align 4
527  %idx.neg4 = sub i32 0, %6
528  %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4
529  %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1
530  store i32 %add, i32* %add.ptr6, align 4
531  br label %for.inc
532
533for.inc:                                          ; preds = %for.body
534  %7 = load i32* %i, align 4
535  %inc = add nsw i32 %7, 1
536  store i32 %inc, i32* %i, align 4
537  br label %for.cond
538
539for.end:                                          ; preds = %for.cond
540  %8 = load i32** @PA, align 4
541  %9 = load i32* %a.addr, align 4
542  %add.ptr7 = getelementptr inbounds i32* %8, i32 %9
543  %10 = load i32* %add.ptr7, align 4
544  ret i32 %10
545}
546
547; /// Different objects, negative induction, multi-array
548; int noAlias11 (int a) {
549;   int i, N=10;
550;   for (i=0; i<SIZE; i++)
551;     Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
552;   return Bar.A[N][a];
553; }
554; CHECK-LABEL: define i32 @noAlias11(
555; CHECK: sub nsw <4 x i32>
556; CHECK: ret
557
558define i32 @noAlias11(i32 %a) #0 {
559entry:
560  %a.addr = alloca i32, align 4
561  %i = alloca i32, align 4
562  %N = alloca i32, align 4
563  store i32 %a, i32* %a.addr, align 4
564  store i32 10, i32* %N, align 4
565  store i32 0, i32* %i, align 4
566  br label %for.cond
567
568for.cond:                                         ; preds = %for.inc, %entry
569  %0 = load i32* %i, align 4
570  %cmp = icmp slt i32 %0, 100
571  br i1 %cmp, label %for.body, label %for.end
572
573for.body:                                         ; preds = %for.cond
574  %1 = load i32* %i, align 4
575  %sub = sub nsw i32 100, %1
576  %sub1 = sub nsw i32 %sub, 1
577  %2 = load i32* %N, align 4
578  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
579  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
580  %3 = load i32* %arrayidx2, align 4
581  %4 = load i32* %a.addr, align 4
582  %add = add nsw i32 %3, %4
583  %5 = load i32* %i, align 4
584  %sub3 = sub nsw i32 100, %5
585  %sub4 = sub nsw i32 %sub3, 1
586  %6 = load i32* %N, align 4
587  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
588  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4
589  store i32 %add, i32* %arrayidx6, align 4
590  br label %for.inc
591
592for.inc:                                          ; preds = %for.body
593  %7 = load i32* %i, align 4
594  %inc = add nsw i32 %7, 1
595  store i32 %inc, i32* %i, align 4
596  br label %for.cond
597
598for.end:                                          ; preds = %for.cond
599  %8 = load i32* %a.addr, align 4
600  %9 = load i32* %N, align 4
601  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
602  %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8
603  %10 = load i32* %arrayidx8, align 4
604  ret i32 %10
605}
606
607; /// Same objects, negative induction, multi-array, different sub-elements
608; int noAlias12 (int a) {
609;   int i, N=10;
610;   for (i=0; i<SIZE; i++)
611;     Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
612;   return Bar.A[N][a];
613; }
614; CHECK-LABEL: define i32 @noAlias12(
615; CHECK: sub nsw <4 x i32>
616; CHECK: ret
617
618define i32 @noAlias12(i32 %a) #0 {
619entry:
620  %a.addr = alloca i32, align 4
621  %i = alloca i32, align 4
622  %N = alloca i32, align 4
623  store i32 %a, i32* %a.addr, align 4
624  store i32 10, i32* %N, align 4
625  store i32 0, i32* %i, align 4
626  br label %for.cond
627
628for.cond:                                         ; preds = %for.inc, %entry
629  %0 = load i32* %i, align 4
630  %cmp = icmp slt i32 %0, 100
631  br i1 %cmp, label %for.body, label %for.end
632
633for.body:                                         ; preds = %for.cond
634  %1 = load i32* %i, align 4
635  %sub = sub nsw i32 100, %1
636  %sub1 = sub nsw i32 %sub, 1
637  %2 = load i32* %N, align 4
638  %add = add nsw i32 %2, 1
639  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
640  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
641  %3 = load i32* %arrayidx2, align 4
642  %4 = load i32* %a.addr, align 4
643  %add3 = add nsw i32 %3, %4
644  %5 = load i32* %i, align 4
645  %sub4 = sub nsw i32 100, %5
646  %sub5 = sub nsw i32 %sub4, 1
647  %6 = load i32* %N, align 4
648  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
649  %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5
650  store i32 %add3, i32* %arrayidx7, align 4
651  br label %for.inc
652
653for.inc:                                          ; preds = %for.body
654  %7 = load i32* %i, align 4
655  %inc = add nsw i32 %7, 1
656  store i32 %inc, i32* %i, align 4
657  br label %for.cond
658
659for.end:                                          ; preds = %for.cond
660  %8 = load i32* %a.addr, align 4
661  %9 = load i32* %N, align 4
662  %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
663  %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8
664  %10 = load i32* %arrayidx9, align 4
665  ret i32 %10
666}
667
668; /// Same objects, positive induction, constant distance, just enough for vector size
669; int noAlias13 (int a) {
670;   int i;
671;   for (i=0; i<SIZE; i++)
672;     Foo.A[i] = Foo.A[i+4] + a;
673;   return Foo.A[a];
674; }
675; CHECK-LABEL: define i32 @noAlias13(
676; CHECK: add nsw <4 x i32>
677; CHECK: ret
678
679define i32 @noAlias13(i32 %a) #0 {
680entry:
681  %a.addr = alloca i32, align 4
682  %i = alloca i32, align 4
683  store i32 %a, i32* %a.addr, align 4
684  store i32 0, i32* %i, align 4
685  br label %for.cond
686
687for.cond:                                         ; preds = %for.inc, %entry
688  %0 = load i32* %i, align 4
689  %cmp = icmp slt i32 %0, 100
690  br i1 %cmp, label %for.body, label %for.end
691
692for.body:                                         ; preds = %for.cond
693  %1 = load i32* %i, align 4
694  %add = add nsw i32 %1, 4
695  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
696  %2 = load i32* %arrayidx, align 4
697  %3 = load i32* %a.addr, align 4
698  %add1 = add nsw i32 %2, %3
699  %4 = load i32* %i, align 4
700  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
701  store i32 %add1, i32* %arrayidx2, align 4
702  br label %for.inc
703
704for.inc:                                          ; preds = %for.body
705  %5 = load i32* %i, align 4
706  %inc = add nsw i32 %5, 1
707  store i32 %inc, i32* %i, align 4
708  br label %for.cond
709
710for.end:                                          ; preds = %for.cond
711  %6 = load i32* %a.addr, align 4
712  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
713  %7 = load i32* %arrayidx3, align 4
714  ret i32 %7
715}
716
717; /// Same objects, negative induction, constant distance, just enough for vector size
718; int noAlias14 (int a) {
719;   int i;
720;   for (i=0; i<SIZE; i++)
721;     Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
722;   return Foo.A[a];
723; }
724; CHECK-LABEL: define i32 @noAlias14(
725; CHECK: sub nsw <4 x i32>
726; CHECK: ret
727
728define i32 @noAlias14(i32 %a) #0 {
729entry:
730  %a.addr = alloca i32, align 4
731  %i = alloca i32, align 4
732  store i32 %a, i32* %a.addr, align 4
733  store i32 0, i32* %i, align 4
734  br label %for.cond
735
736for.cond:                                         ; preds = %for.inc, %entry
737  %0 = load i32* %i, align 4
738  %cmp = icmp slt i32 %0, 100
739  br i1 %cmp, label %for.body, label %for.end
740
741for.body:                                         ; preds = %for.cond
742  %1 = load i32* %i, align 4
743  %sub = sub nsw i32 100, %1
744  %sub1 = sub nsw i32 %sub, 5
745  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
746  %2 = load i32* %arrayidx, align 4
747  %3 = load i32* %a.addr, align 4
748  %add = add nsw i32 %2, %3
749  %4 = load i32* %i, align 4
750  %sub2 = sub nsw i32 100, %4
751  %sub3 = sub nsw i32 %sub2, 1
752  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
753  store i32 %add, i32* %arrayidx4, align 4
754  br label %for.inc
755
756for.inc:                                          ; preds = %for.body
757  %5 = load i32* %i, align 4
758  %inc = add nsw i32 %5, 1
759  store i32 %inc, i32* %i, align 4
760  br label %for.cond
761
762for.end:                                          ; preds = %for.cond
763  %6 = load i32* %a.addr, align 4
764  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
765  %7 = load i32* %arrayidx5, align 4
766  ret i32 %7
767}
768
769
770;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
771
772
773; /// Different objects, swapped induction, alias at the end
774; int mayAlias01 (int a) {
775;   int i;
776;   for (i=0; i<SIZE; i++)
777;     Foo.A[i] = Foo.B[SIZE-i-1] + a;
778;   return Foo.A[a];
779; }
780; CHECK-LABEL: define i32 @mayAlias01(
781; CHECK-NOT: add nsw <4 x i32>
782; CHECK: ret
783
784define i32 @mayAlias01(i32 %a) nounwind {
785entry:
786  %a.addr = alloca i32, align 4
787  %i = alloca i32, align 4
788  store i32 %a, i32* %a.addr, align 4
789  store i32 0, i32* %i, align 4
790  br label %for.cond
791
792for.cond:                                         ; preds = %for.inc, %entry
793  %0 = load i32* %i, align 4
794  %cmp = icmp slt i32 %0, 100
795  br i1 %cmp, label %for.body, label %for.end
796
797for.body:                                         ; preds = %for.cond
798  %1 = load i32* %i, align 4
799  %sub = sub nsw i32 100, %1
800  %sub1 = sub nsw i32 %sub, 1
801  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
802  %2 = load i32* %arrayidx, align 4
803  %3 = load i32* %a.addr, align 4
804  %add = add nsw i32 %2, %3
805  %4 = load i32* %i, align 4
806  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
807  store i32 %add, i32* %arrayidx2, align 4
808  br label %for.inc
809
810for.inc:                                          ; preds = %for.body
811  %5 = load i32* %i, align 4
812  %inc = add nsw i32 %5, 1
813  store i32 %inc, i32* %i, align 4
814  br label %for.cond
815
816for.end:                                          ; preds = %for.cond
817  %6 = load i32* %a.addr, align 4
818  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
819  %7 = load i32* %arrayidx3, align 4
820  ret i32 %7
821}
822
823; /// Different objects, swapped induction, alias at the beginning
824; int mayAlias02 (int a) {
825;   int i;
826;   for (i=0; i<SIZE; i++)
827;     Foo.A[SIZE-i-1] = Foo.B[i] + a;
828;   return Foo.A[a];
829; }
830; CHECK-LABEL: define i32 @mayAlias02(
831; CHECK-NOT: add nsw <4 x i32>
832; CHECK: ret
833
834define i32 @mayAlias02(i32 %a) nounwind {
835entry:
836  %a.addr = alloca i32, align 4
837  %i = alloca i32, align 4
838  store i32 %a, i32* %a.addr, align 4
839  store i32 0, i32* %i, align 4
840  br label %for.cond
841
842for.cond:                                         ; preds = %for.inc, %entry
843  %0 = load i32* %i, align 4
844  %cmp = icmp slt i32 %0, 100
845  br i1 %cmp, label %for.body, label %for.end
846
847for.body:                                         ; preds = %for.cond
848  %1 = load i32* %i, align 4
849  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
850  %2 = load i32* %arrayidx, align 4
851  %3 = load i32* %a.addr, align 4
852  %add = add nsw i32 %2, %3
853  %4 = load i32* %i, align 4
854  %sub = sub nsw i32 100, %4
855  %sub1 = sub nsw i32 %sub, 1
856  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
857  store i32 %add, i32* %arrayidx2, align 4
858  br label %for.inc
859
860for.inc:                                          ; preds = %for.body
861  %5 = load i32* %i, align 4
862  %inc = add nsw i32 %5, 1
863  store i32 %inc, i32* %i, align 4
864  br label %for.cond
865
866for.end:                                          ; preds = %for.cond
867  %6 = load i32* %a.addr, align 4
868  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
869  %7 = load i32* %arrayidx3, align 4
870  ret i32 %7
871}
872
873; /// Pointer access, run-time check added
874; int mayAlias03 (int a) {
875;   int i;
876;   for (i=0; i<SIZE; i++)
877;     *(PA+i) = *(PB+SIZE-i-1) + a;
878;   return *(PA+a);
879; }
880; CHECK-LABEL: define i32 @mayAlias03(
881; CHECK-NOT: add nsw <4 x i32>
882; CHECK: ret
883
884define i32 @mayAlias03(i32 %a) nounwind {
885entry:
886  %a.addr = alloca i32, align 4
887  %i = alloca i32, align 4
888  store i32 %a, i32* %a.addr, align 4
889  store i32 0, i32* %i, align 4
890  br label %for.cond
891
892for.cond:                                         ; preds = %for.inc, %entry
893  %0 = load i32* %i, align 4
894  %cmp = icmp slt i32 %0, 100
895  br i1 %cmp, label %for.body, label %for.end
896
897for.body:                                         ; preds = %for.cond
898  %1 = load i32** @PB, align 4
899  %add.ptr = getelementptr inbounds i32* %1, i32 100
900  %2 = load i32* %i, align 4
901  %idx.neg = sub i32 0, %2
902  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
903  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
904  %3 = load i32* %add.ptr2, align 4
905  %4 = load i32* %a.addr, align 4
906  %add = add nsw i32 %3, %4
907  %5 = load i32** @PA, align 4
908  %6 = load i32* %i, align 4
909  %add.ptr3 = getelementptr inbounds i32* %5, i32 %6
910  store i32 %add, i32* %add.ptr3, align 4
911  br label %for.inc
912
913for.inc:                                          ; preds = %for.body
914  %7 = load i32* %i, align 4
915  %inc = add nsw i32 %7, 1
916  store i32 %inc, i32* %i, align 4
917  br label %for.cond
918
919for.end:                                          ; preds = %for.cond
920  %8 = load i32** @PA, align 4
921  %9 = load i32* %a.addr, align 4
922  %add.ptr4 = getelementptr inbounds i32* %8, i32 %9
923  %10 = load i32* %add.ptr4, align 4
924  ret i32 %10
925}
926
927
928;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) ===
929
930
931; int mustAlias01 (int a) {
932;   int i;
933;   for (i=0; i<SIZE; i++)
934;     Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
935;   return Foo.A[a];
936; }
937; CHECK-LABEL: define i32 @mustAlias01(
938; CHECK-NOT: add nsw <4 x i32>
939; CHECK: ret
940
941define i32 @mustAlias01(i32 %a) nounwind {
942entry:
943  %a.addr = alloca i32, align 4
944  %i = alloca i32, align 4
945  store i32 %a, i32* %a.addr, align 4
946  store i32 0, i32* %i, align 4
947  br label %for.cond
948
949for.cond:                                         ; preds = %for.inc, %entry
950  %0 = load i32* %i, align 4
951  %cmp = icmp slt i32 %0, 100
952  br i1 %cmp, label %for.body, label %for.end
953
954for.body:                                         ; preds = %for.cond
955  %1 = load i32* %i, align 4
956  %sub = sub nsw i32 100, %1
957  %sub1 = sub nsw i32 %sub, 1
958  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
959  %2 = load i32* %arrayidx, align 4
960  %3 = load i32* %a.addr, align 4
961  %add = add nsw i32 %2, %3
962  %4 = load i32* %i, align 4
963  %add2 = add nsw i32 %4, 10
964  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
965  store i32 %add, i32* %arrayidx3, align 4
966  br label %for.inc
967
968for.inc:                                          ; preds = %for.body
969  %5 = load i32* %i, align 4
970  %inc = add nsw i32 %5, 1
971  store i32 %inc, i32* %i, align 4
972  br label %for.cond
973
974for.end:                                          ; preds = %for.cond
975  %6 = load i32* %a.addr, align 4
976  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
977  %7 = load i32* %arrayidx4, align 4
978  ret i32 %7
979}
980
981; int mustAlias02 (int a) {
982;   int i;
983;   for (i=0; i<SIZE; i++)
984;     Foo.A[i] = Foo.B[SIZE-i-10] + a;
985;   return Foo.A[a];
986; }
987; CHECK-LABEL: define i32 @mustAlias02(
988; CHECK-NOT: add nsw <4 x i32>
989; CHECK: ret
990
991define i32 @mustAlias02(i32 %a) nounwind {
992entry:
993  %a.addr = alloca i32, align 4
994  %i = alloca i32, align 4
995  store i32 %a, i32* %a.addr, align 4
996  store i32 0, i32* %i, align 4
997  br label %for.cond
998
999for.cond:                                         ; preds = %for.inc, %entry
1000  %0 = load i32* %i, align 4
1001  %cmp = icmp slt i32 %0, 100
1002  br i1 %cmp, label %for.body, label %for.end
1003
1004for.body:                                         ; preds = %for.cond
1005  %1 = load i32* %i, align 4
1006  %sub = sub nsw i32 100, %1
1007  %sub1 = sub nsw i32 %sub, 10
1008  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1009  %2 = load i32* %arrayidx, align 4
1010  %3 = load i32* %a.addr, align 4
1011  %add = add nsw i32 %2, %3
1012  %4 = load i32* %i, align 4
1013  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
1014  store i32 %add, i32* %arrayidx2, align 4
1015  br label %for.inc
1016
1017for.inc:                                          ; preds = %for.body
1018  %5 = load i32* %i, align 4
1019  %inc = add nsw i32 %5, 1
1020  store i32 %inc, i32* %i, align 4
1021  br label %for.cond
1022
1023for.end:                                          ; preds = %for.cond
1024  %6 = load i32* %a.addr, align 4
1025  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1026  %7 = load i32* %arrayidx3, align 4
1027  ret i32 %7
1028}
1029
1030; int mustAlias03 (int a) {
1031;   int i;
1032;   for (i=0; i<SIZE; i++)
1033;     Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
1034;   return Foo.A[a];
1035; }
1036; CHECK-LABEL: define i32 @mustAlias03(
1037; CHECK-NOT: add nsw <4 x i32>
1038; CHECK: ret
1039
1040define i32 @mustAlias03(i32 %a) nounwind {
1041entry:
1042  %a.addr = alloca i32, align 4
1043  %i = alloca i32, align 4
1044  store i32 %a, i32* %a.addr, align 4
1045  store i32 0, i32* %i, align 4
1046  br label %for.cond
1047
1048for.cond:                                         ; preds = %for.inc, %entry
1049  %0 = load i32* %i, align 4
1050  %cmp = icmp slt i32 %0, 100
1051  br i1 %cmp, label %for.body, label %for.end
1052
1053for.body:                                         ; preds = %for.cond
1054  %1 = load i32* %i, align 4
1055  %sub = sub nsw i32 100, %1
1056  %sub1 = sub nsw i32 %sub, 10
1057  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1058  %2 = load i32* %arrayidx, align 4
1059  %3 = load i32* %a.addr, align 4
1060  %add = add nsw i32 %2, %3
1061  %4 = load i32* %i, align 4
1062  %add2 = add nsw i32 %4, 10
1063  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
1064  store i32 %add, i32* %arrayidx3, align 4
1065  br label %for.inc
1066
1067for.inc:                                          ; preds = %for.body
1068  %5 = load i32* %i, align 4
1069  %inc = add nsw i32 %5, 1
1070  store i32 %inc, i32* %i, align 4
1071  br label %for.cond
1072
1073for.end:                                          ; preds = %for.cond
1074  %6 = load i32* %a.addr, align 4
1075  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1076  %7 = load i32* %arrayidx4, align 4
1077  ret i32 %7
1078}
1079