1; RUN: opt -basic-aa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s -check-prefix=LAA
2; RUN: opt -passes='require<aa>,require<scalar-evolution>,require<aa>,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s  2>&1 | FileCheck %s --check-prefix=LAA
3
4target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
5
6; For this loop:
7;   unsigned index = 0;
8;   for (int i = 0; i < n; i++) {
9;    A[2 * index] = A[2 * index] + B[i];
10;    index++;
11;   }
12;
13; SCEV is unable to prove that A[2 * i] does not overflow.
14;
15; Analyzing the IR does not help us because the GEPs are not
16; affine AddRecExprs. However, we can turn them into AddRecExprs
17; using SCEV Predicates.
18;
19; Once we have an affine expression we need to add an additional NUSW
20; to check that the pointers don't wrap since the GEPs are not
21; inbound.
22
23; LAA-LABEL: f1
24; LAA: Memory dependences are safe{{$}}
25; LAA: SCEV assumptions:
26; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
27; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>
28
29; The expression for %mul_ext as analyzed by SCEV is
30;    (zext i32 {0,+,2}<%for.body> to i64)
31; We have added the nusw flag to turn this expression into the SCEV expression:
32;    i64 {0,+,2}<%for.body>
33
34; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
35; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64))<nuw><nsw> + %a)
36; LAA-NEXT: --> {%a,+,4}<%for.body>
37
38
39define void @f1(i16* noalias %a,
40                i16* noalias %b, i64 %N) {
41entry:
42  br label %for.body
43
44for.body:                                         ; preds = %for.body, %entry
45  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
46  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
47
48  %mul = mul i32 %ind1, 2
49  %mul_ext = zext i32 %mul to i64
50
51  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
52  %loadA = load i16, i16* %arrayidxA, align 2
53
54  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
55  %loadB = load i16, i16* %arrayidxB, align 2
56
57  %add = mul i16 %loadA, %loadB
58
59  store i16 %add, i16* %arrayidxA, align 2
60
61  %inc = add nuw nsw i64 %ind, 1
62  %inc1 = add i32 %ind1, 1
63
64  %exitcond = icmp eq i64 %inc, %N
65  br i1 %exitcond, label %for.end, label %for.body
66
67for.end:                                          ; preds = %for.body
68  ret void
69}
70
71; For this loop:
72;   unsigned index = n;
73;   for (int i = 0; i < n; i++) {
74;    A[2 * index] = A[2 * index] + B[i];
75;    index--;
76;   }
77;
78; the SCEV expression for 2 * index is not an AddRecExpr
79; (and implictly not affine). However, we are able to make assumptions
80; that will turn the expression into an affine one and continue the
81; analysis.
82;
83; Once we have an affine expression we need to add an additional NUSW
84; to check that the pointers don't wrap since the GEPs are not
85; inbounds.
86;
87; This loop has a negative stride for A, and the nusw flag is required in
88; order to properly extend the increment from i32 -4 to i64 -4.
89
90; LAA-LABEL: f2
91; LAA: Memory dependences are safe{{$}}
92; LAA: SCEV assumptions:
93; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
94; LAA-NEXT: {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
95
96; The expression for %mul_ext as analyzed by SCEV is
97;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
98; We have added the nusw flag to turn this expression into the following SCEV:
99;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
100
101; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
102; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nuw><nsw> + %a)
103; LAA-NEXT: --> {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body>
104
105define void @f2(i16* noalias %a,
106                i16* noalias %b, i64 %N) {
107entry:
108  %TruncN = trunc i64 %N to i32
109  br label %for.body
110
111for.body:                                         ; preds = %for.body, %entry
112  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
113  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
114
115  %mul = mul i32 %ind1, 2
116  %mul_ext = zext i32 %mul to i64
117
118  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
119  %loadA = load i16, i16* %arrayidxA, align 2
120
121  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
122  %loadB = load i16, i16* %arrayidxB, align 2
123
124  %add = mul i16 %loadA, %loadB
125
126  store i16 %add, i16* %arrayidxA, align 2
127
128  %inc = add nuw nsw i64 %ind, 1
129  %dec = sub i32 %ind1, 1
130
131  %exitcond = icmp eq i64 %inc, %N
132  br i1 %exitcond, label %for.end, label %for.body
133
134for.end:                                          ; preds = %for.body
135  ret void
136}
137
138; We replicate the tests above, but this time sign extend 2 * index instead
139; of zero extending it.
140
141; LAA-LABEL: f3
142; LAA: Memory dependences are safe{{$}}
143; LAA: SCEV assumptions:
144; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
145; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>
146
147; The expression for %mul_ext as analyzed by SCEV is
148;     i64 (sext i32 {0,+,2}<%for.body> to i64)
149; We have added the nssw flag to turn this expression into the following SCEV:
150;     i64 {0,+,2}<%for.body>
151
152; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
153; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64))<nsw> + %a)
154; LAA-NEXT: --> {%a,+,4}<%for.body>
155
156define void @f3(i16* noalias %a,
157                i16* noalias %b, i64 %N) {
158entry:
159  br label %for.body
160
161for.body:                                         ; preds = %for.body, %entry
162  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
163  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
164
165  %mul = mul i32 %ind1, 2
166  %mul_ext = sext i32 %mul to i64
167
168  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
169  %loadA = load i16, i16* %arrayidxA, align 2
170
171  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
172  %loadB = load i16, i16* %arrayidxB, align 2
173
174  %add = mul i16 %loadA, %loadB
175
176  store i16 %add, i16* %arrayidxA, align 2
177
178  %inc = add nuw nsw i64 %ind, 1
179  %inc1 = add i32 %ind1, 1
180
181  %exitcond = icmp eq i64 %inc, %N
182  br i1 %exitcond, label %for.end, label %for.body
183
184for.end:                                          ; preds = %for.body
185  ret void
186}
187
188; LAA-LABEL: f4
189; LAA: Memory dependences are safe{{$}}
190; LAA: SCEV assumptions:
191; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
192; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
193
194; The expression for %mul_ext as analyzed by SCEV is
195;     i64  (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
196; We have added the nssw flag to turn this expression into the following SCEV:
197;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
198
199; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
200; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
201; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
202
203define void @f4(i16* noalias %a,
204                i16* noalias %b, i64 %N) {
205entry:
206  %TruncN = trunc i64 %N to i32
207  br label %for.body
208
209for.body:                                         ; preds = %for.body, %entry
210  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
211  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
212
213  %mul = mul i32 %ind1, 2
214  %mul_ext = sext i32 %mul to i64
215
216  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
217  %loadA = load i16, i16* %arrayidxA, align 2
218
219  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
220  %loadB = load i16, i16* %arrayidxB, align 2
221
222  %add = mul i16 %loadA, %loadB
223
224  store i16 %add, i16* %arrayidxA, align 2
225
226  %inc = add nuw nsw i64 %ind, 1
227  %dec = sub i32 %ind1, 1
228
229  %exitcond = icmp eq i64 %inc, %N
230  br i1 %exitcond, label %for.end, label %for.body
231
232for.end:                                          ; preds = %for.body
233  ret void
234}
235
236; The following function is similar to the one above, but has the GEP
237; to pointer %A inbounds. The index %mul doesn't have the nsw flag.
238; This means that the SCEV expression for %mul can wrap and we need
239; a SCEV predicate to continue analysis.
240;
241; We can still analyze this by adding the required no wrap SCEV predicates.
242
243; LAA-LABEL: f5
244; LAA: Memory dependences are safe{{$}}
245; LAA: SCEV assumptions:
246; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
247; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
248
249; LAA: [PSE]  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
250; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
251; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>
252
253define void @f5(i16* noalias %a,
254                i16* noalias %b, i64 %N) {
255entry:
256  %TruncN = trunc i64 %N to i32
257  br label %for.body
258
259for.body:                                         ; preds = %for.body, %entry
260  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
261  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]
262
263  %mul = mul i32 %ind1, 2
264
265  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
266  %loadA = load i16, i16* %arrayidxA, align 2
267
268  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
269  %loadB = load i16, i16* %arrayidxB, align 2
270
271  %add = mul i16 %loadA, %loadB
272
273  store i16 %add, i16* %arrayidxA, align 2
274
275  %inc = add nuw nsw i64 %ind, 1
276  %dec = sub i32 %ind1, 1
277
278  %exitcond = icmp eq i64 %inc, %N
279  br i1 %exitcond, label %for.end, label %for.body
280
281for.end:                                          ; preds = %for.body
282  ret void
283}
284