; RUN: opt -passes='print-access-info' -aa-pipeline='basic-aa' -disable-output < %s 2>&1 | FileCheck %s --check-prefix=LAA

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index++;
;   }
;
; SCEV is unable to prove that A[2 * index] does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV Predicates.
;
; Once we have an affine expression we need to add an additional <nusw>
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds (see the sketch below).
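;
; As a rough illustration (not checked by this test), the <nusw> assumption on
; {0,+,2}<%for.body> corresponds to a hypothetical source-level precondition
; along these lines:
;
;   /* sketch only: the 32-bit expression 2 * index must never wrap, which
;      roughly means its largest value, 2 * (n - 1), has to fit in 32 bits */
;   bool no_wrap = 2 * (uint64_t)(n - 1) <= UINT32_MAX;
;
; Loop versioning can later turn the emitted predicates into runtime checks of
; this kind.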

; LAA-LABEL: f1
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;    i64 {0,+,2}<%for.body>

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64))<nuw><nsw> + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

define void @f1(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional <nusw>
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.
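;
; As a rough illustration (not checked by this test), the <nusw> assumption on
; the decreasing index expression corresponds to a hypothetical source-level
; precondition along these lines:
;
;   /* sketch only: the unsigned 32-bit value 2 * index starts at 2 * n and
;      steps down to 2, so it must not wrap on the very first access */
;   bool no_wrap = 2 * (uint64_t)n <= UINT32_MAX;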

; LAA-LABEL: f2
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nuw><nsw> + %a)
; LAA-NEXT: --> {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body>

define void @f2(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.

; LAA-LABEL: f3
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {0,+,2}<%for.body>
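;
; As a rough illustration (not checked by this test), the <nssw> assumption on
; {0,+,2}<%for.body> corresponds to a hypothetical signed precondition of
; this form:
;
;   /* sketch only: the 32-bit expression 2 * index must never wrap in the
;      signed sense, so its largest value must stay at or below INT32_MAX */
;   bool no_signed_wrap = 2 * (int64_t)(n - 1) <= INT32_MAX;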

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

define void @f3(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; LAA-LABEL: f4
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>

define void @f4(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but the GEP into
; pointer %a is marked inbounds. The index %mul, however, does not carry the
; nsw flag, which means that the SCEV expression for %mul can wrap and we
; need a SCEV predicate to continue the analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
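;
; For illustration only (hypothetical C analogue, not checked by this test):
; even when the compiler knows the access itself stays within the object,
; the 32-bit index computation may still wrap first, so the address is not
; an affine function of i without the no-wrap assumption:
;
;   unsigned mul = 2 * index;  /* may wrap in 32 bits ...            */
;   A[mul] = A[mul] + B[i];    /* ... before the access is even made */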

; LAA-LABEL: f5
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>

; LAA: [PSE]  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body>

define void @f5(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}