1; RUN: opt -disable-output -passes='print-access-info' %s 2>&1 | FileCheck %s
2
3target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
4
5; CHECK-LABEL: function 'forked_ptrs_simple':
6; CHECK-NEXT:  loop:
7; CHECK-NEXT:    Memory dependences are safe with run-time checks
8; CHECK-NEXT:    Dependences:
9; CHECK-NEXT:    Run-time memory checks:
10; CHECK-NEXT:    Check 0:
11; CHECK-NEXT:      Comparing group ([[G1:.+]]):
12; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
13; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
14; CHECK-NEXT:      Against group ([[G2:.+]]):
15; CHECK-NEXT:        %select = select i1 %cmp, float* %gep.1, float* %gep.2
16; CHECK-NEXT:    Check 1:
17; CHECK-NEXT:      Comparing group ([[G1]]):
18; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
19; CHECK-NEXT:        %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
20; CHECK-NEXT:      Against group ([[G3:.+]]):
21; CHECK-NEXT:        %select = select i1 %cmp, float* %gep.1, float* %gep.2
22; CHECK-NEXT:    Grouped accesses:
23; CHECK-NEXT:      Group [[G1]]
24; CHECK-NEXT:        (Low: %Dest High: (400 + %Dest))
25; CHECK-NEXT:          Member: {%Dest,+,4}<nuw><%loop>
26; CHECK-NEXT:          Member: {%Dest,+,4}<nuw><%loop>
27; CHECK-NEXT:      Group [[G2]]:
28; CHECK-NEXT:        (Low: %Base1 High: (400 + %Base1))
29; CHECK-NEXT:          Member: {%Base1,+,4}<nw><%loop>
30; CHECK-NEXT:      Group [[G3]]:
31; CHECK-NEXT:        (Low: %Base2 High: (400 + %Base2))
32; CHECK-NEXT:          Member: {%Base2,+,4}<nw><%loop>
33; CHECK-EMPTY:
34; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
35; CHECK-NEXT:    SCEV assumptions:
36; CHECK-EMPTY:
37; CHECK-NEXT:    Expressions re-written:
38
; Test input: a single-block loop of 100 iterations. Each iteration reads
; Dest[iv], uses that value to pick between &Base1[iv] and &Base2[iv] with a
; select, loads through the chosen pointer and writes the result to Dest[iv].
; The expectations above require Dest to be checked at run time against one
; range based on %Base1 and another based on %Base2.
39define void @forked_ptrs_simple(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* %Dest) {
40entry:
41  br label %loop
42
43loop:
44  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
45  %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
46  %l.Dest = load float, float* %gep.Dest
  ; The select condition comes from the value just loaded from Dest[iv].
47  %cmp = fcmp une float %l.Dest, 0.0
  ; Both candidate addresses use the same index %iv but different base arguments.
48  %gep.1 = getelementptr inbounds float, float* %Base1, i64 %iv
49  %gep.2 = getelementptr inbounds float, float* %Base2, i64 %iv
50  %select = select i1 %cmp, float* %gep.1, float* %gep.2
51  %sink = load float, float* %select, align 4
  ; Store goes to the same address that was loaded at the top of the iteration.
52  store float %sink, float* %gep.Dest, align 4
53  %iv.next = add nuw nsw i64 %iv, 1
  ; Leave the loop once the incremented counter equals 100.
54  %exitcond.not = icmp eq i64 %iv.next, 100
55  br i1 %exitcond.not, label %exit, label %loop
56
57exit:
58  ret void
59}
60
61
62; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset':
63; CHECK-NEXT:  for.body:
64; CHECK-NEXT:    Report: cannot identify array bounds
65; CHECK-NEXT:    Dependences:
66; CHECK-NEXT:    Run-time memory checks:
67; CHECK-NEXT:    Grouped accesses:
68; CHECK-EMPTY:
69; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
70; CHECK-NEXT:    SCEV assumptions:
71; CHECK-EMPTY:
72; CHECK-NEXT:    Expressions re-written:
73
74;;;; Derived from the following C code
75;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) {
76;;   for (int i=0; i<100; i++) {
77;;     if (D[i] != 0) {
78;;       C[i] = A[i];
79;;     } else {
80;;       C[i] = B[i];
81;;     }
82;;   }
83;; }
84
; Test input: a 100-iteration loop that copies one float per iteration into
; Dest[i]. The select chooses between the two base pointer arguments themselves
; (Base2 when Preds[i] == 0, otherwise Base1), and a single GEP indexed by the
; induction variable is applied to the chosen base afterwards. The expected
; analysis output above is the "cannot identify array bounds" report.
85define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
86entry:
87  br label %for.body
88
89for.cond.cleanup:
90  ret void
91
92for.body:
93  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
94  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
95  %0 = load i32, i32* %arrayidx, align 4
96  %cmp1.not = icmp eq i32 %0, 0
  ; The select operates on the loop-invariant bases, not on per-iteration addresses.
97  %spec.select = select i1 %cmp1.not, float* %Base2, float* %Base1
  ; The index is applied after the select, so the GEP's base operand is the select result.
98  %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv
99  %.sink = load float, float* %.sink.in, align 4
100  %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
101  store float %.sink, float* %1, align 4
102  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once the incremented counter equals 100.
103  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
104  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
105}
106
107; CHECK-LABEL: function 'forked_ptrs_same_base_different_offset':
108; CHECK-NEXT:   for.body:
109; CHECK-NEXT:     Report: cannot identify array bounds
110; CHECK-NEXT:     Dependences:
111; CHECK-NEXT:     Run-time memory checks:
112; CHECK-NEXT:     Grouped accesses:
113; CHECK-EMPTY:
114; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
115; CHECK-NEXT:     SCEV assumptions:
116; CHECK-EMPTY:
117; CHECK-NEXT:     Expressions re-written:
118
119;;;; Derived from the following C code
120;; void forked_ptrs_same_base_different_offset(float *A, float *B, int *C) {
121;;   int offset;
122;;   for (int i = 0; i < 100; i++) {
123;;     if (C[i] != 0)
124;;       offset = i;
125;;     else
126;;       offset = i+1;
127;;     B[i] = A[offset];
128;;   }
129;; }
130
; Test input: a 100-iteration loop that stores Base[offset] into Dest[i]. The
; offset is a 32-bit select between i + 1 (when Preds[i] == 0) and i; it is
; zero-extended to i64 before indexing the single base pointer. The expected
; analysis output above is the "cannot identify array bounds" report.
131define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
132entry:
133  br label %for.body
134
135for.cond.cleanup:                                 ; preds = %for.body
136  ret void
137
138for.body:                                         ; preds = %entry, %for.body
  ; Two counters advance together: a 64-bit one for addressing and a 32-bit one
  ; that only feeds the "i + 1" candidate offset.
139  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
140  %i.014 = phi i32 [ 0, %entry ], [ %add, %for.body ]
141  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
142  %0 = load i32, i32* %arrayidx, align 4
143  %cmp1.not = icmp eq i32 %0, 0
144  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
145  %add = add nuw nsw i32 %i.014, 1
146  %1 = trunc i64 %indvars.iv to i32
  ; The select is on the integer offset, not on a pointer.
147  %offset.0 = select i1 %cmp1.not, i32 %add, i32 %1
148  %idxprom213 = zext i32 %offset.0 to i64
149  %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
150  %2 = load float, float* %arrayidx3, align 4
151  %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
152  store float %2, float* %arrayidx5, align 4
  ; Leave the loop once the incremented 64-bit counter equals 100.
153  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
154  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
155}
156
157;;;; Cases that can be handled by a forked pointer but are not currently allowed.
158
159; CHECK-LABEL: function 'forked_ptrs_uniform_and_strided_forks':
160; CHECK-NEXT:  for.body:
161; CHECK-NEXT:    Report: cannot identify array bounds
162; CHECK-NEXT:    Dependences:
163; CHECK-NEXT:    Run-time memory checks:
164; CHECK-NEXT:    Grouped accesses:
165; CHECK-EMPTY:
166; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
167; CHECK-NEXT:    SCEV assumptions:
168; CHECK-EMPTY:
169; CHECK-NEXT:    Expressions re-written:
170
171;;;; Derived from forked_ptrs_same_base_different_offset with a manually
172;;;; added uniform offset and a mul to provide a stride
173
; Test input: a 100-iteration loop that stores Base[offset] into Dest[i]. The
; offset is a 32-bit select between the constant 4 (when Preds[i] == 0) and
; 3 * i; it is sign-extended to i64 before indexing the single base pointer.
; The expected analysis output above is the "cannot identify array bounds"
; report.
174define dso_local void @forked_ptrs_uniform_and_strided_forks(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
175entry:
176  br label %for.body
177
178for.cond.cleanup:                                 ; preds = %for.body
179  ret void
180
181for.body:                                         ; preds = %entry, %for.body
182  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; %i.014 and %add only feed each other in this function; neither reaches the
  ; offset computation below.
183  %i.014 = phi i32 [ 0, %entry ], [ %add, %for.body ]
184  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
185  %0 = load i32, i32* %arrayidx, align 4
186  %cmp1.not = icmp eq i32 %0, 0
187  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
188  %add = add nuw nsw i32 %i.014, 1
189  %1 = trunc i64 %indvars.iv to i32
  ; Strided candidate: the truncated counter multiplied by 3.
190  %mul = mul i32 %1, 3
  ; Loop-invariant candidate (4) versus the strided candidate (%mul).
191  %offset.0 = select i1 %cmp1.not, i32 4, i32 %mul
192  %idxprom213 = sext i32 %offset.0 to i64
193  %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
194  %2 = load float, float* %arrayidx3, align 4
195  %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
196  store float %2, float* %arrayidx5, align 4
  ; Leave the loop once the incremented 64-bit counter equals 100.
197  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
198  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
199}
200
201; CHECK-LABEL:  function 'forked_ptrs_gather_and_contiguous_forks':
202; CHECK-NEXT:   for.body:
203; CHECK-NEXT:     Report: cannot identify array bounds
204; CHECK-NEXT:     Dependences:
205; CHECK-NEXT:     Run-time memory checks:
206; CHECK-NEXT:     Grouped accesses:
207; CHECK-EMPTY:
208; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
209; CHECK-NEXT:     SCEV assumptions:
210; CHECK-EMPTY:
211; CHECK-NEXT:     Expressions re-written:
212
213;;;; Derived from forked_ptrs_different_base_same_offset with a gather
214;;;; added using Preds as an index array in addition to the per-iteration
215;;;; condition.
216
; Test input: a 100-iteration loop that copies one float per iteration into
; Dest[i]. The source address is a select between &Base2[i] (when
; Preds[i] == 0) and &Base1[Preds[i]], so one candidate is indexed by the
; induction variable and the other by a value loaded inside the loop. The
; expected analysis output above is the "cannot identify array bounds" report.
217define dso_local void @forked_ptrs_gather_and_contiguous_forks(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
218entry:
219  br label %for.body
220
221for.cond.cleanup:                                 ; preds = %for.body
222  ret void
223
224for.body:                                         ; preds = %entry, %for.body
225  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
226  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
  ; %0 is used twice: as the select condition input and as the index into Base1.
227  %0 = load i32, i32* %arrayidx, align 4
228  %cmp1.not = icmp eq i32 %0, 0
  ; Candidate indexed directly by the induction variable.
229  %arrayidx9 = getelementptr inbounds float, float* %Base2, i64 %indvars.iv
  ; Candidate indexed by the sign-extended value loaded from Preds.
230  %idxprom4 = sext i32 %0 to i64
231  %arrayidx5 = getelementptr inbounds float, float* %Base1, i64 %idxprom4
232  %.sink.in = select i1 %cmp1.not, float* %arrayidx9, float* %arrayidx5
233  %.sink = load float, float* %.sink.in, align 4
234  %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
235  store float %.sink, float* %1, align 4
236  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once the incremented counter equals 100.
237  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
238  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
239}
240