1 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
2 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
3 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
4 
5 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
6 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
7 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
8 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
9 // expected-no-diagnostics
10 #ifndef HEADER
11 #define HEADER
12 
// Declarations only. baz calls foo(n) ahead of each scan directive and bar()
// right after it.
13 void foo(int n);
14 void bar();
15 
16 // CHECK: define{{.*}} void @{{.*}}baz{{.*}}(i32 noundef %n)
// Codegen test body: two `parallel for` loops with an inscan `+` reduction over
// the array section a[:n] and the scalar b. The first loop uses
// `scan inclusive`, the second `scan exclusive`. The FileCheck directives
// interleaved with the code pin the LLVM IR to be produced; they are order
// sensitive and must stay in this sequence.
17 void baz(int n) {
18   static float a[10];
19   static double b;
20 
  // Matched in order below: a stack save, allocation of the a/b scan buffers,
  // a __kmpc_fork_call, and the copy of the buffers' last entry (index 9) back
  // into the static a and b.
21   // CHECK: call i8* @llvm.stacksave()
22   // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
23 
24   // float a_buffer[10][n];
25   // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
26   // double b_buffer[10];
27   // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
28 
29   // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
30   // CHECK: [[LAST:%.+]] = mul nsw i64 9, %
31   // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]]
32   // CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8*
33   // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false)
34   // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9
35   // CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]],
36   // CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b,
37 
  // The buffer allocation and fork call are matched a second time (presumably
  // for the second loop -- confirm against the emitted IR), followed by the
  // stack restore.
38   // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
39 
40   // float a_buffer[10][n];
41   // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
42 
43   // double b_buffer[10];
44   // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
45   // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
46   // CHECK: call void @llvm.stackrestore(i8*
47 
48 #pragma omp parallel for reduction(inscan, +:a[:n], b)
49   for (int i = 0; i < 10; ++i) {
    // Inclusive scan, first worksharing pass: run the code ahead of the scan
    // directive (the foo call) and store each iteration's private a/b values
    // into the buffers.
50     // CHECK: call void @__kmpc_for_static_init_4(
51     // CHECK: call i8* @llvm.stacksave()
52     // CHECK: store float 0.000000e+00, float* %
53     // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
54     // CHECK: br label %[[DISPATCH:[^,]+]]
55     // CHECK: [[INPUT_PHASE:.+]]:
56     // CHECK: call void @{{.+}}foo{{.+}}(
57 
58     // a_buffer[i][0..n] = a_priv[0..n];
59     // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
60     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
61     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
62     // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
63     // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
64     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
65     // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
66     // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
67     // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
68 
69     // b_buffer[i] = b_priv;
70     // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
71     // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
72     // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
73     // CHECK: br label %[[LOOP_CONTINUE:.+]]
74 
75     // CHECK: [[DISPATCH]]:
76     // CHECK: br label %[[INPUT_PHASE]]
77     // CHECK: [[LOOP_CONTINUE]]:
78     // CHECK: call void @llvm.stackrestore(i8* %
79     // CHECK: call void @__kmpc_for_static_fini(
80     // CHECK: call void @__kmpc_barrier(
81     foo(n);
82 #pragma omp scan inclusive(a[:n], b)
    // Scan over the buffers: the outer loop counts k up to ceil(log2(10)) while
    // doubling k2pow; the inner loop walks i from 9 downwards while
    // i >= k2pow, adding entry i - k2pow into entry i.
83     // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
84     // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
85     // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
86     // CHECK: br label %[[OUTER_BODY:[^,]+]]
87     // CHECK: [[OUTER_BODY]]:
88     // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
89     // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
90     // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
91     // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
92     // CHECK: [[INNER_BODY]]:
93     // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]
94 
95     // a_buffer[i] += a_buffer[i-pow(2, k)];
96     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
97     // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
98     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
99     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
100     // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
101     // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
102     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
103     // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
104     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
105     // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
106     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
107     // CHECK: [[RED_BODY]]:
108     // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
109     // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
110     // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
111     // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
112     // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
113     // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
114     // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
115     // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
116     // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
117     // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
118     // CHECK: [[RED_DONE]]:
119 
120     // b_buffer[i] += b_buffer[i-pow(2, k)];
121     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
122     // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
123     // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
124     // CHECK: store double [[RED]], double* [[B_BUF_IDX]],
125 
126     // --i;
127     // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
128     // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
129     // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
130     // CHECK: [[INNER_EXIT]]:
131 
132     // ++k;
133     // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
134     // k2pow <<= 1;
135     // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
136     // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
137     // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
138     // CHECK: [[OUTER_EXIT]]:
139     bar();
    // Inclusive scan, second worksharing pass: copy buffer entry i back into
    // the private a/b copies, then run the code after the scan directive (the
    // bar call).
140     // CHECK: call void @__kmpc_for_static_init_4(
141     // CHECK: call i8* @llvm.stacksave()
142     // CHECK: store float 0.000000e+00, float* %
143     // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
144     // CHECK: br label %[[DISPATCH:[^,]+]]
145 
146     // Skip the before scan body.
147     // CHECK: call void @{{.+}}foo{{.+}}(
148 
149     // CHECK: [[EXIT_INSCAN:[^,]+]]:
150     // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
151 
152     // CHECK: [[DISPATCH]]:
153     // a_priv[0..n] = a_buffer[i][0..n];
154     // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
155     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
156     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
157     // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
158     // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
159     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
160     // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
161     // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
162     // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
163 
164     // b_priv = b_buffer[i];
165     // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
166     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
167     // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
168     // CHECK: br label %[[SCAN_PHASE:[^,]+]]
169 
170     // CHECK: [[SCAN_PHASE]]:
171     // CHECK: call void @{{.+}}bar{{.+}}()
172     // CHECK: br label %[[EXIT_INSCAN]]
173 
174     // CHECK: [[LOOP_CONTINUE]]:
175     // CHECK: call void @llvm.stackrestore(i8* %
176     // CHECK: call void @__kmpc_for_static_fini(
177   }
178 
179 #pragma omp parallel for reduction(inscan, +:a[:n], b)
180   for (int i = 0; i < 10; ++i) {
    // Exclusive scan, first worksharing pass: the bar call (the code after the
    // scan directive) is matched as the input phase, and the private a/b values
    // are then stored into the buffers.
181     // CHECK: call void @__kmpc_for_static_init_4(
182     // CHECK: call i8* @llvm.stacksave()
183     // CHECK: store float 0.000000e+00, float* %
184     // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
185     // CHECK: br label %[[DISPATCH:[^,]+]]
186 
187     // Skip the before scan body.
188     // CHECK: call void @{{.+}}foo{{.+}}(
189 
190     // CHECK: [[EXIT_INSCAN:[^,]+]]:
191 
192     // a_buffer[i][0..n] = a_priv[0..n];
193     // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
194     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
195     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
196     // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
197     // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
198     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
199     // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
200     // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
201     // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
202 
203     // b_buffer[i] = b_priv;
204     // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
205     // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
206     // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
207     // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
208 
209     // CHECK: [[DISPATCH]]:
210     // CHECK: br label %[[INPUT_PHASE:[^,]+]]
211 
212     // CHECK: [[INPUT_PHASE]]:
213     // CHECK: call void @{{.+}}bar{{.+}}()
214     // CHECK: br label %[[EXIT_INSCAN]]
215 
216     // CHECK: [[LOOP_CONTINUE]]:
217     // CHECK: call void @llvm.stackrestore(i8* %
218     // CHECK: call void @__kmpc_for_static_fini(
219     // CHECK: call void @__kmpc_barrier(
220     foo(n);
221 #pragma omp scan exclusive(a[:n], b)
    // Scan over the buffers; the directives mirror the sequence matched for
    // the inclusive loop above.
222     // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
223     // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
224     // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
225     // CHECK: br label %[[OUTER_BODY:[^,]+]]
226     // CHECK: [[OUTER_BODY]]:
227     // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
228     // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
229     // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
230     // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
231     // CHECK: [[INNER_BODY]]:
232     // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]
233 
234     // a_buffer[i] += a_buffer[i-pow(2, k)];
235     // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
236     // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
237     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
238     // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
239     // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
240     // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
241     // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
242     // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
243     // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
244     // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
245     // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
246     // CHECK: [[RED_BODY]]:
247     // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
248     // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
249     // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
250     // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
251     // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
252     // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
253     // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
254     // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
255     // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
256     // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
257     // CHECK: [[RED_DONE]]:
258 
259     // b_buffer[i] += b_buffer[i-pow(2, k)];
260     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
261     // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
262     // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
263     // CHECK: store double [[RED]], double* [[B_BUF_IDX]],
264 
265     // --i;
266     // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
267     // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
268     // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
269     // CHECK: [[INNER_EXIT]]:
270 
271     // ++k;
272     // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
273     // k2pow <<= 1;
274     // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
275     // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
276     // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
277     // CHECK: [[OUTER_EXIT]]:
278     bar();
    // Exclusive scan, second worksharing pass: for i != 0 the private a/b
    // copies are loaded from buffer entry i - 1 before branching to the scan
    // phase, which contains the foo call.
279     // CHECK: call void @__kmpc_for_static_init_4(
280     // CHECK: call i8* @llvm.stacksave()
281     // CHECK: store float 0.000000e+00, float* %
282     // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
283     // CHECK: br label %[[DISPATCH:[^,]+]]
284 
285     // CHECK: [[SCAN_PHASE:.+]]:
286     // CHECK: call void @{{.+}}foo{{.+}}(
287     // CHECK: br label %[[LOOP_CONTINUE:.+]]
288 
289     // CHECK: [[DISPATCH]]:
290     // if (i > 0)
291     //   a_priv[0..n] = a_buffer[i-1][0..n];
292     // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
293     // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
294     // CHECK: [[CMP:%.+]] = icmp eq i64 [[BASE_IDX]], 0
295     // CHECK: br i1 [[CMP]], label %[[IF_DONE:[^,]+]], label %[[IF_THEN:[^,]+]]
296     // CHECK: [[IF_THEN]]:
297     // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
298     // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
299     // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
300     // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
301     // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
302     // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
303     // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
304     // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
305 
306     //   b_priv = b_buffer[i-1];
307     // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
308     // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
309     // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
310     // CHECK: br label %[[SCAN_PHASE]]
311 
312     // CHECK: [[LOOP_CONTINUE]]:
313     // CHECK: call void @llvm.stackrestore(i8* %
314     // CHECK: call void @__kmpc_for_static_fini(
315   }
316 }
317 
318 #endif
319 
320