// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics

// Codegen test for `#pragma omp parallel for simd` loops that carry an
// `inscan` reduction and are split by `#pragma omp scan inclusive(...)` or
// `#pragma omp scan exclusive(...)`. The -fopenmp runs match the emitted LLVM
// IR against the patterns below; the -fopenmp-simd runs only require that no
// __kmpc/__tgt runtime symbol shows up in the output.
#ifndef HEADER
#define HEADER

// Opaque calls that mark the code before (foo) and after (bar) the scan
// directive inside each loop body.
void foo();
void bar();

// CHECK: define{{.*}} void @{{.*}}baz{{.*}}(i32 noundef %n)
// Two scan loops over the same reduction items: the array section a[:n] and
// the scalar b. The first loop uses an inclusive scan, the second an
// exclusive scan.
void baz(int n) {
  static float a[10];
  static double b;

  // Temporary per-iteration buffers are allocated in baz itself, once before
  // each parallel region.
  // CHECK: call i8* @llvm.stacksave()
  // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]

  // float a_buffer[10][n];
  // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
  // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,

  // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(

  // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]

  // float a_buffer[10][n];
  // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],

  // double b_buffer[10];
  // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
  // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
  // CHECK: call void @llvm.stackrestore(i8*

#pragma omp parallel for simd reduction(inscan, +:a[:n], b)
  for (int i = 0; i < 10; ++i) {
    // Inclusive scan, first pass: run the input phase (foo) and store the
    // private copies into the buffers.
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]
    // CHECK: [[INPUT_PHASE:.+]]:
    // CHECK: call void @{{.+}}foo{{.+}}()

    // a_buffer[i][0..n] = a_priv[0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_buffer[i] = b_priv;
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
    // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
    // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
    // CHECK: br label %[[LOOP_CONTINUE:.+]]

    // CHECK: [[DISPATCH]]:
    // CHECK: br label %[[INPUT_PHASE]]
    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @__kmpc_barrier(
    foo();
#pragma omp scan inclusive(a[:n], b)
    // Prefix reduction over the buffers: ceil(log2(10)) outer rounds, each
    // walking i downwards from 9 while i >= 2^k.
    // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
    // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
    // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
    // CHECK: br label %[[OUTER_BODY:[^,]+]]
    // CHECK: [[OUTER_BODY]]:
    // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
    // CHECK: [[INNER_BODY]]:
    // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]

    // a_buffer[i] += a_buffer[i-pow(2, k)];
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
    // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
    // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
    // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
    // CHECK: [[RED_BODY]]:
    // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
    // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
    // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
    // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
    // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
    // CHECK: [[RED_DONE]]:

    // b_buffer[i] += b_buffer[i-pow(2, k)];
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
    // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store double [[RED]], double* [[B_BUF_IDX]],

    // --i;
    // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
    // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
    // CHECK: [[INNER_EXIT]]:

    // ++k;
    // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
    // k2pow <<= 1;
    // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
    // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
    // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
    // CHECK: [[OUTER_EXIT]]:
    bar();
    // Inclusive scan, second pass: reload the private copies from the
    // buffers, then run only the scan phase (bar).
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // Skip the before scan body.
    // CHECK: call void @{{.+}}foo{{.+}}()

    // CHECK: [[EXIT_INSCAN:[^,]+]]:
    // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]

    // CHECK: [[DISPATCH]]:
    // a_priv[0..n] = a_buffer[i][0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_priv = b_buffer[i];
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
    // CHECK: br label %[[SCAN_PHASE:[^,]+]]

    // CHECK: [[SCAN_PHASE]]:
    // CHECK: call void @{{.+}}bar{{.+}}()
    // CHECK: br label %[[EXIT_INSCAN]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
  }

#pragma omp parallel for simd reduction(inscan, +:a[:n], b)
  for (int i = 0; i < 10; ++i) {
    // Exclusive scan, first pass: the input phase is the code after the scan
    // directive (bar); its private copies are stored into the buffers.
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // Skip the before scan body.
    // CHECK: call void @{{.+}}foo{{.+}}()

    // CHECK: [[EXIT_INSCAN:[^,]+]]:

    // a_buffer[i][0..n] = a_priv[0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_buffer[i] = b_priv;
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
    // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
    // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
    // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]

    // CHECK: [[DISPATCH]]:
    // CHECK: br label %[[INPUT_PHASE:[^,]+]]

    // CHECK: [[INPUT_PHASE]]:
    // CHECK: call void @{{.+}}bar{{.+}}()
    // CHECK: br label %[[EXIT_INSCAN]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @__kmpc_barrier(
    foo();
#pragma omp scan exclusive(a[:n], b)
    // Same prefix reduction over the buffers as in the inclusive loop.
    // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
    // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
    // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
    // CHECK: br label %[[OUTER_BODY:[^,]+]]
    // CHECK: [[OUTER_BODY]]:
    // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
    // CHECK: [[INNER_BODY]]:
    // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]

    // a_buffer[i] += a_buffer[i-pow(2, k)];
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
    // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
    // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
    // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
    // CHECK: [[RED_BODY]]:
    // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
    // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
    // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
    // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
    // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
    // CHECK: [[RED_DONE]]:

    // b_buffer[i] += b_buffer[i-pow(2, k)];
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
    // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store double [[RED]], double* [[B_BUF_IDX]],

    // --i;
    // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
    // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
    // CHECK: [[INNER_EXIT]]:

    // ++k;
    // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
    // k2pow <<= 1;
    // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
    // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
    // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
    // CHECK: [[OUTER_EXIT]]:
    bar();
    // Exclusive scan, second pass: for i != 0 the private copies are loaded
    // from buffer slot i-1 before the scan phase (foo) runs.
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // CHECK: [[SCAN_PHASE:.+]]:
    // CHECK: call void @{{.+}}foo{{.+}}()
    // CHECK: br label %[[LOOP_CONTINUE:.+]]

    // CHECK: [[DISPATCH]]:
    // if (i > 0)
    //   a_priv[0..n] = a_buffer[i-1][0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[CMP:%.+]] = icmp eq i64 [[BASE_IDX]], 0
    // CHECK: br i1 [[CMP]], label %[[IF_DONE:[^,]+]], label %[[IF_THEN:[^,]+]]
    // CHECK: [[IF_THEN]]:
    // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_priv = b_buffer[i-1];
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
    // CHECK: br label %[[SCAN_PHASE]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
  }
}

// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}

#endif
