// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

void foo();
void bar();

// Codegen test for '#pragma omp for simd reduction(inscan, ...)' used together
// with '#pragma omp scan inclusive(...)' and '#pragma omp scan exclusive(...)'.
// For each of the two loops below, the directives expect:
//   * temporary buffers for 'a' (10 * n floats) and 'b' (10 doubles),
//   * a first worksharing pass that stores the private copies into the buffers,
//   * a combine loop driven by ceil(log2(10)) that adds buffer[i - 2^k] into
//     buffer[i],
//   * a second worksharing pass that loads the buffered values back into the
//     private copies before running the other half of the loop body.
// foo() and bar() are only declared; they serve as markers that identify the
// statements before and after the scan directive in the emitted IR.

// CHECK: define{{.*}} void @{{.*}}baz{{.*}}(i32 noundef %n)
void baz(int n) {
  static float a[10];
  static double b;
  // CHECK: call i8* @llvm.stacksave()
  // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]

  // float a_buffer[10][n];
  // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],

  // double b_buffer[10];
  // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
#pragma omp for simd reduction(inscan, +:a[:n], b)
  for (int i = 0; i < 10; ++i) {
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]
    // CHECK: [[INPUT_PHASE:.+]]:
    // CHECK: call void @{{.+}}foo{{.+}}()

    // a_buffer[i][0..n] = a_priv[0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_buffer[i] = b_priv;
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
    // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
    // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
    // CHECK: br label %[[LOOP_CONTINUE:.+]]

    // CHECK: [[DISPATCH]]:
    // CHECK: br label %[[INPUT_PHASE]]
    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @__kmpc_barrier(
    foo();
    #pragma omp scan inclusive(a[:n], b)
    // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
    // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
    // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
    // CHECK: br label %[[OUTER_BODY:[^,]+]]
    // CHECK: [[OUTER_BODY]]:
    // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
    // CHECK: [[INNER_BODY]]:
    // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]

    // a_buffer[i] += a_buffer[i-pow(2, k)];
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
    // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
    // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
    // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
    // CHECK: [[RED_BODY]]:
    // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
    // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
    // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
    // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
    // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
    // CHECK: [[RED_DONE]]:

    // b_buffer[i] += b_buffer[i-pow(2, k)];
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
    // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store double [[RED]], double* [[B_BUF_IDX]],

    // --i;
    // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
    // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
    // CHECK: [[INNER_EXIT]]:

    // ++k;
    // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
    // k2pow <<= 1;
    // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
    // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
    // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
    // CHECK: [[OUTER_EXIT]]:
    bar();
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // Skip the before scan body.
    // CHECK: call void @{{.+}}foo{{.+}}()

    // CHECK: [[EXIT_INSCAN:[^,]+]]:
    // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]

    // CHECK: [[DISPATCH]]:
    // a_priv[0..n] = a_buffer[i][0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_priv = b_buffer[i];
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
    // CHECK: br label %[[SCAN_PHASE:[^,]+]]

    // CHECK: [[SCAN_PHASE]]:
    // CHECK: call void @{{.+}}bar{{.+}}()
    // CHECK: br label %[[EXIT_INSCAN]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @llvm.stackrestore(i8*
    // CHECK: call void @__kmpc_barrier(
  }

  // CHECK: call i8* @llvm.stacksave()
  // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]

  // float a_buffer[10][n];
  // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],

  // double b_buffer[10];
  // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
#pragma omp for simd reduction(inscan, +:a[:n], b)
  for (int i = 0; i < 10; ++i) {
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // Skip the before scan body.
    // CHECK: call void @{{.+}}foo{{.+}}()

    // CHECK: [[EXIT_INSCAN:[^,]+]]:

    // a_buffer[i][0..n] = a_priv[0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_buffer[i] = b_priv;
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
    // CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
    // CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
    // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]

    // CHECK: [[DISPATCH]]:
    // CHECK: br label %[[INPUT_PHASE:[^,]+]]

    // CHECK: [[INPUT_PHASE]]:
    // CHECK: call void @{{.+}}bar{{.+}}()
    // CHECK: br label %[[EXIT_INSCAN]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @__kmpc_barrier(
    foo();
    #pragma omp scan exclusive(a[:n], b)
    // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
    // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
    // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
    // CHECK: br label %[[OUTER_BODY:[^,]+]]
    // CHECK: [[OUTER_BODY]]:
    // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
    // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
    // CHECK: [[INNER_BODY]]:
    // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]

    // a_buffer[i] += a_buffer[i-pow(2, k)];
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[I]]
    // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
    // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[IDX_SUB_K2POW]]
    // CHECK: [[A_BUF_END:%.+]] = getelementptr float, float* [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
    // CHECK: [[ISEMPTY:%.+]] = icmp eq float* [[A_BUF_IDX]], [[A_BUF_END]]
    // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
    // CHECK: [[RED_BODY]]:
    // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi float* [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi float* [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
    // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]],
    // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store float [[RED]], float* [[A_BUF_IDX_ELEM]],
    // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, float* [[A_BUF_IDX_ELEM]], i32 1
    // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, float* [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
    // CHECK: [[DONE:%.+]] = icmp eq float* [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
    // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
    // CHECK: [[RED_DONE]]:

    // b_buffer[i] += b_buffer[i-pow(2, k)];
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, double* [[B_BUF_IDX_SUB_K2POW]],
    // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
    // CHECK: store double [[RED]], double* [[B_BUF_IDX]],

    // --i;
    // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
    // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
    // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
    // CHECK: [[INNER_EXIT]]:

    // ++k;
    // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
    // k2pow <<= 1;
    // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
    // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
    // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
    // CHECK: [[OUTER_EXIT]]:
    bar();
    // CHECK: call void @__kmpc_for_static_init_4(
    // CHECK: call i8* @llvm.stacksave()
    // CHECK: store float 0.000000e+00, float* %
    // CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
    // CHECK: br label %[[DISPATCH:[^,]+]]

    // CHECK: [[SCAN_PHASE:.+]]:
    // CHECK: call void @{{.+}}foo{{.+}}()
    // CHECK: br label %[[LOOP_CONTINUE:.+]]

    // CHECK: [[DISPATCH]]:
    // if (i > 0)
    //   a_priv[0..n] = a_buffer[i-1][0..n];
    // CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
    // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
    // CHECK: [[CMP:%.+]] = icmp eq i64 [[BASE_IDX]], 0
    // CHECK: br i1 [[CMP]], label %[[IF_DONE:[^,]+]], label %[[IF_THEN:[^,]+]]
    // CHECK: [[IF_THEN]]:
    // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
    // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
    // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
    // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
    // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
    // CHECK: [[DEST:%.+]] = bitcast float* [[A_PRIV]] to i8*
    // CHECK: [[SRC:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)

    // b_priv = b_buffer[i-1];
    // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
    // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, double* [[B_BUF_IDX]],
    // CHECK: store double [[B_BUF_IDX_VAL]], double* [[B_PRIV_ADDR]],
    // CHECK: br label %[[SCAN_PHASE]]

    // CHECK: [[LOOP_CONTINUE]]:
    // CHECK: call void @llvm.stackrestore(i8* %
    // CHECK: call void @__kmpc_for_static_fini(
    // CHECK: call void @llvm.stackrestore(i8*
    // CHECK: call void @__kmpc_barrier(
  }
}

#endif
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
