1 // REQUIRES: powerpc-registered-target 2 // REQUIRES: nvptx-registered-target 3 4 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST 5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 6 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=CLASS,FUN,CHECK 7 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t 8 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=CLASS,CHECK 9 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=FUN,CHECK 10 11 // expected-no-diagnostics 12 #ifndef HEADER 13 #define HEADER 14 15 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4] 16 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 288, i64 673, i64 673, i64 544, i64 33, i64 673, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720] 17 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4] 18 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 673, i64 673, i64 673, i64 544, i64 673, i64 673, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720] 19 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8] 20 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096] 21 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8] 22 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096] 23 // HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 8, i64 8] 24 // HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 673, i64 281474976711440] 25 // CHECK-DAG: [[S:%.+]] = type { i32 } 26 // CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* } 27 // CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* } 28 29 // CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67_worker() 30 // CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67([[S]]* {{%.+}}, [[CAP1]]* nonnull align 8 dereferenceable(8) {{%.+}}) 31 // CLASS-NOT: getelementptr 32 // CLASS: br i1 % 33 // CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67_worker() 34 // CLASS: br label % 35 // CLASS: br i1 % 36 // CLASS: call void @__kmpc_kernel_init( 37 // CLASS: call void @__kmpc_data_sharing_init_stack() 38 // CLASS: call void @llvm.memcpy. 39 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]], 40 // CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0 41 // CLASS: store [[S]]* [[S_:%.+]], [[S]]** [[THIS_REF]], 42 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]], 43 // CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* [[L]]) 44 // CLASS: ret void 45 46 // CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l69([[S]]* %{{.+}}, [[CAP1]]* nonnull align 8 dereferenceable(8) %{{.+}}) 47 // CLASS-NOT: getelementptr 48 // CLASS: call void @llvm.memcpy. 49 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]], 50 // CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0 51 // CLASS: store [[S]]* %{{.+}}, [[S]]** [[THIS_REF]], 52 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]], 53 // CLASS: call i32 [[LAMBDA1]]([[CAP1]]* [[L]]) 54 // CLASS: ret void 55 56 template <typename T> 57 int foo(const T &t) { 58 #pragma omp target parallel 59 t(); 60 return 0; 61 } 62 63 struct S { 64 int a = 15; 65 int foo() { 66 auto &&L = [&]() { return a; }; 67 #pragma omp target 68 L(); 69 #pragma omp target parallel 70 L(); 71 return a + ::foo(L); 72 } 73 } s; 74 75 // FUN: define internal void @__omp_offloading_{{.+}}_main_l124_worker() 76 // FUN: define weak void @__omp_offloading_{{.+}}_main_l124(i64 %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, [[CAP2]]* nonnull align 8 dereferenceable(40) %{{.+}}) 77 // FUN-NOT: getelementptr 78 // FUN: br i1 % 79 // FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l124_worker() 80 // FUN: br label % 81 // FUN: br i1 % 82 // FUN: call void @__kmpc_kernel_init( 83 // FUN: call void @__kmpc_data_sharing_init_stack() 84 // FUN: call void @llvm.memcpy. 85 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]], 86 // FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0 87 // FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]], 88 // FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1 89 // FUN: store i32* %{{.+}}, i32** [[B_CAP]], 90 // FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2 91 // FUN: store i32* %{{.+}}, i32** [[C_CAP]], 92 // FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3 93 // FUN: store i32** %{{.+}}, i32*** [[D_CAP]], 94 // FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4 95 // FUN: store i32* %{{.+}}, i32** [[A_CAP]], 96 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]], 97 // FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* [[L]]) 98 // FUN: ret void 99 100 // FUN: define weak void @__omp_offloading_{{.+}}_main_l126(i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}} i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, [[CAP2]]* nonnull align 8 dereferenceable(40) %{{.+}}) 101 // FUN-NOT: getelementptr 102 // FUN: call void @llvm.memcpy. 103 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]], 104 // FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0 105 // FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]], 106 // FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1 107 // FUN: store i32* %{{.+}}, i32** [[B_CAP]], 108 // FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2 109 // FUN: store i32* %{{.+}}, i32** [[C_CAP]], 110 // FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3 111 // FUN: store i32** %{{.+}}, i32*** [[D_CAP]], 112 // FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4 113 // FUN: store i32* %{{.+}}, i32** [[A_CAP]], 114 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]], 115 // FUN: call i64 [[LAMBDA2]]([[CAP2]]* [[L]]) 116 // FUN: ret void 117 118 int main(int argc, char **argv) { 119 int &b = argc; 120 int &&c = 1; 121 int *d = &argc; 122 int a; 123 auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; }; 124 #pragma omp target firstprivate(argc) map(to : a) 125 L(); 126 #pragma omp target parallel 127 L(); 128 return argc + s.foo(); 129 } 130 131 132 // HOST-LABEL: @main 133 134 // HOST-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 11, i8** [[BASES:%.+]], i8** [[PTRS:%.+]], 135 // HOST-DAG: [[BASES:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR:%.+]], i32 0, i32 0 136 // HOST-DAG: [[PTRS:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR:%.+]], i32 0, i32 0 137 // HOST-DAG: [[BASE_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR]], i32 0, i32 6 138 // HOST-DAG: [[BASE_REF_CAST:%.+]] = bitcast i8** [[BASE_REF]] to i32*** 139 // HOST-DAG: store i32** [[BASE:%.+]], i32*** [[BASE_REF_CAST]], 140 // HOST-DAG: [[BASE]] = getelementptr inbounds [[LAMBDA:%.+]], [[LAMBDA]]* [[LAMBDA_ADDR:%.+]], i32 0, i32 0 141 // HOST-DAG: [[PTR_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR]], i32 0, i32 6 142 // HOST-DAG: [[PTR_REF_CAST:%.+]] = bitcast i8** [[PTR_REF]] to i32** 143 // HOST-DAG: store i32* [[PTR:%.+]], i32** [[PTR_REF_CAST]], 144 // HOST-DAG: [[PTR]] = load i32*, i32** [[PTR_REF:%.+]], 145 // HOST-DAG: [[PTR_REF]] = getelementptr inbounds [[LAMBDA]], [[LAMBDA]]* [[LAMBDA_ADDR]], i32 0, i32 0 146 #endif // HEADER 147