// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target
// Checks host map tables and NVPTX device kernels emitted for lambdas called inside OpenMP target regions.
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=CLASS,FUN,CHECK
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=CLASS,CHECK
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=FUN,CHECK
// NOTE: kernel names matched below embed pragma line numbers (_l72, _l74, _l134, _l136); keep every line at its current position.
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4, i64 4]
// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 544, i64 33, i64 673, i64 1407374883554064, i64 1407374883554064, i64 1407374883554064, i64 1407374883554064, i64 1407374883554064, i64 800]
// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4]
// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 547, i64 544, i64 547, i64 673, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720]
// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096]
// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096]
// HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 8, i64 8]
// HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 673, i64 281474976711440]
// CHECK-DAG: [[S:%.+]] = type { i32 }
// CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* }
// CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* }

// CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72_worker()
// CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72([[S]]* {{%.+}}, [[CAP1]]* dereferenceable(8) {{%.+}})
// CLASS-NOT: getelementptr
// CLASS: br i1 %
// CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72_worker()
// CLASS: br label %
// CLASS: br i1 %
// CLASS: call void @__kmpc_kernel_init(
// CLASS: call void @__kmpc_data_sharing_init_stack()
// CLASS: call void @llvm.memcpy.
// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
// CLASS: store [[S]]* [[S_:%.+]], [[S]]** [[THIS_REF]],
// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
// CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* [[L]])
// CLASS: ret void

// CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l74([[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
// CLASS-NOT: getelementptr
// CLASS: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* %{{.+}})
// CLASS: ret void

// CLASS: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
// CLASS-NOT: getelementptr
// CLASS: call void @llvm.memcpy.
// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
// CLASS: store [[S]]* %{{.+}}, [[S]]** [[THIS_REF]],
// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
// CLASS: call i32 [[LAMBDA1]]([[CAP1]]* [[L]])
// CLASS: ret void
// Calls t() inside a 'target parallel' region and returns 0.
template <typename T>
int foo(const T &t) {
#pragma omp target parallel
  t();
  return 0;
}
// S::foo calls a lambda that captures 'this' from a 'target' region, a 'target parallel' region and ::foo.
struct S {
  int a = 15;
  int foo() {
    auto &&L = [&]() { return a; };
#pragma omp target
    L();
#pragma omp target parallel
    L();
    return a + ::foo(L);
  }
} s;

// FUN: define internal void @__omp_offloading_{{.+}}_main_l134_worker()
// FUN: define weak void @__omp_offloading_{{.+}}_main_l134(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}, i64 %{{.+}})
// FUN-NOT: getelementptr
// FUN: br i1 %
// FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l134_worker()
// FUN: br label %
// FUN: br i1 %
// FUN: call void @__kmpc_kernel_init(
// FUN: call void @__kmpc_data_sharing_init_stack()
// FUN: call void @llvm.memcpy.
// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
// FUN: store i32* %{{.+}}, i32** [[B_CAP]],
// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
// FUN: store i32* %{{.+}}, i32** [[C_CAP]],
// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
// FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
// FUN: store i32* %{{.+}}, i32** [[A_CAP]],
// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
// FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* [[L]])
// FUN: ret void

// FUN: define weak void @__omp_offloading_{{.+}}_main_l136(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
// FUN-NOT: getelementptr
// FUN: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, [[CAP2]]* %{{.+}})
// FUN: ret void

// FUN: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
// FUN-NOT: getelementptr
// FUN: call void @llvm.memcpy.
// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
// FUN: store i32* %{{.+}}, i32** [[B_CAP]],
// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
// FUN: store i32* %{{.+}}, i32** [[C_CAP]],
// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
// FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
// FUN: store i32* %{{.+}}, i32** [[A_CAP]],
// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
// FUN: call i64 [[LAMBDA2]]([[CAP2]]* [[L]])
// FUN: ret void
// main calls a by-reference-capturing lambda from a 'target' region and a 'target parallel' region.
int main(int argc, char **argv) {
  int &b = argc;
  int &&c = 1;
  int *d = &argc;
  int a;
  auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; };
#pragma omp target firstprivate(argc) map(to : a)
  L();
#pragma omp target parallel
  L();
  return argc + s.foo();
}


// HOST-LABEL: @main

// HOST-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 11, i8** [[BASES:%.+]], i8** [[PTRS:%.+]],
// HOST-DAG: [[BASES:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR:%.+]], i32 0, i32 0
// HOST-DAG: [[PTRS:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR:%.+]], i32 0, i32 0
// HOST-DAG: [[BASE_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR]], i32 0, i32 5
// HOST-DAG: [[BASE_REF_CAST:%.+]] = bitcast i8** [[BASE_REF]] to i32***
// HOST-DAG: store i32** [[BASE:%.+]], i32*** [[BASE_REF_CAST]],
// HOST-DAG: [[BASE]] = getelementptr inbounds [[LAMBDA:%.+]], [[LAMBDA]]* [[LAMBDA_ADDR:%.+]], i32 0, i32 0
// HOST-DAG: [[PTR_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR]], i32 0, i32 5
// HOST-DAG: [[PTR_REF_CAST:%.+]] = bitcast i8** [[PTR_REF]] to i32**
// HOST-DAG: store i32* [[PTR:%.+]], i32** [[PTR_REF_CAST]],
// HOST-DAG: [[PTR]] = load i32*, i32** [[PTR_REF:%.+]],
// HOST-DAG: [[PTR_REF]] = getelementptr inbounds [[LAMBDA]], [[LAMBDA]]* [[LAMBDA_ADDR]], i32 0, i32 0
#endif // HEADER