1207b08a9SGiorgis Georgakoudis // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2a2dbfb6bSGiorgis Georgakoudis // Test target codegen - host bc file has to be created first. 3a2dbfb6bSGiorgis Georgakoudis // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 4207b08a9SGiorgis Georgakoudis // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 5a2dbfb6bSGiorgis Georgakoudis // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 6207b08a9SGiorgis Georgakoudis // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 7207b08a9SGiorgis Georgakoudis // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK3 8a2dbfb6bSGiorgis Georgakoudis 9a2dbfb6bSGiorgis Georgakoudis // expected-no-diagnostics 10a2dbfb6bSGiorgis Georgakoudis #ifndef HEADER 11a2dbfb6bSGiorgis Georgakoudis #define HEADER 12a2dbfb6bSGiorgis Georgakoudis 13a2dbfb6bSGiorgis Georgakoudis void work(int *C) { 14a2dbfb6bSGiorgis Georgakoudis #pragma omp atomic 15a2dbfb6bSGiorgis Georgakoudis ++(*C); 16a2dbfb6bSGiorgis Georgakoudis } 17a2dbfb6bSGiorgis Georgakoudis 18a2dbfb6bSGiorgis Georgakoudis void use(int *C) { 19a2dbfb6bSGiorgis Georgakoudis #pragma omp parallel num_threads(2) 20a2dbfb6bSGiorgis Georgakoudis work(C); 21a2dbfb6bSGiorgis Georgakoudis } 22a2dbfb6bSGiorgis Georgakoudis 23a2dbfb6bSGiorgis Georgakoudis int main() { 24a2dbfb6bSGiorgis Georgakoudis int C = 0; 25a2dbfb6bSGiorgis Georgakoudis #pragma omp target map(C) 26a2dbfb6bSGiorgis Georgakoudis { 27a2dbfb6bSGiorgis Georgakoudis use(&C); 28a2dbfb6bSGiorgis Georgakoudis #pragma omp parallel num_threads(2) 29a2dbfb6bSGiorgis Georgakoudis use(&C); 30a2dbfb6bSGiorgis Georgakoudis } 31a2dbfb6bSGiorgis Georgakoudis 32a2dbfb6bSGiorgis Georgakoudis return C; 33a2dbfb6bSGiorgis Georgakoudis } 34a2dbfb6bSGiorgis Georgakoudis 35a2dbfb6bSGiorgis Georgakoudis #endif 36a2dbfb6bSGiorgis Georgakoudis // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker 37a2dbfb6bSGiorgis Georgakoudis // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { 38a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: entry: 39a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 40a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 41a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 42a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 43a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] 44a2dbfb6bSGiorgis Georgakoudis // CHECK1: .await.work: 45a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 46a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 47a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 48a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 49a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 50a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 51a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 52a2dbfb6bSGiorgis Georgakoudis // CHECK1: .select.workers: 53a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 54a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 55a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 56a2dbfb6bSGiorgis Georgakoudis // CHECK1: .execute.parallel: 57a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) 58a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 59a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) 60a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] 61a2dbfb6bSGiorgis Georgakoudis // CHECK1: .execute.fn: 62*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR4:[0-9]+]] 63a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 64a2dbfb6bSGiorgis Georgakoudis // CHECK1: .check.next: 65*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 66*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) 67a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] 68a2dbfb6bSGiorgis Georgakoudis // CHECK1: .terminate.parallel: 69a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() 70a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] 71a2dbfb6bSGiorgis Georgakoudis // CHECK1: .barrier.parallel: 72a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 73a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTAWAIT_WORK]] 74a2dbfb6bSGiorgis Georgakoudis // CHECK1: .exit: 75a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: ret void 76a2dbfb6bSGiorgis Georgakoudis // 77a2dbfb6bSGiorgis Georgakoudis // 78a2dbfb6bSGiorgis Georgakoudis // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25 79a2dbfb6bSGiorgis Georgakoudis // CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1:[0-9]+]] { 80a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: entry: 81a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 82a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 83a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 84a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 85a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 86a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 87a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 88a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 89a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 90a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 91a2dbfb6bSGiorgis Georgakoudis // CHECK1: .worker: 92*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR4]] 93a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTEXIT:%.*]] 94a2dbfb6bSGiorgis Georgakoudis // CHECK1: .mastercheck: 95a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 96a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 97a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 98a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 99a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 100a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 101a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] 102a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] 103a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 104a2dbfb6bSGiorgis Georgakoudis // CHECK1: .master: 105a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 106a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 107a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] 108a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) 109a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() 110a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 111a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7:[0-9]+]] 112a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2) 113a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 114a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP0]] to i8* 115a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 116a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 117*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP9]], i64 1) 118a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 119a2dbfb6bSGiorgis Georgakoudis // CHECK1: .termination.notifier: 120a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) 121a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 122a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[DOTEXIT]] 123a2dbfb6bSGiorgis Georgakoudis // CHECK1: .exit: 124a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: ret void 125a2dbfb6bSGiorgis Georgakoudis // 126a2dbfb6bSGiorgis Georgakoudis // 127a2dbfb6bSGiorgis Georgakoudis // CHECK1-LABEL: define {{[^@]+}}@_Z3usePi 128a2dbfb6bSGiorgis Georgakoudis // CHECK1-SAME: (i32* [[C:%.*]]) #[[ATTR3:[0-9]+]] { 129a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: entry: 130a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 131a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 132*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 133a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 134*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 2) 135a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 136a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8* 137a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 8 138a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 139*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i64 1) 140a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: ret void 141a2dbfb6bSGiorgis Georgakoudis // 142a2dbfb6bSGiorgis Georgakoudis // 143a2dbfb6bSGiorgis Georgakoudis // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ 144*df729e2bSJohannes Doerfert // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { 145*df729e2bSJohannes Doerfert // CHECK1-NEXT: entry: 146*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 147*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 148*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 149*df729e2bSJohannes Doerfert // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 150*df729e2bSJohannes Doerfert // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 151*df729e2bSJohannes Doerfert // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 152*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 153*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]] 154*df729e2bSJohannes Doerfert // CHECK1-NEXT: ret void 155*df729e2bSJohannes Doerfert // 156*df729e2bSJohannes Doerfert // 157*df729e2bSJohannes Doerfert // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___wrapper 158*df729e2bSJohannes Doerfert // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 159*df729e2bSJohannes Doerfert // CHECK1-NEXT: entry: 160*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 161*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 162*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 163*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 164*df729e2bSJohannes Doerfert // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 165*df729e2bSJohannes Doerfert // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 166*df729e2bSJohannes Doerfert // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 167*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 168*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 169*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 170*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 171*df729e2bSJohannes Doerfert // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 172*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] 173*df729e2bSJohannes Doerfert // CHECK1-NEXT: ret void 174*df729e2bSJohannes Doerfert // 175*df729e2bSJohannes Doerfert // 176*df729e2bSJohannes Doerfert // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 177a2dbfb6bSGiorgis Georgakoudis // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { 178a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: entry: 179a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 180a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 181a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 182a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 183a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 184a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 185a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 186a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8 187a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR7]] 188a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: ret void 189a2dbfb6bSGiorgis Georgakoudis // 190a2dbfb6bSGiorgis Georgakoudis // 191a2dbfb6bSGiorgis Georgakoudis // CHECK1-LABEL: define {{[^@]+}}@_Z4workPi 192a2dbfb6bSGiorgis Georgakoudis // CHECK1-SAME: (i32* [[C:%.*]]) #[[ATTR3]] { 193a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: entry: 194a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 195a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 196a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 197a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 198a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 199a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* 200a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* 201a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__atomic_load(i64 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR7]] 202a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] 203a2dbfb6bSGiorgis Georgakoudis // CHECK1: atomic_cont: 204a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 205a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 206a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32 [[ADD]], i32* [[ATOMIC_TEMP1]], align 4 207a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* 208a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* 209a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* 210a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR7]] 211a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] 212a2dbfb6bSGiorgis Georgakoudis // CHECK1: atomic_exit: 213a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: ret void 214a2dbfb6bSGiorgis Georgakoudis // 215a2dbfb6bSGiorgis Georgakoudis // 216*df729e2bSJohannes Doerfert // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 217a2dbfb6bSGiorgis Georgakoudis // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 218a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: entry: 219a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 220a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 221a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 222a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 223a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 224a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 225a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 226a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 227a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 228a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 229a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32*** 230a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 8 231*df729e2bSJohannes Doerfert // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]] 232a2dbfb6bSGiorgis Georgakoudis // CHECK1-NEXT: ret void 233a2dbfb6bSGiorgis Georgakoudis // 234a2dbfb6bSGiorgis Georgakoudis // 235a2dbfb6bSGiorgis Georgakoudis // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker 236a2dbfb6bSGiorgis Georgakoudis // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { 237a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: entry: 238a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 239a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 240a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 241a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 242a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] 243a2dbfb6bSGiorgis Georgakoudis // CHECK2: .await.work: 244a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 245a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 246a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 247a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 248a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 249a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 250a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 251a2dbfb6bSGiorgis Georgakoudis // CHECK2: .select.workers: 252a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 253a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 254a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 255a2dbfb6bSGiorgis Georgakoudis // CHECK2: .execute.parallel: 256a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) 257a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 258a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) 259a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] 260a2dbfb6bSGiorgis Georgakoudis // CHECK2: .execute.fn: 261*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR4:[0-9]+]] 262a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 263a2dbfb6bSGiorgis Georgakoudis // CHECK2: .check.next: 264*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 265*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) 266a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] 267a2dbfb6bSGiorgis Georgakoudis // CHECK2: .terminate.parallel: 268a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() 269a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] 270a2dbfb6bSGiorgis Georgakoudis // CHECK2: .barrier.parallel: 271a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 272a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTAWAIT_WORK]] 273a2dbfb6bSGiorgis Georgakoudis // CHECK2: .exit: 274a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: ret void 275a2dbfb6bSGiorgis Georgakoudis // 276a2dbfb6bSGiorgis Georgakoudis // 277a2dbfb6bSGiorgis Georgakoudis // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25 278a2dbfb6bSGiorgis Georgakoudis // CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1:[0-9]+]] { 279a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: entry: 280a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 281a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 282a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 283a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 284a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 285a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 286a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 287a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 288a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 289a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 290a2dbfb6bSGiorgis Georgakoudis // CHECK2: .worker: 291*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR4]] 292a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTEXIT:%.*]] 293a2dbfb6bSGiorgis Georgakoudis // CHECK2: .mastercheck: 294a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 295a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 296a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 297a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 298a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 299a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 300a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] 301a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] 302a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 303a2dbfb6bSGiorgis Georgakoudis // CHECK2: .master: 304a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 305a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 306a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] 307a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) 308a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() 309a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 310a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7:[0-9]+]] 311a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2) 312a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 313a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP0]] to i8* 314a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 315a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 316*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP9]], i32 1) 317a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 318a2dbfb6bSGiorgis Georgakoudis // CHECK2: .termination.notifier: 319a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) 320a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 321a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[DOTEXIT]] 322a2dbfb6bSGiorgis Georgakoudis // CHECK2: .exit: 323a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: ret void 324a2dbfb6bSGiorgis Georgakoudis // 325a2dbfb6bSGiorgis Georgakoudis // 326a2dbfb6bSGiorgis Georgakoudis // CHECK2-LABEL: define {{[^@]+}}@_Z3usePi 327a2dbfb6bSGiorgis Georgakoudis // CHECK2-SAME: (i32* [[C:%.*]]) #[[ATTR3:[0-9]+]] { 328a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: entry: 329a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 330a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 331*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 332a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 333*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 2) 334a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 335a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8* 336a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4 337a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 338*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 1) 339a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: ret void 340a2dbfb6bSGiorgis Georgakoudis // 341a2dbfb6bSGiorgis Georgakoudis // 342a2dbfb6bSGiorgis Georgakoudis // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ 343*df729e2bSJohannes Doerfert // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { 344*df729e2bSJohannes Doerfert // CHECK2-NEXT: entry: 345*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 346*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 347*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 348*df729e2bSJohannes Doerfert // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 349*df729e2bSJohannes Doerfert // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 350*df729e2bSJohannes Doerfert // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 351*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 352*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]] 353*df729e2bSJohannes Doerfert // CHECK2-NEXT: ret void 354*df729e2bSJohannes Doerfert // 355*df729e2bSJohannes Doerfert // 356*df729e2bSJohannes Doerfert // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper 357*df729e2bSJohannes Doerfert // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 358*df729e2bSJohannes Doerfert // CHECK2-NEXT: entry: 359*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 360*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 361*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 362*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 363*df729e2bSJohannes Doerfert // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 364*df729e2bSJohannes Doerfert // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 365*df729e2bSJohannes Doerfert // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 366*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 367*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 368*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 369*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 370*df729e2bSJohannes Doerfert // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 371*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] 372*df729e2bSJohannes Doerfert // CHECK2-NEXT: ret void 373*df729e2bSJohannes Doerfert // 374*df729e2bSJohannes Doerfert // 375*df729e2bSJohannes Doerfert // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 376a2dbfb6bSGiorgis Georgakoudis // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { 377a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: entry: 378a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 379a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 380a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 381a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 382a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 383a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 384a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 385a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4 386a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR7]] 387a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: ret void 388a2dbfb6bSGiorgis Georgakoudis // 389a2dbfb6bSGiorgis Georgakoudis // 390a2dbfb6bSGiorgis Georgakoudis // CHECK2-LABEL: define {{[^@]+}}@_Z4workPi 391a2dbfb6bSGiorgis Georgakoudis // CHECK2-SAME: (i32* [[C:%.*]]) #[[ATTR3]] { 392a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: entry: 393a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 394a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 395a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 396a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 397a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 398a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* 399a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* 400a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR7]] 401a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] 402a2dbfb6bSGiorgis Georgakoudis // CHECK2: atomic_cont: 403a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 404a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 405a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32 [[ADD]], i32* [[ATOMIC_TEMP1]], align 4 406a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* 407a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* 408a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* 409a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR7]] 410a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] 411a2dbfb6bSGiorgis Georgakoudis // CHECK2: atomic_exit: 412a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: ret void 413a2dbfb6bSGiorgis Georgakoudis // 414a2dbfb6bSGiorgis Georgakoudis // 415*df729e2bSJohannes Doerfert // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 416a2dbfb6bSGiorgis Georgakoudis // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 417a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: entry: 418a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 419a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 420a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 421a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 422a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 423a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 424a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 425a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 426a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 427a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 428a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32*** 429a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 4 430*df729e2bSJohannes Doerfert // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]] 431a2dbfb6bSGiorgis Georgakoudis // CHECK2-NEXT: ret void 432a2dbfb6bSGiorgis Georgakoudis // 433a2dbfb6bSGiorgis Georgakoudis // 434a2dbfb6bSGiorgis Georgakoudis // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker 435a2dbfb6bSGiorgis Georgakoudis // CHECK3-SAME: () #[[ATTR0:[0-9]+]] { 436a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: entry: 437a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 438a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 439a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 440a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 441a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] 442a2dbfb6bSGiorgis Georgakoudis // CHECK3: .await.work: 443a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 444a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 445a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 446a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 447a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 448a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 449a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 450a2dbfb6bSGiorgis Georgakoudis // CHECK3: .select.workers: 451a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 452a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 453a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 454a2dbfb6bSGiorgis Georgakoudis // CHECK3: .execute.parallel: 455a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) 456a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 457a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) 458a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] 459a2dbfb6bSGiorgis Georgakoudis // CHECK3: .execute.fn: 460*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR4:[0-9]+]] 461a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 462a2dbfb6bSGiorgis Georgakoudis // CHECK3: .check.next: 463*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 464*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) 465a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL]] 466a2dbfb6bSGiorgis Georgakoudis // CHECK3: .terminate.parallel: 467a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() 468a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] 469a2dbfb6bSGiorgis Georgakoudis // CHECK3: .barrier.parallel: 470a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 471a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTAWAIT_WORK]] 472a2dbfb6bSGiorgis Georgakoudis // CHECK3: .exit: 473a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: ret void 474a2dbfb6bSGiorgis Georgakoudis // 475a2dbfb6bSGiorgis Georgakoudis // 476a2dbfb6bSGiorgis Georgakoudis // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25 477a2dbfb6bSGiorgis Georgakoudis // CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1:[0-9]+]] { 478a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: entry: 479a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 480a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 481a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 482a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 483a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 484a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 485a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 486a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 487a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 488a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 489a2dbfb6bSGiorgis Georgakoudis // CHECK3: .worker: 490*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR4]] 491a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTEXIT:%.*]] 492a2dbfb6bSGiorgis Georgakoudis // CHECK3: .mastercheck: 493a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 494a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 495a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 496a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 497a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 498a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 499a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] 500a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] 501a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 502a2dbfb6bSGiorgis Georgakoudis // CHECK3: .master: 503a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 504a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 505a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] 506a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) 507a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() 508a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 509a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7:[0-9]+]] 510a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2) 511a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 512a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP0]] to i8* 513a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 514a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 515*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP9]], i32 1) 516a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 517a2dbfb6bSGiorgis Georgakoudis // CHECK3: .termination.notifier: 518a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) 519a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 520a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[DOTEXIT]] 521a2dbfb6bSGiorgis Georgakoudis // CHECK3: .exit: 522a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: ret void 523a2dbfb6bSGiorgis Georgakoudis // 524a2dbfb6bSGiorgis Georgakoudis // 525a2dbfb6bSGiorgis Georgakoudis // CHECK3-LABEL: define {{[^@]+}}@_Z3usePi 526a2dbfb6bSGiorgis Georgakoudis // CHECK3-SAME: (i32* [[C:%.*]]) #[[ATTR3:[0-9]+]] { 527a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: entry: 528a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 529a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 530*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 531a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 532*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 2) 533a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 534a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8* 535a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4 536a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 537*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 1) 538a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: ret void 539a2dbfb6bSGiorgis Georgakoudis // 540a2dbfb6bSGiorgis Georgakoudis // 541a2dbfb6bSGiorgis Georgakoudis // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ 542*df729e2bSJohannes Doerfert // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { 543*df729e2bSJohannes Doerfert // CHECK3-NEXT: entry: 544*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 545*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 546*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 547*df729e2bSJohannes Doerfert // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 548*df729e2bSJohannes Doerfert // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 549*df729e2bSJohannes Doerfert // CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 550*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 551*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7]] 552*df729e2bSJohannes Doerfert // CHECK3-NEXT: ret void 553*df729e2bSJohannes Doerfert // 554*df729e2bSJohannes Doerfert // 555*df729e2bSJohannes Doerfert // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined___wrapper 556*df729e2bSJohannes Doerfert // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 557*df729e2bSJohannes Doerfert // CHECK3-NEXT: entry: 558*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 559*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 560*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 561*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 562*df729e2bSJohannes Doerfert // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 563*df729e2bSJohannes Doerfert // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 564*df729e2bSJohannes Doerfert // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 565*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 566*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 567*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 568*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 569*df729e2bSJohannes Doerfert // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 570*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] 571*df729e2bSJohannes Doerfert // CHECK3-NEXT: ret void 572*df729e2bSJohannes Doerfert // 573*df729e2bSJohannes Doerfert // 574*df729e2bSJohannes Doerfert // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 575a2dbfb6bSGiorgis Georgakoudis // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { 576a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: entry: 577a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 578a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 579a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 580a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 581a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 582a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 583a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 584a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4 585a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR7]] 586a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: ret void 587a2dbfb6bSGiorgis Georgakoudis // 588a2dbfb6bSGiorgis Georgakoudis // 589a2dbfb6bSGiorgis Georgakoudis // CHECK3-LABEL: define {{[^@]+}}@_Z4workPi 590a2dbfb6bSGiorgis Georgakoudis // CHECK3-SAME: (i32* [[C:%.*]]) #[[ATTR3]] { 591a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: entry: 592a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 593a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 594a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 595a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 596a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 597a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* 598a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* 599a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR7]] 600a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] 601a2dbfb6bSGiorgis Georgakoudis // CHECK3: atomic_cont: 602a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 603a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 604a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32 [[ADD]], i32* [[ATOMIC_TEMP1]], align 4 605a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* 606a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* 607a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* 608a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR7]] 609a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] 610a2dbfb6bSGiorgis Georgakoudis // CHECK3: atomic_exit: 611a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: ret void 612a2dbfb6bSGiorgis Georgakoudis // 613a2dbfb6bSGiorgis Georgakoudis // 614*df729e2bSJohannes Doerfert // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 615a2dbfb6bSGiorgis Georgakoudis // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { 616a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: entry: 617a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 618a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 619a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 620a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 621a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 622a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 623a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 624a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 625a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 626a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 627a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32*** 628a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 4 629*df729e2bSJohannes Doerfert // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]] 630a2dbfb6bSGiorgis Georgakoudis // CHECK3-NEXT: ret void 631a2dbfb6bSGiorgis Georgakoudis // 632