// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

#ifdef CK1

template <typename T>
int tmain(T argc) { // template version: bare target+teams region, device IR checked under CHECK1/CHECK2 prefixes
#pragma omp target
#pragma omp teams
  argc = 0; // argc is captured into the target region and zeroed there
  return 0;
}


int main (int argc, char **argv) { // non-template version; also instantiates tmain<char **> below
#pragma omp target
#pragma omp teams
  {
    argc = 0; // captured argc zeroed inside the teams region
  }
  return tmain(argv);
}


// only nvptx side: do not outline teams region and do not call fork_teams


// target region in template



#endif // CK1

// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4
// expected-no-diagnostics
#ifdef CK2

template <typename T>
int tmain(T argc) { // template version: teams with num_teams/thread_limit clauses
  int a = 10; // num_teams operand
  int b = 5; // thread_limit operand
#pragma omp target
#pragma omp teams num_teams(a) thread_limit(b)
  {
    argc = 0; // captured argc zeroed inside the teams region
  }
  return 0;
}

int main (int argc, char **argv) { // non-template version; also instantiates tmain<char **> below
  int a = 20; // num_teams operand
  int b = 5; // thread_limit operand
#pragma omp target
#pragma omp teams num_teams(a) thread_limit(b)
  {
    argc = 0; // captured argc zeroed inside the teams region
  }
  return tmain(argv);
}






#endif // CK2
#endif
// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker
// CHECK5-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8
// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1
// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 8
// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1
// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]]
// CHECK5: .await.work:
// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]])
// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8
// CHECK5-NEXT: store i8
[[TMP1]], i8* [[EXEC_STATUS]], align 1 92 // CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 93 // CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 94 // CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 95 // CHECK5: .select.workers: 96 // CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 97 // CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 98 // CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 99 // CHECK5: .execute.parallel: 100 // CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) 101 // CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 102 // CHECK5-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) 103 // CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 104 // CHECK5: .terminate.parallel: 105 // CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() 106 // CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] 107 // CHECK5: .barrier.parallel: 108 // CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 109 // CHECK5-NEXT: br label [[DOTAWAIT_WORK]] 110 // CHECK5: .exit: 111 // CHECK5-NEXT: ret void 112 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 113 // CHECK5-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { 114 // CHECK5-NEXT: entry: 115 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 116 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 117 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 118 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 119 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 120 // CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 121 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 122 // CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 123 // CHECK5-NEXT: store i64 [[ARGC]], 
i64* [[ARGC_ADDR]], align 8 124 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 125 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 126 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 127 // CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 128 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 129 // CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 130 // CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 131 // CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 132 // CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 133 // CHECK5: .worker: 134 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] 135 // CHECK5-NEXT: br label [[DOTEXIT:%.*]] 136 // CHECK5: .mastercheck: 137 // CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 138 // CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 139 // CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 140 // CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 141 // CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1 142 // CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 143 // CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 144 // CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] 145 // CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 146 // CHECK5: .master: 147 // CHECK5-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 148 // CHECK5-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 149 // CHECK5-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] 150 // CHECK5-NEXT: call 
void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) 151 // CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() 152 // CHECK5-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 153 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 154 // CHECK5-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) 155 // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 156 // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 157 // CHECK5-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* 158 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV2]], align 8 159 // CHECK5-NEXT: [[ARGC9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 160 // CHECK5-NEXT: store i32 [[TMP10]], i32* [[ARGC9]], align 4 161 // CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 162 // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 163 // CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC9]]) #[[ATTR3]] 164 // CHECK5-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 165 // CHECK5-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) 166 // CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 167 // CHECK5: .termination.notifier: 168 // CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) 169 // CHECK5-NEXT: call void 
@__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 170 // CHECK5-NEXT: br label [[DOTEXIT]] 171 // CHECK5: .exit: 172 // CHECK5-NEXT: ret void 173 // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ 174 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { 175 // CHECK5-NEXT: entry: 176 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 177 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 178 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 179 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 180 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 181 // CHECK5-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 182 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 183 // CHECK5-NEXT: store i32 0, i32* [[TMP0]], align 4 184 // CHECK5-NEXT: ret void 185 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker 186 // CHECK5-SAME: () #[[ATTR0]] { 187 // CHECK5-NEXT: entry: 188 // CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 189 // CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 190 // CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 8 191 // CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 192 // CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] 193 // CHECK5: .await.work: 194 // CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 195 // CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 196 // CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 197 // CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 198 // CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 199 // CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 200 // CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label 
[[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 201 // CHECK5: .select.workers: 202 // CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 203 // CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 204 // CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 205 // CHECK5: .execute.parallel: 206 // CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 207 // CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 208 // CHECK5-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) 209 // CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 210 // CHECK5: .terminate.parallel: 211 // CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() 212 // CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] 213 // CHECK5: .barrier.parallel: 214 // CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 215 // CHECK5-NEXT: br label [[DOTAWAIT_WORK]] 216 // CHECK5: .exit: 217 // CHECK5-NEXT: ret void 218 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 219 // CHECK5-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { 220 // CHECK5-NEXT: entry: 221 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 222 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 223 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 224 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 225 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 226 // CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 227 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 228 // CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 229 // CHECK5-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 230 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 231 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 232 // CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.tid.x() 233 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 234 // CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 235 // CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 236 // CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 237 // CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 238 // CHECK5: .worker: 239 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] 240 // CHECK5-NEXT: br label [[DOTEXIT:%.*]] 241 // CHECK5: .mastercheck: 242 // CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 243 // CHECK5-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 244 // CHECK5-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 245 // CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 246 // CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 247 // CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 248 // CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 249 // CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] 250 // CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 251 // CHECK5: .master: 252 // CHECK5-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 253 // CHECK5-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 254 // CHECK5-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] 255 // CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) 256 // CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() 257 // CHECK5-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 258 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* 
@"_openmp_static_kernel$size2", align 8 259 // CHECK5-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) 260 // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 261 // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 262 // CHECK5-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.0* 263 // CHECK5-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 264 // CHECK5-NEXT: [[ARGC8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP9]], i32 0, i32 0 265 // CHECK5-NEXT: store i8** [[TMP10]], i8*** [[ARGC8]], align 8 266 // CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 267 // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 268 // CHECK5-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC8]]) #[[ATTR3]] 269 // CHECK5-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 270 // CHECK5-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) 271 // CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 272 // CHECK5: .termination.notifier: 273 // CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) 274 // CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 275 // CHECK5-NEXT: br label [[DOTEXIT]] 276 // CHECK5: .exit: 277 // CHECK5-NEXT: ret void 278 // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3 279 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { 280 // CHECK5-NEXT: entry: 281 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 282 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 283 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 284 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 285 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 286 // CHECK5-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 287 // CHECK5-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 288 // CHECK5-NEXT: store i8** null, i8*** [[TMP0]], align 8 289 // CHECK5-NEXT: ret void 290 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker 291 // CHECK6-SAME: () #[[ATTR0:[0-9]+]] { 292 // CHECK6-NEXT: entry: 293 // CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 294 // CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 295 // CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 8 296 // CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 297 // CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] 298 // CHECK6: .await.work: 299 // CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 300 // CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 301 // CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 302 // CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 303 // CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 304 // CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 305 // CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 306 // CHECK6: .select.workers: 307 // CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 308 // CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 309 // CHECK6-NEXT: br i1 [[IS_ACTIVE]], label 
[[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 310 // CHECK6: .execute.parallel: 311 // CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) 312 // CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 313 // CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) 314 // CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 315 // CHECK6: .terminate.parallel: 316 // CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() 317 // CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] 318 // CHECK6: .barrier.parallel: 319 // CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 320 // CHECK6-NEXT: br label [[DOTAWAIT_WORK]] 321 // CHECK6: .exit: 322 // CHECK6-NEXT: ret void 323 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 324 // CHECK6-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { 325 // CHECK6-NEXT: entry: 326 // CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 327 // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 328 // CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 329 // CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 330 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 331 // CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 332 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 333 // CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 334 // CHECK6-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 335 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 336 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 337 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 338 // CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 339 // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 340 // CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() 341 // CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 342 // CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 343 // CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 344 // CHECK6: .worker: 345 // CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] 346 // CHECK6-NEXT: br label [[DOTEXIT:%.*]] 347 // CHECK6: .mastercheck: 348 // CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 349 // CHECK6-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 350 // CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 351 // CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 352 // CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1 353 // CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 354 // CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 355 // CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] 356 // CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 357 // CHECK6: .master: 358 // CHECK6-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 359 // CHECK6-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 360 // CHECK6-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] 361 // CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) 362 // CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() 363 // CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) 364 // CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* 365 // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV2]], align 8 366 // CHECK6-NEXT: [[ARGC9:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 367 // CHECK6-NEXT: store i32 [[TMP7]], i32* [[ARGC9]], align 4 368 // CHECK6-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 369 // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 370 // CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC9]]) #[[ATTR3]] 371 // CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) 372 // CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 373 // CHECK6: .termination.notifier: 374 // CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) 375 // CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 376 // CHECK6-NEXT: br label [[DOTEXIT]] 377 // CHECK6: .exit: 378 // CHECK6-NEXT: ret void 379 // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ 380 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { 381 // CHECK6-NEXT: entry: 382 // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 383 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 384 // CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 385 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 386 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 387 // CHECK6-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 388 // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 389 // CHECK6-NEXT: store i32 0, i32* [[TMP0]], align 4 390 // CHECK6-NEXT: ret void 391 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker 392 // CHECK6-SAME: () #[[ATTR0]] { 393 // CHECK6-NEXT: entry: 394 // CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 395 // CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca 
i8, align 1 396 // CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 8 397 // CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 398 // CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] 399 // CHECK6: .await.work: 400 // CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 401 // CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 402 // CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 403 // CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 404 // CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 405 // CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 406 // CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 407 // CHECK6: .select.workers: 408 // CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 409 // CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 410 // CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 411 // CHECK6: .execute.parallel: 412 // CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 413 // CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 414 // CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) 415 // CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 416 // CHECK6: .terminate.parallel: 417 // CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() 418 // CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] 419 // CHECK6: .barrier.parallel: 420 // CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 421 // CHECK6-NEXT: br label [[DOTAWAIT_WORK]] 422 // CHECK6: .exit: 423 // CHECK6-NEXT: ret void 424 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 425 // CHECK6-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { 426 // CHECK6-NEXT: entry: 427 // CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i64, 
align 8 428 // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 429 // CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 430 // CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 431 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 432 // CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 433 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 434 // CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 435 // CHECK6-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 436 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 437 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 438 // CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 439 // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 440 // CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 441 // CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 442 // CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 443 // CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 444 // CHECK6: .worker: 445 // CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] 446 // CHECK6-NEXT: br label [[DOTEXIT:%.*]] 447 // CHECK6: .mastercheck: 448 // CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 449 // CHECK6-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 450 // CHECK6-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 451 // CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 452 // CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 453 // CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 454 // CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 455 // CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], 
[[MASTER_TID]] 456 // CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 457 // CHECK6: .master: 458 // CHECK6-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 459 // CHECK6-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 460 // CHECK6-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] 461 // CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) 462 // CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() 463 // CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) 464 // CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* 465 // CHECK6-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 466 // CHECK6-NEXT: [[ARGC8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 467 // CHECK6-NEXT: store i8** [[TMP7]], i8*** [[ARGC8]], align 8 468 // CHECK6-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 469 // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 470 // CHECK6-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC8]]) #[[ATTR3]] 471 // CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) 472 // CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 473 // CHECK6: .termination.notifier: 474 // CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) 475 // CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 476 // CHECK6-NEXT: br label [[DOTEXIT]] 477 // CHECK6: .exit: 478 // CHECK6-NEXT: ret void 479 // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 480 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { 481 // CHECK6-NEXT: entry: 482 // 
CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 483 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 484 // CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 485 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 486 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 487 // CHECK6-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 488 // CHECK6-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 489 // CHECK6-NEXT: store i8** null, i8*** [[TMP0]], align 8 490 // CHECK6-NEXT: ret void 491 // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker 492 // CHECK7-SAME: () #[[ATTR0:[0-9]+]] { 493 // CHECK7-NEXT: entry: 494 // CHECK7-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 495 // CHECK7-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 496 // CHECK7-NEXT: store i8* null, i8** [[WORK_FN]], align 4 497 // CHECK7-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 498 // CHECK7-NEXT: br label [[DOTAWAIT_WORK:%.*]] 499 // CHECK7: .await.work: 500 // CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 501 // CHECK7-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 502 // CHECK7-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 503 // CHECK7-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 504 // CHECK7-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 505 // CHECK7-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 506 // CHECK7-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 507 // CHECK7: .select.workers: 508 // CHECK7-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 509 // CHECK7-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 510 // CHECK7-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 511 // CHECK7: .execute.parallel: 512 // CHECK7-NEXT: [[TMP4:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) 513 // CHECK7-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 514 // CHECK7-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) 515 // CHECK7-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 516 // CHECK7: .terminate.parallel: 517 // CHECK7-NEXT: call void @__kmpc_kernel_end_parallel() 518 // CHECK7-NEXT: br label [[DOTBARRIER_PARALLEL]] 519 // CHECK7: .barrier.parallel: 520 // CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 521 // CHECK7-NEXT: br label [[DOTAWAIT_WORK]] 522 // CHECK7: .exit: 523 // CHECK7-NEXT: ret void 524 // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 525 // CHECK7-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { 526 // CHECK7-NEXT: entry: 527 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 528 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 529 // CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 530 // CHECK7-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 531 // CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 532 // CHECK7-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 533 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 534 // CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 535 // CHECK7-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 536 // CHECK7-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 537 // CHECK7-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 538 // CHECK7-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 539 // CHECK7-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 540 // CHECK7-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 541 // CHECK7-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 542 // CHECK7: .worker: 543 // CHECK7-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] 544 // CHECK7-NEXT: br label [[DOTEXIT:%.*]] 545 // CHECK7: .mastercheck: 546 // CHECK7-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 547 // CHECK7-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 548 // CHECK7-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 549 // CHECK7-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 550 // CHECK7-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 551 // CHECK7-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 552 // CHECK7-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 553 // CHECK7-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] 554 // CHECK7-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 555 // CHECK7: .master: 556 // CHECK7-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 557 // CHECK7-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 558 // CHECK7-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] 559 // CHECK7-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) 560 // CHECK7-NEXT: call void @__kmpc_data_sharing_init_stack() 561 // CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 562 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 563 // CHECK7-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) 564 // CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", 
align 4 565 // CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 566 // CHECK7-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* 567 // CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 568 // CHECK7-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 569 // CHECK7-NEXT: store i32 [[TMP10]], i32* [[ARGC7]], align 4 570 // CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 571 // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 572 // CHECK7-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] 573 // CHECK7-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 574 // CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) 575 // CHECK7-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 576 // CHECK7: .termination.notifier: 577 // CHECK7-NEXT: call void @__kmpc_kernel_deinit(i16 1) 578 // CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 579 // CHECK7-NEXT: br label [[DOTEXIT]] 580 // CHECK7: .exit: 581 // CHECK7-NEXT: ret void 582 // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__ 583 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { 584 // CHECK7-NEXT: entry: 585 // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 586 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 587 // CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 588 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 589 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 590 // CHECK7-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 591 // 
CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 592 // CHECK7-NEXT: store i32 0, i32* [[TMP0]], align 4 593 // CHECK7-NEXT: ret void 594 // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker 595 // CHECK7-SAME: () #[[ATTR0]] { 596 // CHECK7-NEXT: entry: 597 // CHECK7-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 598 // CHECK7-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 599 // CHECK7-NEXT: store i8* null, i8** [[WORK_FN]], align 4 600 // CHECK7-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 601 // CHECK7-NEXT: br label [[DOTAWAIT_WORK:%.*]] 602 // CHECK7: .await.work: 603 // CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 604 // CHECK7-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 605 // CHECK7-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 606 // CHECK7-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 607 // CHECK7-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 608 // CHECK7-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 609 // CHECK7-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 610 // CHECK7: .select.workers: 611 // CHECK7-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 612 // CHECK7-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 613 // CHECK7-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 614 // CHECK7: .execute.parallel: 615 // CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 616 // CHECK7-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 617 // CHECK7-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) 618 // CHECK7-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 619 // CHECK7: .terminate.parallel: 620 // CHECK7-NEXT: call void @__kmpc_kernel_end_parallel() 621 // CHECK7-NEXT: br label [[DOTBARRIER_PARALLEL]] 622 // CHECK7: .barrier.parallel: 623 // CHECK7-NEXT: 
call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 624 // CHECK7-NEXT: br label [[DOTAWAIT_WORK]] 625 // CHECK7: .exit: 626 // CHECK7-NEXT: ret void 627 // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 628 // CHECK7-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { 629 // CHECK7-NEXT: entry: 630 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 631 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 632 // CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 633 // CHECK7-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 634 // CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 635 // CHECK7-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 636 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 637 // CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 638 // CHECK7-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 639 // CHECK7-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 640 // CHECK7-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 641 // CHECK7-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 642 // CHECK7-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 643 // CHECK7-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 644 // CHECK7-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 645 // CHECK7: .worker: 646 // CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] 647 // CHECK7-NEXT: br label [[DOTEXIT:%.*]] 648 // CHECK7: .mastercheck: 649 // CHECK7-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 650 // CHECK7-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 651 // CHECK7-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 652 // CHECK7-NEXT: [[TMP1:%.*]] = sub nuw i32 
[[NVPTX_WARP_SIZE3]], 1 653 // CHECK7-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 654 // CHECK7-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 655 // CHECK7-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 656 // CHECK7-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] 657 // CHECK7-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 658 // CHECK7: .master: 659 // CHECK7-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 660 // CHECK7-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 661 // CHECK7-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] 662 // CHECK7-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) 663 // CHECK7-NEXT: call void @__kmpc_data_sharing_init_stack() 664 // CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 665 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size2", align 4 666 // CHECK7-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) 667 // CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 668 // CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 669 // CHECK7-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.0* 670 // CHECK7-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 671 // CHECK7-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP9]], i32 0, i32 0 672 // CHECK7-NEXT: store i8** [[TMP10]], i8*** 
[[ARGC7]], align 4 673 // CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 674 // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 675 // CHECK7-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] 676 // CHECK7-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 677 // CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) 678 // CHECK7-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 679 // CHECK7: .termination.notifier: 680 // CHECK7-NEXT: call void @__kmpc_kernel_deinit(i16 1) 681 // CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 682 // CHECK7-NEXT: br label [[DOTEXIT]] 683 // CHECK7: .exit: 684 // CHECK7-NEXT: ret void 685 // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__3 686 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { 687 // CHECK7-NEXT: entry: 688 // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 689 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 690 // CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 691 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 692 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 693 // CHECK7-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 694 // CHECK7-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 695 // CHECK7-NEXT: store i8** null, i8*** [[TMP0]], align 4 696 // CHECK7-NEXT: ret void 697 // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker 698 // CHECK8-SAME: () #[[ATTR0:[0-9]+]] { 699 // CHECK8-NEXT: entry: 700 // CHECK8-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 701 // CHECK8-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 702 
// CHECK8-NEXT: store i8* null, i8** [[WORK_FN]], align 4 703 // CHECK8-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 704 // CHECK8-NEXT: br label [[DOTAWAIT_WORK:%.*]] 705 // CHECK8: .await.work: 706 // CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 707 // CHECK8-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 708 // CHECK8-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 709 // CHECK8-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 710 // CHECK8-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 711 // CHECK8-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 712 // CHECK8-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 713 // CHECK8: .select.workers: 714 // CHECK8-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 715 // CHECK8-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 716 // CHECK8-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 717 // CHECK8: .execute.parallel: 718 // CHECK8-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) 719 // CHECK8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 720 // CHECK8-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) 721 // CHECK8-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 722 // CHECK8: .terminate.parallel: 723 // CHECK8-NEXT: call void @__kmpc_kernel_end_parallel() 724 // CHECK8-NEXT: br label [[DOTBARRIER_PARALLEL]] 725 // CHECK8: .barrier.parallel: 726 // CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 727 // CHECK8-NEXT: br label [[DOTAWAIT_WORK]] 728 // CHECK8: .exit: 729 // CHECK8-NEXT: ret void 730 // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 731 // CHECK8-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { 732 // CHECK8-NEXT: entry: 733 // CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 734 // 
CHECK8-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 735 // CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 736 // CHECK8-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 737 // CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 738 // CHECK8-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 739 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 740 // CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 741 // CHECK8-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 742 // CHECK8-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 743 // CHECK8-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 744 // CHECK8-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 745 // CHECK8-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 746 // CHECK8-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 747 // CHECK8-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 748 // CHECK8: .worker: 749 // CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] 750 // CHECK8-NEXT: br label [[DOTEXIT:%.*]] 751 // CHECK8: .mastercheck: 752 // CHECK8-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 753 // CHECK8-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 754 // CHECK8-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 755 // CHECK8-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 756 // CHECK8-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 757 // CHECK8-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 758 // CHECK8-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 759 // CHECK8-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] 760 // CHECK8-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 761 // CHECK8: .master: 762 // CHECK8-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = 
call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 763 // CHECK8-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 764 // CHECK8-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] 765 // CHECK8-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) 766 // CHECK8-NEXT: call void @__kmpc_data_sharing_init_stack() 767 // CHECK8-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) 768 // CHECK8-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* 769 // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 770 // CHECK8-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 771 // CHECK8-NEXT: store i32 [[TMP7]], i32* [[ARGC7]], align 4 772 // CHECK8-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 773 // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 774 // CHECK8-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] 775 // CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) 776 // CHECK8-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] 777 // CHECK8: .termination.notifier: 778 // CHECK8-NEXT: call void @__kmpc_kernel_deinit(i16 1) 779 // CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 780 // CHECK8-NEXT: br label [[DOTEXIT]] 781 // CHECK8: .exit: 782 // CHECK8-NEXT: ret void 783 // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__ 784 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { 785 // CHECK8-NEXT: entry: 786 // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 787 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 788 // CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, 
align 4 789 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 790 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 791 // CHECK8-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 792 // CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 793 // CHECK8-NEXT: store i32 0, i32* [[TMP0]], align 4 794 // CHECK8-NEXT: ret void 795 // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker 796 // CHECK8-SAME: () #[[ATTR0]] { 797 // CHECK8-NEXT: entry: 798 // CHECK8-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 799 // CHECK8-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 800 // CHECK8-NEXT: store i8* null, i8** [[WORK_FN]], align 4 801 // CHECK8-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 802 // CHECK8-NEXT: br label [[DOTAWAIT_WORK:%.*]] 803 // CHECK8: .await.work: 804 // CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 805 // CHECK8-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) 806 // CHECK8-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 807 // CHECK8-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 808 // CHECK8-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 809 // CHECK8-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null 810 // CHECK8-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] 811 // CHECK8: .select.workers: 812 // CHECK8-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 813 // CHECK8-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 814 // CHECK8-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] 815 // CHECK8: .execute.parallel: 816 // CHECK8-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 817 // CHECK8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 818 // CHECK8-NEXT: call void [[TMP5]](i16 0, i32 
[[TMP4]]) 819 // CHECK8-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] 820 // CHECK8: .terminate.parallel: 821 // CHECK8-NEXT: call void @__kmpc_kernel_end_parallel() 822 // CHECK8-NEXT: br label [[DOTBARRIER_PARALLEL]] 823 // CHECK8: .barrier.parallel: 824 // CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 825 // CHECK8-NEXT: br label [[DOTAWAIT_WORK]] 826 // CHECK8: .exit: 827 // CHECK8-NEXT: ret void 828 // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 829 // CHECK8-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { 830 // CHECK8-NEXT: entry: 831 // CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 832 // CHECK8-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 833 // CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 834 // CHECK8-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 835 // CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 836 // CHECK8-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 837 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 838 // CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 839 // CHECK8-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 840 // CHECK8-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 841 // CHECK8-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 842 // CHECK8-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 843 // CHECK8-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] 844 // CHECK8-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] 845 // CHECK8-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 846 // CHECK8: .worker: 847 // CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] 848 // CHECK8-NEXT: br label [[DOTEXIT:%.*]] 849 // CHECK8: .mastercheck: 850 // CHECK8-NEXT: [[NVPTX_TID1:%.*]] = 
call i32 @llvm.nvvm.read.ptx.sreg.tid.x() 851 // CHECK8-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 852 // CHECK8-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 853 // CHECK8-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 854 // CHECK8-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 855 // CHECK8-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 856 // CHECK8-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] 857 // CHECK8-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] 858 // CHECK8-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 859 // CHECK8: .master: 860 // CHECK8-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 861 // CHECK8-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 862 // CHECK8-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] 863 // CHECK8-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) 864 // CHECK8-NEXT: call void @__kmpc_data_sharing_init_stack() 865 // CHECK8-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) 866 // CHECK8-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* 867 // CHECK8-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 868 // CHECK8-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 869 // CHECK8-NEXT: store i8** [[TMP7]], i8*** [[ARGC7]], align 4 870 // CHECK8-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 871 // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 872 // CHECK8-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] 873 // CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) 874 // CHECK8-NEXT: br label 
[[DOTTERMINATION_NOTIFIER:%.*]] 875 // CHECK8: .termination.notifier: 876 // CHECK8-NEXT: call void @__kmpc_kernel_deinit(i16 1) 877 // CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 878 // CHECK8-NEXT: br label [[DOTEXIT]] 879 // CHECK8: .exit: 880 // CHECK8-NEXT: ret void 881 // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__1 882 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { 883 // CHECK8-NEXT: entry: 884 // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 885 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 886 // CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 887 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 888 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 889 // CHECK8-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 890 // CHECK8-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 891 // CHECK8-NEXT: store i8** null, i8*** [[TMP0]], align 4 892 // CHECK8-NEXT: ret void 893 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 894 // CHECK1-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 895 // CHECK1-NEXT: entry: 896 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 897 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 898 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 899 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 900 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 901 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 902 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 903 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 904 // CHECK1: 
user_code.entry: 905 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 906 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) 907 // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* 908 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 909 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 910 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 911 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 912 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] 913 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4) 914 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 915 // CHECK1-NEXT: ret void 916 // CHECK1: worker.exit: 917 // CHECK1-NEXT: ret void 918 // 919 // 920 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ 921 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { 922 // CHECK1-NEXT: entry: 923 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 924 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 925 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 926 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 927 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 928 // CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 929 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 930 // CHECK1-NEXT: store i32 0, i32* [[TMP0]], align 4 931 // CHECK1-NEXT: ret void 932 // 933 // 934 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15 935 // CHECK1-SAME: (i8** noundef [[ARGC:%.*]]) 
#[[ATTR0]] { 936 // CHECK1-NEXT: entry: 937 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 938 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 939 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 940 // CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 941 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 942 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 943 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 944 // CHECK1: user_code.entry: 945 // CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 946 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) 947 // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** 948 // CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 949 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 950 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 951 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 952 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] 953 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8) 954 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 955 // CHECK1-NEXT: ret void 956 // CHECK1: worker.exit: 957 // CHECK1-NEXT: ret void 958 // 959 // 960 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 961 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { 962 // CHECK1-NEXT: entry: 963 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 964 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 965 // 
CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 966 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 967 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 968 // CHECK1-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 969 // CHECK1-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 970 // CHECK1-NEXT: store i8** null, i8*** [[TMP0]], align 8 971 // CHECK1-NEXT: ret void 972 // 973 // 974 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 975 // CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 976 // CHECK2-NEXT: entry: 977 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 978 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 979 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 980 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 981 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 982 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 983 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 984 // CHECK2: user_code.entry: 985 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 986 // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 987 // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* 988 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 989 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 990 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 991 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 992 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] 993 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* 
[[ARGC1]], i32 4) 994 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 995 // CHECK2-NEXT: ret void 996 // CHECK2: worker.exit: 997 // CHECK2-NEXT: ret void 998 // 999 // 1000 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ 1001 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { 1002 // CHECK2-NEXT: entry: 1003 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 1004 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 1005 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 1006 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 1007 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 1008 // CHECK2-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 1009 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 1010 // CHECK2-NEXT: store i32 0, i32* [[TMP0]], align 4 1011 // CHECK2-NEXT: ret void 1012 // 1013 // 1014 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15 1015 // CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { 1016 // CHECK2-NEXT: entry: 1017 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 1018 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1019 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1020 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 1021 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 1022 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1023 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1024 // CHECK2: user_code.entry: 1025 // CHECK2-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 1026 // CHECK2-NEXT: 
[[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 1027 // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** 1028 // CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 1029 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1030 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1031 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 1032 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] 1033 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) 1034 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1035 // CHECK2-NEXT: ret void 1036 // CHECK2: worker.exit: 1037 // CHECK2-NEXT: ret void 1038 // 1039 // 1040 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 1041 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { 1042 // CHECK2-NEXT: entry: 1043 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 1044 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 1045 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 1046 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 1047 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 1048 // CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 1049 // CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 1050 // CHECK2-NEXT: store i8** null, i8*** [[TMP0]], align 4 1051 // CHECK2-NEXT: ret void 1052 // 1053 // 1054 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 1055 // CHECK3-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 1056 // 
CHECK3-NEXT: entry: 1057 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 1058 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 1059 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 1060 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1061 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1062 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 1063 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 1064 // CHECK3-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 1065 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 1066 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 1067 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 1068 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 1069 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1070 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1071 // CHECK3: user_code.entry: 1072 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 1073 // CHECK3-NEXT: [[ARGC3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) 1074 // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* 1075 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 1076 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1077 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1078 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 1079 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] 1080 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4) 1081 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1082 // CHECK3-NEXT: ret void 1083 // CHECK3: worker.exit: 1084 // 
CHECK3-NEXT: ret void 1085 // 1086 // 1087 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ 1088 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { 1089 // CHECK3-NEXT: entry: 1090 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1091 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1092 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 1093 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 1094 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1095 // CHECK3-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 1096 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 1097 // CHECK3-NEXT: store i32 0, i32* [[TMP0]], align 4 1098 // CHECK3-NEXT: ret void 1099 // 1100 // 1101 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53 1102 // CHECK3-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { 1103 // CHECK3-NEXT: entry: 1104 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 1105 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 1106 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 1107 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1108 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1109 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 1110 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 1111 // CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 1112 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 1113 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 1114 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 1115 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq 
i32 [[TMP0]], -1 1116 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1117 // CHECK3: user_code.entry: 1118 // CHECK3-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 1119 // CHECK3-NEXT: [[ARGC2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) 1120 // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8*** 1121 // CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 1122 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1123 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1124 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 1125 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] 1126 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8) 1127 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1128 // CHECK3-NEXT: ret void 1129 // CHECK3: worker.exit: 1130 // CHECK3-NEXT: ret void 1131 // 1132 // 1133 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 1134 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { 1135 // CHECK3-NEXT: entry: 1136 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1137 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1138 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 1139 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 1140 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1141 // CHECK3-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 1142 // CHECK3-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 1143 // CHECK3-NEXT: store i8** null, i8*** [[TMP0]], align 8 1144 // 
CHECK3-NEXT: ret void 1145 // 1146 // 1147 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 1148 // CHECK4-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 1149 // CHECK4-NEXT: entry: 1150 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 1151 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 1152 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 1153 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1154 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1155 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 1156 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 1157 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 1158 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 1159 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1160 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1161 // CHECK4: user_code.entry: 1162 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 1163 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 1164 // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* 1165 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 1166 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1167 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1168 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 1169 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] 1170 // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) 1171 // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1172 // CHECK4-NEXT: ret void 1173 
// CHECK4: worker.exit: 1174 // CHECK4-NEXT: ret void 1175 // 1176 // 1177 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ 1178 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { 1179 // CHECK4-NEXT: entry: 1180 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 1181 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 1182 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 1183 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 1184 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 1185 // CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 1186 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 1187 // CHECK4-NEXT: store i32 0, i32* [[TMP0]], align 4 1188 // CHECK4-NEXT: ret void 1189 // 1190 // 1191 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53 1192 // CHECK4-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { 1193 // CHECK4-NEXT: entry: 1194 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 1195 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 1196 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 1197 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1198 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1199 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 1200 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 1201 // CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 1202 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 1203 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1204 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] 1205 // CHECK4: user_code.entry: 1206 // CHECK4-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 1207 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 1208 // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** 1209 // CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 1210 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1211 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1212 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 1213 // CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] 1214 // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) 1215 // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1216 // CHECK4-NEXT: ret void 1217 // CHECK4: worker.exit: 1218 // CHECK4-NEXT: ret void 1219 // 1220 // 1221 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 1222 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { 1223 // CHECK4-NEXT: entry: 1224 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 1225 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 1226 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 1227 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 1228 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 1229 // CHECK4-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 1230 // CHECK4-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 1231 // CHECK4-NEXT: store i8** null, i8*** [[TMP0]], align 4 1232 // CHECK4-NEXT: ret void 1233 // 1234