1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // Test target codegen - host bc file has to be created first. 3 // RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 4 // RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 5 // RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 6 // RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 7 // expected-no-diagnostics 8 #ifndef HEADER 9 #define HEADER 10 11 #ifdef CK1 12 13 template <typename T> 14 int tmain(T argc) { 15 #pragma omp target 16 #pragma omp teams 17 argc = 0; 18 return 0; 19 } 20 21 22 int main (int argc, char **argv) { 23 #pragma omp target 24 #pragma omp teams 25 { 26 argc = 0; 27 } 28 return tmain(argv); 29 } 30 31 32 // only nvptx side: do not outline teams region and do not call fork_teams 33 34 35 // target region in template 36 37 38 39 #endif // CK1 40 41 // Test target codegen - host bc file has to be created first. 42 // RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 43 // RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK3 44 // RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 45 // RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4 46 // expected-no-diagnostics 47 #ifdef CK2 48 49 template <typename T> 50 int tmain(T argc) { 51 int a = 10; 52 int b = 5; 53 #pragma omp target 54 #pragma omp teams num_teams(a) thread_limit(b) 55 { 56 argc = 0; 57 } 58 return 0; 59 } 60 61 int main (int argc, char **argv) { 62 int a = 20; 63 int b = 5; 64 #pragma omp target 65 #pragma omp teams num_teams(a) thread_limit(b) 66 { 67 argc = 0; 68 } 69 return tmain(argv); 70 } 71 72 73 74 75 76 77 #endif // CK2 78 #endif 79 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 80 // CHECK1-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 81 // CHECK1-NEXT: entry: 82 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 83 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 84 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 85 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 86 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 87 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 88 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 89 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 90 // CHECK1: user_code.entry: 91 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 92 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) 93 // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* 94 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 95 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 96 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 97 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 98 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] 99 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4) 100 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 101 // CHECK1-NEXT: ret void 102 // CHECK1: worker.exit: 103 // CHECK1-NEXT: ret void 104 // 105 // 106 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ 107 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { 108 // CHECK1-NEXT: entry: 109 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 110 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 111 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 112 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 113 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 114 // CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 115 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 116 // CHECK1-NEXT: store i32 0, i32* [[TMP0]], align 4 117 // CHECK1-NEXT: ret void 118 // 119 // 120 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15 121 // CHECK1-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { 122 // CHECK1-NEXT: entry: 123 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 124 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 125 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 126 // CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 127 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 128 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 129 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 130 // CHECK1: user_code.entry: 131 // CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 132 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) 133 // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** 134 // CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 135 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 136 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 137 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 138 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] 139 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8) 140 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 141 // CHECK1-NEXT: ret void 142 // CHECK1: worker.exit: 143 // CHECK1-NEXT: ret void 144 // 145 // 146 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 147 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { 148 // CHECK1-NEXT: entry: 149 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 150 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 151 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 152 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 153 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 154 // CHECK1-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 155 // CHECK1-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 156 // CHECK1-NEXT: store i8** null, i8*** [[TMP0]], align 8 157 // CHECK1-NEXT: ret void 158 // 159 // 160 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 161 // CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 162 // CHECK2-NEXT: entry: 163 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 164 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 165 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 166 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 167 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 168 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 169 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 170 // CHECK2: user_code.entry: 171 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 172 // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 173 // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* 174 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 175 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 176 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 177 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 178 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] 179 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) 180 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 181 // CHECK2-NEXT: ret void 182 // CHECK2: worker.exit: 183 // CHECK2-NEXT: ret void 184 // 185 // 186 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ 187 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { 188 // CHECK2-NEXT: entry: 189 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 190 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 191 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 192 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 193 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 194 // CHECK2-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 195 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 196 // CHECK2-NEXT: store i32 0, i32* [[TMP0]], align 4 197 // CHECK2-NEXT: ret void 198 // 199 // 200 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15 201 // CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { 202 // CHECK2-NEXT: entry: 203 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 204 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 205 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 206 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 207 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 208 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 209 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 210 // CHECK2: user_code.entry: 211 // CHECK2-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 212 // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 213 // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** 214 // CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 215 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 216 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 217 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 218 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] 219 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) 220 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 221 // CHECK2-NEXT: ret void 222 // CHECK2: worker.exit: 223 // CHECK2-NEXT: ret void 224 // 225 // 226 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 227 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { 228 // CHECK2-NEXT: entry: 229 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 230 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 231 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 232 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 233 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 234 // CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 235 // CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 236 // CHECK2-NEXT: store i8** null, i8*** [[TMP0]], align 4 237 // CHECK2-NEXT: ret void 238 // 239 // 240 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 241 // CHECK3-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 242 // CHECK3-NEXT: entry: 243 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 244 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 245 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 246 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 247 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 248 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 249 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 250 // CHECK3-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 251 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 252 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 253 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 254 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 255 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 256 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 257 // CHECK3: user_code.entry: 258 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 259 // CHECK3-NEXT: [[ARGC3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) 260 // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* 261 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 262 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 263 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 264 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 265 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] 266 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4) 267 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 268 // CHECK3-NEXT: ret void 269 // CHECK3: worker.exit: 270 // CHECK3-NEXT: ret void 271 // 272 // 273 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ 274 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { 275 // CHECK3-NEXT: entry: 276 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 277 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 278 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 279 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 280 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 281 // CHECK3-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 282 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 283 // CHECK3-NEXT: store i32 0, i32* [[TMP0]], align 4 284 // CHECK3-NEXT: ret void 285 // 286 // 287 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53 288 // CHECK3-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { 289 // CHECK3-NEXT: entry: 290 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 291 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 292 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 293 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 294 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 295 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 296 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 297 // CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 298 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 299 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* 300 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 301 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 302 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 303 // CHECK3: user_code.entry: 304 // CHECK3-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 305 // CHECK3-NEXT: [[ARGC2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) 306 // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8*** 307 // CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 308 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 309 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 310 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 311 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] 312 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8) 313 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 314 // CHECK3-NEXT: ret void 315 // CHECK3: worker.exit: 316 // CHECK3-NEXT: ret void 317 // 318 // 319 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 320 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { 321 // CHECK3-NEXT: entry: 322 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 323 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 324 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 325 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 326 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 327 // CHECK3-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 328 // CHECK3-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 329 // CHECK3-NEXT: store i8** null, i8*** [[TMP0]], align 8 330 // CHECK3-NEXT: ret void 331 // 332 // 333 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 334 // CHECK4-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { 335 // CHECK4-NEXT: entry: 336 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 337 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 338 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 339 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 340 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 341 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 342 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 343 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 344 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 345 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 346 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 347 // CHECK4: user_code.entry: 348 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 349 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 350 // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* 351 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 352 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 353 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 354 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 355 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] 356 // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) 357 // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 358 // CHECK4-NEXT: ret void 359 // CHECK4: worker.exit: 360 // CHECK4-NEXT: ret void 361 // 362 // 363 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ 364 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { 365 // CHECK4-NEXT: entry: 366 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 367 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 368 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 369 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 370 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 371 // CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 372 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 373 // CHECK4-NEXT: store i32 0, i32* [[TMP0]], align 4 374 // CHECK4-NEXT: ret void 375 // 376 // 377 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53 378 // CHECK4-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { 379 // CHECK4-NEXT: entry: 380 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 381 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 382 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 383 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 384 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 385 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 386 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 387 // CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 388 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 389 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 390 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 391 // CHECK4: user_code.entry: 392 // CHECK4-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 393 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) 394 // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** 395 // CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 396 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 397 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 398 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 399 // CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] 400 // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) 401 // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 402 // CHECK4-NEXT: ret void 403 // CHECK4: worker.exit: 404 // CHECK4-NEXT: ret void 405 // 406 // 407 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 408 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { 409 // CHECK4-NEXT: entry: 410 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 411 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 412 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 413 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 414 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 415 // CHECK4-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 416 // CHECK4-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 417 // CHECK4-NEXT: store i8** null, i8*** [[TMP0]], align 4 418 // CHECK4-NEXT: ret void 419 // 420