// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

template<typename tx>
tx ftemplate(int n) {
  tx a = 0;
  short aa = 0;
  tx b[10];

#pragma omp target teams if(0)
  {
    b[2] += 1;
  }

#pragma omp target teams if(1)
  {
    a = '1';
  }

#pragma omp target teams if(n>40)
  {
    aa = 1;
  }

#pragma omp target teams
  {
#pragma omp parallel
#pragma omp parallel
    aa = 1;
  }

  return a;
}

int bar(int n){
  int a = 0;

  a += ftemplate<char>(n);

  return a;
}

#endif
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23
// CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i8*
// CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8*
// CHECK1-NEXT: store i8 49, i8* [[CONV]], align 1
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28
// CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-NEXT: store i16 1, i16* [[CONV]], align 2
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i16* [[CONV]] to i8*
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP4]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8*
// CHECK1-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP5]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8
// CHECK1-NEXT: store i16 1, i16* [[TMP0]], align 2
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23
// CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8*
// CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK2-NEXT: store i8 49, i8* [[CONV]], align 1
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28
// CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-NEXT: store i16 1, i16* [[CONV]], align 2
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = bitcast i16* [[CONV]] to i8*
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP4]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8*
// CHECK2-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP5]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK2-NEXT: store i16 1, i16* [[TMP0]], align 2
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23
// CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8*
// CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK3-NEXT: store i8 49, i8* [[CONV]], align 1
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28
// CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: store i16 1, i16* [[CONV]], align 2
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP1:%.*]] = bitcast i16* [[CONV]] to i8*
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP4]], i32 1)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8*
// CHECK3-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP5]], i32 1)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: store i16 1, i16* [[TMP0]], align 2
// CHECK3-NEXT: ret void
//