1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // Test target codegen - host bc file has to be created first. 3 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 4 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 5 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 6 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 7 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 8 9 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 10 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 11 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 12 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 13 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 14 15 // expected-no-diagnostics 16 #ifndef HEADER 17 #define HEADER 18 19 template<typename tx> /* Codegen fixture: declares a, aa and b, runs three "omp target parallel" regions that increment them, and returns a. The kernel names asserted further down end in _l30 and _l35, which match the lines of the second and third pragma (presumably a line-number suffix), so avoid adding or removing lines here. */ 20 tx ftemplate(int n) { 21 tx a = 0; 22 short aa = 0; 23 tx b[10]; 24 25 #pragma omp target parallel if(target: 0) 26 { /* if(target: 0) region; no kernel with an _l25 suffix appears among the visible assertions */ 27 a += 1; 28 } 29 30 #pragma omp target parallel map(tofrom: aa) 31 { /* aa mapped tofrom; the asserted _l30 kernel takes a single i16* argument */ 32 aa += 1; 33 } 34 35 #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) 36 { /* a, aa and b mapped tofrom, with if(target: n>40); the asserted _l35 kernel takes i32*, i16* and [10 x i32]* arguments */ 37 a += 1; 38 aa += 1; 39 b[2] += 1; 40 } 41 42 return a; 43 } 44 45 int bar(int n){ /* Calls ftemplate<int>(n), the only visible use of the template, and returns that result accumulated into a local that starts at 0. */ 46 int a = 0; 47 48 a += ftemplate<int>(n); 49 50 return a; 51 } 52 53 #endif 54 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 55 // CHECK1-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 56 // CHECK1-NEXT: entry: 57 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 58 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 59 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 60 // CHECK1-NEXT: [[TMP0:%.*]] = load 
i16*, i16** [[AA_ADDR]], align 8 61 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 62 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 63 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 64 // CHECK1: user_code.entry: 65 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 66 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 67 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 68 // CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 69 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 70 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) 71 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 72 // CHECK1-NEXT: ret void 73 // CHECK1: worker.exit: 74 // CHECK1-NEXT: ret void 75 // 76 // 77 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ 78 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 79 // CHECK1-NEXT: entry: 80 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 81 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 82 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 83 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 84 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 85 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 86 // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 87 // CHECK1-NEXT: 
[[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 88 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 89 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 90 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 91 // CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 92 // CHECK1-NEXT: ret void 93 // 94 // 95 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35 96 // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { 97 // CHECK1-NEXT: entry: 98 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 99 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 100 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 101 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 102 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 103 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 104 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 105 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 106 // CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 107 // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 108 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 109 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 110 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 111 // CHECK1: user_code.entry: 112 // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 113 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 114 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8* 115 // CHECK1-NEXT: store 
i8* [[TMP6]], i8** [[TMP5]], align 8 116 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 117 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP1]] to i8* 118 // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 119 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 120 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 121 // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 122 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 123 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i64 3) 124 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 125 // CHECK1-NEXT: ret void 126 // CHECK1: worker.exit: 127 // CHECK1-NEXT: ret void 128 // 129 // 130 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 131 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 132 // CHECK1-NEXT: entry: 133 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 134 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 135 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 136 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 137 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 138 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 139 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 140 // CHECK1-NEXT: store i32* [[A]], i32** 
[[A_ADDR]], align 8 141 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 142 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 143 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 144 // CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 145 // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 146 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 147 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 148 // CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 149 // CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 150 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 151 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 152 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 153 // CHECK1-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 154 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 2 155 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 156 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 157 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 158 // CHECK1-NEXT: ret void 159 // 160 // 161 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 162 // CHECK2-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 163 // CHECK2-NEXT: entry: 164 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 165 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 166 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 167 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 168 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 169 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 170 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 171 // CHECK2: user_code.entry: 172 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 173 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 174 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 175 // CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 176 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 177 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) 178 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 179 // CHECK2-NEXT: ret void 180 // CHECK2: worker.exit: 181 // CHECK2-NEXT: ret void 182 // 183 // 184 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ 185 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 186 // CHECK2-NEXT: entry: 187 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 188 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 189 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 190 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 191 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 192 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 193 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 194 // CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 195 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 196 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 197 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 198 // CHECK2-NEXT: store i16 
[[CONV1]], i16* [[TMP0]], align 2 199 // CHECK2-NEXT: ret void 200 // 201 // 202 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35 203 // CHECK2-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { 204 // CHECK2-NEXT: entry: 205 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 206 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 207 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 208 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 209 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 210 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 211 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 212 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 213 // CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 214 // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 215 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 216 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 217 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 218 // CHECK2: user_code.entry: 219 // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 220 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 221 // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8* 222 // CHECK2-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 223 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 224 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP1]] to i8* 225 // CHECK2-NEXT: store 
i8* [[TMP8]], i8** [[TMP7]], align 4 226 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 227 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 228 // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 229 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 230 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i32 3) 231 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 232 // CHECK2-NEXT: ret void 233 // CHECK2: worker.exit: 234 // CHECK2-NEXT: ret void 235 // 236 // 237 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 238 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 239 // CHECK2-NEXT: entry: 240 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 241 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 242 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 243 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 244 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 245 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 246 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 247 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 248 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 249 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 250 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 251 // 
CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 252 // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 253 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 254 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 255 // CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 256 // CHECK2-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 257 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 258 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 259 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 260 // CHECK2-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 261 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 2 262 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 263 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 264 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 265 // CHECK2-NEXT: ret void 266 // 267