1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // Test target codegen - host bc file has to be created first. 3 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 4 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK4 5 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 6 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5 7 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5 8 9 // expected-no-diagnostics 10 #ifndef HEADER 11 #define HEADER 12 13 int a; /* global: mapped tofrom into the target region, used as the if-clause condition, and assigned in the loop body */ 14 15 int foo(int *a); /* declaration only in the visible part of this file; called five times per loop iteration */ 16 /* Codegen input: a target teams region enclosing a distribute parallel for with firstprivate, lastprivate and if clauses. NOTE(review): the checked kernel names end in _main_l19, which appears to encode the source line of the target directive - keep the line numbering of this code stable. */ 17 int main(int argc, char **argv) { 18 int b[10], c[10], d[10]; /* b is firstprivate, c is lastprivate, d appears in no explicit clause */ 19 #pragma omp target teams map(tofrom:a) 20 #pragma omp distribute parallel for firstprivate(b) lastprivate(c) if(a) 21 for (int i= 0; i < argc; ++i) 22 a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]); /* passes the addresses of i, a and the i-th element of b, c, d to foo */ 23 return 0; 24 } 25 26 #endif 27 // CHECK4-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19 28 // CHECK4-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { 29 // CHECK4-NEXT: entry: 30 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 31 // CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 32 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 33 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 34 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 35 // CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 36 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 37 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 38 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 39 // CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 40 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 41 // CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 42 // CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 43 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 44 // CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 45 // CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 46 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 47 // CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 48 // CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) 49 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 50 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 51 // CHECK4: user_code.entry: 52 // 
CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) 53 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 54 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* 55 // CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4 56 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 57 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 58 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 59 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] 60 // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 61 // CHECK4-NEXT: ret void 62 // CHECK4: worker.exit: 63 // CHECK4-NEXT: ret void 64 // 65 // 66 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ 67 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { 68 // CHECK4-NEXT: entry: 69 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 70 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 71 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 72 // CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 73 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 74 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 75 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 76 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 77 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 78 // CHECK4-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 79 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 80 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 81 // CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 82 // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 83 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 84 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 85 // CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 86 // CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 87 // CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8 88 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 89 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 90 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 91 // CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 92 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 93 // CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 94 // CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 95 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 96 // CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 97 // CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 98 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* 99 // CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 100 // CHECK4-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) 101 // CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* 102 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 103 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 104 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 105 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 106 // CHECK4-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 107 // CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 108 // CHECK4-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 109 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 110 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 111 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] 112 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 113 // CHECK4: omp.precond.then: 114 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 115 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 116 // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 117 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 118 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 119 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8* 120 // CHECK4-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* 121 // CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) 122 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 123 // CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 124 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 125 // CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) 126 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 127 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 128 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] 129 // CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 130 // CHECK4: cond.true: 131 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, 
i32* [[DOTCAPTURE_EXPR_2]], align 4 132 // CHECK4-NEXT: br label [[COND_END:%.*]] 133 // CHECK4: cond.false: 134 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 135 // CHECK4-NEXT: br label [[COND_END]] 136 // CHECK4: cond.end: 137 // CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] 138 // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 139 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 140 // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 141 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 142 // CHECK4: omp.inner.for.cond: 143 // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 144 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 145 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 146 // CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] 147 // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 148 // CHECK4: omp.inner.for.body: 149 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 150 // CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 151 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 152 // CHECK4-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 153 // CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 154 // CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to i8* 155 // CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 156 // CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 157 // CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to i8* 158 // CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 159 // CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 160 // 
CHECK4-NEXT: [[TMP28:%.*]] = bitcast i32* [[CONV]] to i8* 161 // CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 162 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 163 // CHECK4-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* 164 // CHECK4-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 165 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 166 // CHECK4-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* 167 // CHECK4-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 168 // CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 169 // CHECK4-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* 170 // CHECK4-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 171 // CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 172 // CHECK4-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* 173 // CHECK4-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 174 // CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 175 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 176 // CHECK4-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 177 // CHECK4-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 178 // CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 179 // CHECK4-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 180 // CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i64 7) 181 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 182 // CHECK4: omp.inner.for.inc: 183 // CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 184 // CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 185 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] 186 // CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 187 // CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 188 // CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 189 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] 190 // CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 191 // CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 192 // CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 193 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] 194 // CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 195 // CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 196 // CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 197 // CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] 198 // CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] 199 // CHECK4: cond.true12: 200 // CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 201 // CHECK4-NEXT: br label [[COND_END14:%.*]] 202 // CHECK4: cond.false13: 203 // CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 204 // CHECK4-NEXT: br label [[COND_END14]] 205 // CHECK4: cond.end14: 206 // CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] 207 // CHECK4-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 208 // CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 209 // CHECK4-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 210 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] 211 // CHECK4: omp.inner.for.end: 212 // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 213 // CHECK4: omp.loop.exit: 214 // 
CHECK4-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 215 // CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 216 // CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) 217 // CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 218 // CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 219 // CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 220 // CHECK4: .omp.lastprivate.then: 221 // CHECK4-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* 222 // CHECK4-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* 223 // CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i64 40, i1 false) 224 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 225 // CHECK4: .omp.lastprivate.done: 226 // CHECK4-NEXT: br label [[OMP_PRECOND_END]] 227 // CHECK4: omp.precond.end: 228 // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i64 40) 229 // CHECK4-NEXT: ret void 230 // 231 // 232 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 233 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { 234 // CHECK4-NEXT: entry: 235 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 236 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 237 // CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 238 // CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 239 // 
CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 240 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 241 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 242 // CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 243 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 244 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 245 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 246 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 247 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 248 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 249 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 250 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 251 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 252 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 253 // CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 254 // CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4 255 // CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4 256 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 257 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 258 // CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 259 // CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 260 // CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 261 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 262 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 263 // CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 264 // CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 265 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 266 // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 267 // CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 268 // 
CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 269 // CHECK4-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 270 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 271 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 272 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 273 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 274 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 275 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 276 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 277 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 278 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 279 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] 280 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 281 // CHECK4: omp.precond.then: 282 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 283 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 284 // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 285 // CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 286 // CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 287 // CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 288 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 289 // CHECK4-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 290 // CHECK4-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 291 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 292 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 293 // CHECK4-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8* 294 // CHECK4-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 295 // CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false) 296 // 
CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 297 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 298 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 299 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 300 // CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 301 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 302 // CHECK4: omp.inner.for.cond: 303 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 304 // CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 305 // CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 306 // CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] 307 // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 308 // CHECK4: omp.inner.for.body: 309 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 310 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 311 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 312 // CHECK4-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 313 // CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I6]]) #[[ATTR8:[0-9]+]] 314 // CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP1]]) #[[ATTR8]] 315 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] 316 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 317 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 318 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] 319 // CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]] 320 // CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] 321 // CHECK4-NEXT: [[TMP20:%.*]] = 
load i32, i32* [[I6]], align 4 322 // CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 323 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] 324 // CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX14]]) #[[ATTR8]] 325 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] 326 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 327 // CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 328 // CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] 329 // CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX18]]) #[[ATTR8]] 330 // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] 331 // CHECK4-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 332 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 333 // CHECK4: omp.body.continue: 334 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 335 // CHECK4: omp.inner.for.inc: 336 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 337 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 338 // CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] 339 // CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 340 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] 341 // CHECK4: omp.inner.for.end: 342 // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 343 // CHECK4: omp.loop.exit: 344 // CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 345 // CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 346 // CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) 347 // CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 348 // CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 349 // CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], 
label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 350 // CHECK4: .omp.lastprivate.then: 351 // CHECK4-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* 352 // CHECK4-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8* 353 // CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false) 354 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 355 // CHECK4: .omp.lastprivate.done: 356 // CHECK4-NEXT: br label [[OMP_PRECOND_END]] 357 // CHECK4: omp.precond.end: 358 // CHECK4-NEXT: ret void 359 // 360 // 361 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19 362 // CHECK5-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { 363 // CHECK5-NEXT: entry: 364 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 365 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 366 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 367 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 368 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 369 // CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 370 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 371 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 372 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 373 // CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 374 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 375 // CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 376 // CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 377 // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 378 // CHECK5-NEXT: [[TMP1:%.*]] = load 
[10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 379 // CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 380 // CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 381 // CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) 382 // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 383 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 384 // CHECK5: user_code.entry: 385 // CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) 386 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 387 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 388 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 389 // CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 390 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 391 // CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] 392 // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 393 // CHECK5-NEXT: ret void 394 // CHECK5: worker.exit: 395 // CHECK5-NEXT: ret void 396 // 397 // 398 // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ 399 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { 400 // CHECK5-NEXT: entry: 401 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 402 // CHECK5-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 403 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 404 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 405 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 406 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 407 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 408 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 409 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 410 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 411 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 412 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 413 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 414 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 415 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 416 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 417 // CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 418 // CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 419 // CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 420 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 421 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 422 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 423 // CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 424 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 425 // CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 426 // CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 427 // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 428 // CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 429 // CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 430 // CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 431 // 
CHECK5-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 40) 432 // CHECK5-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* 433 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 434 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 435 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 436 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 437 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 438 // CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 439 // CHECK5-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 440 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 441 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 442 // CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] 443 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 444 // CHECK5: omp.precond.then: 445 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 446 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 447 // CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 448 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 449 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 450 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8* 451 // CHECK5-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* 452 // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) 453 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 454 // CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 455 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 456 // CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* 
[[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) 457 // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 458 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 459 // CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] 460 // CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 461 // CHECK5: cond.true: 462 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 463 // CHECK5-NEXT: br label [[COND_END:%.*]] 464 // CHECK5: cond.false: 465 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 466 // CHECK5-NEXT: br label [[COND_END]] 467 // CHECK5: cond.end: 468 // CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] 469 // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 470 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 471 // CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 472 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 473 // CHECK5: omp.inner.for.cond: 474 // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 475 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 476 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 477 // CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] 478 // CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 479 // CHECK5: omp.inner.for.body: 480 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 481 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 482 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 483 // CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to i8* 484 // CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 485 // CHECK5-NEXT: [[TMP23:%.*]] = 
getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 486 // CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to i8* 487 // CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 488 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 489 // CHECK5-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* 490 // CHECK5-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 491 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 492 // CHECK5-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* 493 // CHECK5-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 494 // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 495 // CHECK5-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[B4]] to i8* 496 // CHECK5-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 497 // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 498 // CHECK5-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* 499 // CHECK5-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 500 // CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 501 // CHECK5-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* 502 // CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 503 // CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP2]], align 4 504 // CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0 505 // CHECK5-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32 506 // CHECK5-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 507 // CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 508 // CHECK5-NEXT: [[TMP39:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 509 // CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* 
@[[GLOB4]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i32 7) 510 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 511 // CHECK5: omp.inner.for.inc: 512 // CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 513 // CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 514 // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] 515 // CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 516 // CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 517 // CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 518 // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] 519 // CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 520 // CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 521 // CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 522 // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] 523 // CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 524 // CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 525 // CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 526 // CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] 527 // CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] 528 // CHECK5: cond.true12: 529 // CHECK5-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 530 // CHECK5-NEXT: br label [[COND_END14:%.*]] 531 // CHECK5: cond.false13: 532 // CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 533 // CHECK5-NEXT: br label [[COND_END14]] 534 // CHECK5: cond.end14: 535 // CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ] 536 // CHECK5-NEXT: store i32 [[COND15]], 
i32* [[DOTOMP_COMB_UB]], align 4 537 // CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 538 // CHECK5-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 539 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] 540 // CHECK5: omp.inner.for.end: 541 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 542 // CHECK5: omp.loop.exit: 543 // CHECK5-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 544 // CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 545 // CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) 546 // CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 547 // CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 548 // CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 549 // CHECK5: .omp.lastprivate.then: 550 // CHECK5-NEXT: [[TMP55:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* 551 // CHECK5-NEXT: [[TMP56:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* 552 // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i32 40, i1 false) 553 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 554 // CHECK5: .omp.lastprivate.done: 555 // CHECK5-NEXT: br label [[OMP_PRECOND_END]] 556 // CHECK5: omp.precond.end: 557 // CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 40) 558 // CHECK5-NEXT: ret void 559 // 560 // 561 // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 562 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* noundef nonnull align 4 
dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { 563 // CHECK5-NEXT: entry: 564 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 565 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 566 // CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 567 // CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 568 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 569 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 570 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 571 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 572 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 573 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 574 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 575 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 576 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 577 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 578 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 579 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 580 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 581 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 582 // CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 583 // CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 584 // CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 585 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 586 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 587 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 588 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 589 // CHECK5-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 590 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 591 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 592 // CHECK5-NEXT: store [10 x i32]* 
[[C]], [10 x i32]** [[C_ADDR]], align 4 593 // CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 594 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 595 // CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 596 // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 597 // CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 598 // CHECK5-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 599 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 600 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 601 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 602 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 603 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 604 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 605 // CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 606 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 607 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 608 // CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] 609 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 610 // CHECK5: omp.precond.then: 611 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 612 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 613 // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 614 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 615 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 616 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 617 // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 618 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 619 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 620 // CHECK5-NEXT: [[TMP11:%.*]] = 
bitcast [10 x i32]* [[B3]] to i8* 621 // CHECK5-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 622 // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) 623 // CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 624 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 625 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 626 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 627 // CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 628 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 629 // CHECK5: omp.inner.for.cond: 630 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 631 // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 632 // CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] 633 // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 634 // CHECK5: omp.inner.for.body: 635 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 636 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 637 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 638 // CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 639 // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I5]]) #[[ATTR8:[0-9]+]] 640 // CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP1]]) #[[ATTR8]] 641 // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] 642 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 643 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] 644 // CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]] 
645 // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] 646 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 647 // CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] 648 // CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX11]]) #[[ATTR8]] 649 // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] 650 // CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 651 // CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] 652 // CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX14]]) #[[ATTR8]] 653 // CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] 654 // CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 655 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 656 // CHECK5: omp.body.continue: 657 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 658 // CHECK5: omp.inner.for.inc: 659 // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 660 // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 661 // CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] 662 // CHECK5-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 663 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] 664 // CHECK5: omp.inner.for.end: 665 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 666 // CHECK5: omp.loop.exit: 667 // CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 668 // CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 669 // CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) 670 // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 671 // CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 672 // CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 
673 // CHECK5: .omp.lastprivate.then: 674 // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* 675 // CHECK5-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* 676 // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) 677 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 678 // CHECK5: .omp.lastprivate.done: 679 // CHECK5-NEXT: br label [[OMP_PRECOND_END]] 680 // CHECK5: omp.precond.end: 681 // CHECK5-NEXT: ret void 682 // 683