1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // expected-no-diagnostics 3 #ifndef HEADER 4 #define HEADER 5 // Test host codegen. 6 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 7 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 8 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 9 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 10 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK4 11 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 12 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK5 13 14 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" 15 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 16 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple 
powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" 17 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" 18 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 19 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" 20 21 // Test target codegen - host bc file has to be created first. (no significant differences with host version of target region) 22 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc 23 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK10 24 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s 25 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11 26 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 27 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple 
i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK12 28 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s 29 // RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK13 30 31 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc 32 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" 33 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s 34 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" 35 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 36 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s 
--implicit-check-not="{{__kmpc|__tgt}}" 37 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s 38 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" 39 40 #ifdef CK1 41 // target_teams_fun: test input with two combined 'target teams distribute parallel for' regions that write a local 1000-element array; returns a[0]. 42 // NOTE(review): the _l51/_l58 suffixes in the CHECK lines match the line numbers of the two pragmas below, so shifting those lines presumably requires regenerating the assertions. 43 int target_teams_fun(int *g){ 44 int n = 1000; 45 int a[1000]; 46 int te = n / 128; 47 int th = 128; 48 // discard n_addr 49 // discard capture expressions for te and th 50 // Region 1: num_teams(te) and thread_limit(th), with te = n / 128 and th = 128; the loop body zeroes a[i] and contains a 'cancel for'. 51 #pragma omp target teams distribute parallel for num_teams(te), thread_limit(th) 52 for(int i = 0; i < n; i++) { 53 a[i] = 0; 54 #pragma omp cancel for 55 } 56 // Region 2: enclosed in three nested compound statements; g is listed in is_device_ptr and every a[i] is set to g[0]. 57 {{{ 58 #pragma omp target teams distribute parallel for is_device_ptr(g) 59 for(int i = 0; i < n; i++) { 60 a[i] = g[0]; 61 } 62 }}} 63 64 // outlined target regions 65 66 67 68 69 return a[0]; 70 } 71 72 #endif // CK1 73 #endif // HEADER 74 // CHECK1-LABEL: define {{[^@]+}}@_Z16target_teams_funPi 75 // CHECK1-SAME: (i32* noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { 76 // CHECK1-NEXT: entry: 77 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 78 // CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 79 // CHECK1-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 80 // CHECK1-NEXT: [[TE:%.*]] = alloca i32, align 4 81 // CHECK1-NEXT: [[TH:%.*]] = alloca i32, align 4 82 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 83 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 84 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 85 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 86 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED3:%.*]] = alloca i64, align 8 87 // CHECK1-NEXT: [[N_CASTED5:%.*]] = alloca i64, align 8 88 // CHECK1-NEXT: store i32* 
[[G_ADDR]], align 8 89 // CHECK1-NEXT: store i32 1000, i32* [[N]], align 4 90 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N]], align 4 91 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 92 // CHECK1-NEXT: store i32 [[DIV]], i32* [[TE]], align 4 93 // CHECK1-NEXT: store i32 128, i32* [[TH]], align 4 94 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TE]], align 4 95 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 96 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TH]], align 4 97 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 98 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N]], align 4 99 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32* 100 // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 101 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 102 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 103 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* 104 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV2]], align 4 105 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 106 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 107 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED3]] to i32* 108 // CHECK1-NEXT: store i32 [[TMP7]], i32* [[CONV4]], align 4 109 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED3]], align 8 110 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i64 [[TMP4]], [1000 x i32]* [[A]], i64 [[TMP6]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] 111 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 112 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED5]] to i32* 113 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[CONV6]], align 4 114 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[N_CASTED5]], align 8 115 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** 
[[G_ADDR]], align 8 116 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58(i64 [[TMP10]], [1000 x i32]* [[A]], i32* [[TMP11]]) #[[ATTR2]] 117 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[A]], i64 0, i64 0 118 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 119 // CHECK1-NEXT: ret i32 [[TMP12]] 120 // 121 // 122 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 123 // CHECK1-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { 124 // CHECK1-NEXT: entry: 125 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 126 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 127 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 128 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 129 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 130 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) 131 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 132 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 133 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 134 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 135 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 136 // CHECK1-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 137 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* 138 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* 139 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 140 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 141 // 
CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 142 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 143 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* 144 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 145 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 146 // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) 147 // CHECK1-NEXT: ret void 148 // 149 // 150 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 151 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 152 // CHECK1-NEXT: entry: 153 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 154 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 155 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 156 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 157 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 158 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 159 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 160 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 161 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 162 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 163 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 164 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 165 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 166 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 167 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 168 // CHECK1-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 169 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 170 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 171 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 172 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 173 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 174 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 175 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 176 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 177 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 178 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 179 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 180 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 181 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 182 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 183 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 184 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 185 // CHECK1: omp.precond.then: 186 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 187 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 188 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 189 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 190 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 191 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 192 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 193 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 194 // CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 195 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 196 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 197 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 198 // CHECK1: cond.true: 199 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 200 // CHECK1-NEXT: br label [[COND_END:%.*]] 201 // CHECK1: cond.false: 202 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 203 // CHECK1-NEXT: br label [[COND_END]] 204 // CHECK1: cond.end: 205 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 206 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 207 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 208 // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 209 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 210 // CHECK1: omp.inner.for.cond: 211 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 212 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 213 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 214 // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 215 // CHECK1: omp.inner.for.body: 216 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 217 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 218 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 219 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 220 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 221 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 222 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 223 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 224 // CHECK1-NEXT: call void (%struct.ident_t*, 
i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) 225 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 226 // CHECK1: omp.inner.for.inc: 227 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 228 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 229 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] 230 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 231 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 232 // CHECK1: omp.inner.for.end: 233 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 234 // CHECK1: omp.loop.exit: 235 // CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 236 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 237 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) 238 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 239 // CHECK1: omp.precond.end: 240 // CHECK1-NEXT: ret void 241 // 242 // 243 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 244 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 245 // CHECK1-NEXT: entry: 246 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 247 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 248 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 249 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 250 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 251 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 252 // 
CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 253 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 254 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 255 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 256 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 257 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 258 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 259 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 260 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 261 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 262 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 263 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 264 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 265 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 266 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 267 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 268 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 269 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 270 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 271 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 272 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 273 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 274 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 275 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 276 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 277 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 278 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 279 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 280 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] 281 // CHECK1: omp.precond.then: 282 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 283 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 284 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 285 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 286 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 287 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 288 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 289 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 290 // CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 291 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 292 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 293 // CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 294 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 295 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 296 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 297 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 298 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 299 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 300 // CHECK1: cond.true: 301 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 302 // CHECK1-NEXT: br label [[COND_END:%.*]] 303 // CHECK1: cond.false: 304 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 305 // CHECK1-NEXT: br label [[COND_END]] 306 // CHECK1: cond.end: 307 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 308 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 
309 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 310 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 311 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 312 // CHECK1: omp.inner.for.cond: 313 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 314 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 315 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 316 // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 317 // CHECK1: omp.inner.for.body: 318 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 319 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 320 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 321 // CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 322 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 323 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 324 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 325 // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 326 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 327 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 328 // CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 329 // CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 330 // CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 331 // CHECK1: .cancel.exit: 332 // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] 333 // CHECK1: .cancel.continue: 334 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 335 // CHECK1: omp.body.continue: 336 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 337 // CHECK1: omp.inner.for.inc: 338 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 339 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 
[[TMP22]], 1 340 // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 341 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 342 // CHECK1: omp.inner.for.end: 343 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 344 // CHECK1: omp.loop.exit: 345 // CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 346 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 347 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 348 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 349 // CHECK1: cancel.exit: 350 // CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 351 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 352 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 353 // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] 354 // CHECK1: omp.precond.end: 355 // CHECK1-NEXT: br label [[CANCEL_CONT]] 356 // CHECK1: cancel.cont: 357 // CHECK1-NEXT: ret void 358 // 359 // 360 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 361 // CHECK1-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 362 // CHECK1-NEXT: entry: 363 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 364 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 365 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 366 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 367 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 368 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 369 // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 370 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 371 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 372 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 373 // 
CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* 374 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 375 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 376 // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 377 // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 378 // CHECK1-NEXT: ret void 379 // 380 // 381 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 382 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 383 // CHECK1-NEXT: entry: 384 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 385 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 386 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 387 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 388 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 389 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 390 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 391 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 392 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 393 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 394 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 395 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 396 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 397 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 398 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 399 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 400 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], 
i32** [[DOTGLOBAL_TID__ADDR]], align 8 401 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 402 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 403 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 404 // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 405 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 406 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 407 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 408 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 409 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 410 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 411 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 412 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 413 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 414 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 415 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 416 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 417 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 418 // CHECK1: omp.precond.then: 419 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 420 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 421 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 422 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 423 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 424 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 425 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 426 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, 
i32 1) 427 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 428 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 429 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 430 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 431 // CHECK1: cond.true: 432 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 433 // CHECK1-NEXT: br label [[COND_END:%.*]] 434 // CHECK1: cond.false: 435 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 436 // CHECK1-NEXT: br label [[COND_END]] 437 // CHECK1: cond.end: 438 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 439 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 440 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 441 // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 442 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 443 // CHECK1: omp.inner.for.cond: 444 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 445 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 446 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 447 // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 448 // CHECK1: omp.inner.for.body: 449 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 450 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 451 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 452 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 453 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 454 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 455 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 456 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 457 // CHECK1-NEXT: 
[[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 458 // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) 459 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 460 // CHECK1: omp.inner.for.inc: 461 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 462 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 463 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] 464 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 465 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 466 // CHECK1: omp.inner.for.end: 467 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 468 // CHECK1: omp.loop.exit: 469 // CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 470 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 471 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 472 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 473 // CHECK1: omp.precond.end: 474 // CHECK1-NEXT: ret void 475 // 476 // 477 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 478 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 479 // CHECK1-NEXT: entry: 480 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 481 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 482 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 483 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca 
i64, align 8 484 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 485 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 486 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 487 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 488 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 489 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 490 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 491 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 492 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 493 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 494 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 495 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 496 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 497 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 498 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 499 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 500 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 501 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 502 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 503 // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 504 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 505 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 506 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 507 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 508 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 509 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 510 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 511 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 512 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 513 // 
CHECK1-NEXT: store i32 0, i32* [[I]], align 4 514 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 515 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 516 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 517 // CHECK1: omp.precond.then: 518 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 519 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 520 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 521 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 522 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 523 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 524 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 525 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 526 // CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 527 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 528 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 529 // CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 530 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 531 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 532 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 533 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 534 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 535 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 536 // CHECK1: cond.true: 537 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 538 // CHECK1-NEXT: br label [[COND_END:%.*]] 539 // CHECK1: cond.false: 540 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 541 // CHECK1-NEXT: br label [[COND_END]] 542 // CHECK1: cond.end: 543 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 544 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 545 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 546 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 547 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 548 // CHECK1: omp.inner.for.cond: 549 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 550 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 551 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 552 // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 553 // CHECK1: omp.inner.for.body: 554 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 555 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 556 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 557 // CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 558 // CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 559 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 560 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 561 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 562 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 563 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 564 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 565 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 566 // CHECK1: omp.body.continue: 567 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 568 // CHECK1: omp.inner.for.inc: 569 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 570 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 571 // CHECK1-NEXT: store i32 
[[ADD9]], i32* [[DOTOMP_IV]], align 4 572 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 573 // CHECK1: omp.inner.for.end: 574 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 575 // CHECK1: omp.loop.exit: 576 // CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 577 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 578 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 579 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 580 // CHECK1: omp.precond.end: 581 // CHECK1-NEXT: ret void 582 // 583 // 584 // CHECK2-LABEL: define {{[^@]+}}@_Z16target_teams_funPi 585 // CHECK2-SAME: (i32* noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { 586 // CHECK2-NEXT: entry: 587 // CHECK2-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 588 // CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 589 // CHECK2-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 590 // CHECK2-NEXT: [[TE:%.*]] = alloca i32, align 4 591 // CHECK2-NEXT: [[TH:%.*]] = alloca i32, align 4 592 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 593 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 594 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 595 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 596 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED3:%.*]] = alloca i64, align 8 597 // CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 598 // CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 599 // CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 600 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 601 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 602 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 603 // CHECK2-NEXT: [[N_CASTED9:%.*]] = alloca i64, align 8 604 // CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [3 x i8*], align 8 605 // CHECK2-NEXT: [[DOTOFFLOAD_PTRS12:%.*]] = alloca [3 x i8*], align 8 606 // 
CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [3 x i8*], align 8 607 // CHECK2-NEXT: [[_TMP14:%.*]] = alloca i32, align 4 608 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_15:%.*]] = alloca i32, align 4 609 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_16:%.*]] = alloca i32, align 4 610 // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 611 // CHECK2-NEXT: store i32 1000, i32* [[N]], align 4 612 // CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N]], align 4 613 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 614 // CHECK2-NEXT: store i32 [[DIV]], i32* [[TE]], align 4 615 // CHECK2-NEXT: store i32 128, i32* [[TH]], align 4 616 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TE]], align 4 617 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 618 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TH]], align 4 619 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 620 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N]], align 4 621 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32* 622 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 623 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 624 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 625 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* 626 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV2]], align 4 627 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 628 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 629 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED3]] to i32* 630 // CHECK2-NEXT: store i32 [[TMP7]], i32* [[CONV4]], align 4 631 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED3]], align 8 632 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 633 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* 634 
// CHECK2-NEXT: store i64 [[TMP4]], i64* [[TMP10]], align 8 635 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 636 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* 637 // CHECK2-NEXT: store i64 [[TMP4]], i64* [[TMP12]], align 8 638 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 639 // CHECK2-NEXT: store i8* null, i8** [[TMP13]], align 8 640 // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 641 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [1000 x i32]** 642 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP15]], align 8 643 // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 644 // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [1000 x i32]** 645 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP17]], align 8 646 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 647 // CHECK2-NEXT: store i8* null, i8** [[TMP18]], align 8 648 // CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 649 // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i64* 650 // CHECK2-NEXT: store i64 [[TMP6]], i64* [[TMP20]], align 8 651 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 652 // CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64* 653 // CHECK2-NEXT: store i64 [[TMP6]], i64* [[TMP22]], align 8 654 // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 655 // CHECK2-NEXT: store i8* null, i8** [[TMP23]], align 8 656 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, 
i32 3 657 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64* 658 // CHECK2-NEXT: store i64 [[TMP8]], i64* [[TMP25]], align 8 659 // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 660 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64* 661 // CHECK2-NEXT: store i64 [[TMP8]], i64* [[TMP27]], align 8 662 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 663 // CHECK2-NEXT: store i8* null, i8** [[TMP28]], align 8 664 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 665 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 666 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 667 // CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 668 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[N]], align 4 669 // CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_5]], align 4 670 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 671 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0 672 // CHECK2-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB]], 1 673 // CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[DIV7]], 1 674 // CHECK2-NEXT: store i32 [[SUB8]], i32* [[DOTCAPTURE_EXPR_6]], align 4 675 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_6]], align 4 676 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], 1 677 // CHECK2-NEXT: [[TMP36:%.*]] = zext i32 [[ADD]] to i64 678 // CHECK2-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP36]]) 679 // CHECK2-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.region_id, i32 4, i8** [[TMP29]], i8** [[TMP30]], i64* 
getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP31]], i32 [[TMP32]]) 680 // CHECK2-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 681 // CHECK2-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] 682 // CHECK2: omp_offload.failed: 683 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i64 [[TMP4]], [1000 x i32]* [[A]], i64 [[TMP6]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] 684 // CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] 685 // CHECK2: omp_offload.cont: 686 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[N]], align 4 687 // CHECK2-NEXT: [[CONV10:%.*]] = bitcast i64* [[N_CASTED9]] to i32* 688 // CHECK2-NEXT: store i32 [[TMP39]], i32* [[CONV10]], align 4 689 // CHECK2-NEXT: [[TMP40:%.*]] = load i64, i64* [[N_CASTED9]], align 8 690 // CHECK2-NEXT: [[TMP41:%.*]] = load i32*, i32** [[G_ADDR]], align 8 691 // CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 692 // CHECK2-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64* 693 // CHECK2-NEXT: store i64 [[TMP40]], i64* [[TMP43]], align 8 694 // CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 695 // CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64* 696 // CHECK2-NEXT: store i64 [[TMP40]], i64* [[TMP45]], align 8 697 // CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 0 698 // CHECK2-NEXT: store i8* null, i8** [[TMP46]], align 8 699 // CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 1 700 // CHECK2-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to [1000 x i32]** 701 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP48]], align 8 702 // 
CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 1 703 // CHECK2-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to [1000 x i32]** 704 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP50]], align 8 705 // CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 1 706 // CHECK2-NEXT: store i8* null, i8** [[TMP51]], align 8 707 // CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 2 708 // CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32** 709 // CHECK2-NEXT: store i32* [[TMP41]], i32** [[TMP53]], align 8 710 // CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 2 711 // CHECK2-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** 712 // CHECK2-NEXT: store i32* [[TMP41]], i32** [[TMP55]], align 8 713 // CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 2 714 // CHECK2-NEXT: store i8* null, i8** [[TMP56]], align 8 715 // CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 716 // CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 717 // CHECK2-NEXT: [[TMP59:%.*]] = load i32, i32* [[N]], align 4 718 // CHECK2-NEXT: store i32 [[TMP59]], i32* [[DOTCAPTURE_EXPR_15]], align 4 719 // CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_15]], align 4 720 // CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP60]], 0 721 // CHECK2-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 722 // CHECK2-NEXT: [[SUB19:%.*]] = sub nsw i32 [[DIV18]], 1 723 // CHECK2-NEXT: store i32 [[SUB19]], i32* [[DOTCAPTURE_EXPR_16]], align 4 724 // CHECK2-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_16]], align 4 725 // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 
[[TMP61]], 1 726 // CHECK2-NEXT: [[TMP62:%.*]] = zext i32 [[ADD20]] to i64 727 // CHECK2-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP62]]) 728 // CHECK2-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.region_id, i32 3, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) 729 // CHECK2-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 730 // CHECK2-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED21:%.*]], label [[OMP_OFFLOAD_CONT22:%.*]] 731 // CHECK2: omp_offload.failed21: 732 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58(i64 [[TMP40]], [1000 x i32]* [[A]], i32* [[TMP41]]) #[[ATTR2]] 733 // CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT22]] 734 // CHECK2: omp_offload.cont22: 735 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[A]], i64 0, i64 0 736 // CHECK2-NEXT: [[TMP65:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 737 // CHECK2-NEXT: ret i32 [[TMP65]] 738 // 739 // 740 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 741 // CHECK2-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { 742 // CHECK2-NEXT: entry: 743 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 744 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 745 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 746 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 747 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 
748 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) 749 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 750 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 751 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 752 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 753 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 754 // CHECK2-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 755 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* 756 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* 757 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 758 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 759 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 760 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 761 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* 762 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 763 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 764 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) 765 // CHECK2-NEXT: ret void 766 // 767 // 768 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
769 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 770 // CHECK2-NEXT: entry: 771 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 772 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 773 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 774 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 775 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 776 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 777 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 778 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 779 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 780 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 781 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 782 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 783 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 784 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 785 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 786 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 787 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 788 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 789 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 790 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 791 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 792 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 793 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 794 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 795 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 796 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 797 
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 798 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 799 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 800 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 801 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 802 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 803 // CHECK2: omp.precond.then: 804 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 805 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 806 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 807 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 808 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 809 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 810 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 811 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 812 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 813 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 814 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 815 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 816 // CHECK2: cond.true: 817 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 818 // CHECK2-NEXT: br label [[COND_END:%.*]] 819 // CHECK2: cond.false: 820 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 821 // CHECK2-NEXT: br label [[COND_END]] 822 // CHECK2: cond.end: 823 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 824 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 825 
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 826 // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 827 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 828 // CHECK2: omp.inner.for.cond: 829 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 830 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 831 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 832 // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 833 // CHECK2: omp.inner.for.body: 834 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 835 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 836 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 837 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 838 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 839 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 840 // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 841 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 842 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) 843 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 844 // CHECK2: omp.inner.for.inc: 845 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 846 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 847 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] 848 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 849 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] 850 // CHECK2: omp.inner.for.end: 851 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 852 // CHECK2: omp.loop.exit: 853 // CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 854 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 855 // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) 856 // CHECK2-NEXT: br label [[OMP_PRECOND_END]] 857 // CHECK2: omp.precond.end: 858 // CHECK2-NEXT: ret void 859 // 860 // 861 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 862 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 863 // CHECK2-NEXT: entry: 864 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 865 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 866 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 867 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 868 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 869 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 870 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca 
i32, align 4 871 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 872 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 873 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 874 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 875 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 876 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 877 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 878 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 879 // CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 880 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 881 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 882 // CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 883 // CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 884 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 885 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 886 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 887 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 888 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 889 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 890 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 891 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 892 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 893 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 894 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 895 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 896 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 897 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 898 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 899 // CHECK2: 
omp.precond.then: 900 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 901 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 902 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 903 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 904 // CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 905 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 906 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 907 // CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 908 // CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 909 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 910 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 911 // CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 912 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 913 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 914 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 915 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 916 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 917 // CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 918 // CHECK2: cond.true: 919 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 920 // CHECK2-NEXT: br label [[COND_END:%.*]] 921 // CHECK2: cond.false: 922 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 923 // CHECK2-NEXT: br label [[COND_END]] 924 // CHECK2: cond.end: 925 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 926 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 927 // CHECK2-NEXT: [[TMP13:%.*]] = 
load i32, i32* [[DOTOMP_LB]], align 4 928 // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 929 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 930 // CHECK2: omp.inner.for.cond: 931 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 932 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 933 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 934 // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 935 // CHECK2: omp.inner.for.body: 936 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 937 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 938 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 939 // CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 940 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 941 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 942 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 943 // CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 944 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 945 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 946 // CHECK2-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 947 // CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 948 // CHECK2-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 949 // CHECK2: .cancel.exit: 950 // CHECK2-NEXT: br label [[CANCEL_EXIT:%.*]] 951 // CHECK2: .cancel.continue: 952 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 953 // CHECK2: omp.body.continue: 954 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 955 // CHECK2: omp.inner.for.inc: 956 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 957 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 958 // CHECK2-NEXT: store i32 
[[ADD8]], i32* [[DOTOMP_IV]], align 4 959 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] 960 // CHECK2: omp.inner.for.end: 961 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 962 // CHECK2: omp.loop.exit: 963 // CHECK2-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 964 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 965 // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 966 // CHECK2-NEXT: br label [[OMP_PRECOND_END]] 967 // CHECK2: cancel.exit: 968 // CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 969 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 970 // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 971 // CHECK2-NEXT: br label [[CANCEL_CONT:%.*]] 972 // CHECK2: omp.precond.end: 973 // CHECK2-NEXT: br label [[CANCEL_CONT]] 974 // CHECK2: cancel.cont: 975 // CHECK2-NEXT: ret void 976 // 977 // 978 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 979 // CHECK2-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 980 // CHECK2-NEXT: entry: 981 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 982 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 983 // CHECK2-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 984 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 985 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 986 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 987 // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 988 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 989 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 990 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 991 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* 
[[N_CASTED]] to i32* 992 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 993 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 994 // CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 995 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 996 // CHECK2-NEXT: ret void 997 // 998 // 999 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 1000 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 1001 // CHECK2-NEXT: entry: 1002 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1003 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1004 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1005 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1006 // CHECK2-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 1007 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1008 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 1009 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1010 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1011 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 1012 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 1013 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 1014 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1015 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1016 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 1017 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 1018 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 1019 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1020 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1021 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1022 // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 1023 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1024 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 1025 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 1026 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1027 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1028 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 1029 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 1030 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 1031 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1032 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 1033 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1034 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 1035 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 1036 // CHECK2: omp.precond.then: 1037 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 1038 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1039 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 1040 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 1041 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 1042 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1043 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 1044 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* 
[[DOTOMP_STRIDE]], i32 1, i32 1) 1045 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1046 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1047 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 1048 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1049 // CHECK2: cond.true: 1050 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1051 // CHECK2-NEXT: br label [[COND_END:%.*]] 1052 // CHECK2: cond.false: 1053 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1054 // CHECK2-NEXT: br label [[COND_END]] 1055 // CHECK2: cond.end: 1056 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 1057 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 1058 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 1059 // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 1060 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1061 // CHECK2: omp.inner.for.cond: 1062 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1063 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1064 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 1065 // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1066 // CHECK2: omp.inner.for.body: 1067 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 1068 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 1069 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1070 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 1071 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 1072 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 1073 // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 1074 // CHECK2-NEXT: [[TMP19:%.*]] = 
load i64, i64* [[N_CASTED]], align 8 1075 // CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 1076 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) 1077 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1078 // CHECK2: omp.inner.for.inc: 1079 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1080 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 1081 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] 1082 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 1083 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] 1084 // CHECK2: omp.inner.for.end: 1085 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1086 // CHECK2: omp.loop.exit: 1087 // CHECK2-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1088 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 1089 // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 1090 // CHECK2-NEXT: br label [[OMP_PRECOND_END]] 1091 // CHECK2: omp.precond.end: 1092 // CHECK2-NEXT: ret void 1093 // 1094 // 1095 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 1096 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 1097 // CHECK2-NEXT: entry: 1098 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1099 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1100 // CHECK2-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 1101 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 1102 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1103 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1104 // CHECK2-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 1105 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1106 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 1107 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1108 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1109 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 1110 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 1111 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 1112 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1113 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1114 // CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 1115 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 1116 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1117 // CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 1118 // CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 1119 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1120 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1121 // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 1122 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1123 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 1124 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 1125 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1126 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1127 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 1128 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 1129 
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 1130 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1131 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 1132 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1133 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 1134 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 1135 // CHECK2: omp.precond.then: 1136 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 1137 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1138 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 1139 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 1140 // CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 1141 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 1142 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 1143 // CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 1144 // CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 1145 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 1146 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 1147 // CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1148 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 1149 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 1150 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1151 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1152 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 1153 // CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1154 // CHECK2: cond.true: 1155 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 1156 // CHECK2-NEXT: br label [[COND_END:%.*]] 1157 // CHECK2: cond.false: 1158 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1159 // CHECK2-NEXT: br label [[COND_END]] 1160 // CHECK2: cond.end: 1161 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 1162 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 1163 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 1164 // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 1165 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1166 // CHECK2: omp.inner.for.cond: 1167 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1168 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1169 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 1170 // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1171 // CHECK2: omp.inner.for.body: 1172 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1173 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 1174 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 1175 // CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 1176 // CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 1177 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 1178 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1179 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 1180 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 1181 // CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 1182 // CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 1183 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 1184 // CHECK2: omp.body.continue: 1185 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1186 // 
CHECK2: omp.inner.for.inc: 1187 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1188 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 1189 // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 1190 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] 1191 // CHECK2: omp.inner.for.end: 1192 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1193 // CHECK2: omp.loop.exit: 1194 // CHECK2-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1195 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 1196 // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 1197 // CHECK2-NEXT: br label [[OMP_PRECOND_END]] 1198 // CHECK2: omp.precond.end: 1199 // CHECK2-NEXT: ret void 1200 // 1201 // 1202 // CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg 1203 // CHECK2-SAME: () #[[ATTR3:[0-9]+]] { 1204 // CHECK2-NEXT: entry: 1205 // CHECK2-NEXT: call void @__tgt_register_requires(i64 1) 1206 // CHECK2-NEXT: ret void 1207 // 1208 // 1209 // CHECK3-LABEL: define {{[^@]+}}@_Z16target_teams_funPi 1210 // CHECK3-SAME: (i32* noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { 1211 // CHECK3-NEXT: entry: 1212 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 1213 // CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 1214 // CHECK3-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 1215 // CHECK3-NEXT: [[TE:%.*]] = alloca i32, align 4 1216 // CHECK3-NEXT: [[TH:%.*]] = alloca i32, align 4 1217 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1218 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1219 // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 1220 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 1221 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED3:%.*]] = alloca i64, align 8 1222 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 1223 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 1224 // CHECK3-NEXT: 
[[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 1225 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 1226 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 1227 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 1228 // CHECK3-NEXT: [[N_CASTED9:%.*]] = alloca i64, align 8 1229 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [3 x i8*], align 8 1230 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS12:%.*]] = alloca [3 x i8*], align 8 1231 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [3 x i8*], align 8 1232 // CHECK3-NEXT: [[_TMP14:%.*]] = alloca i32, align 4 1233 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_15:%.*]] = alloca i32, align 4 1234 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_16:%.*]] = alloca i32, align 4 1235 // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 1236 // CHECK3-NEXT: store i32 1000, i32* [[N]], align 4 1237 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[N]], align 4 1238 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 1239 // CHECK3-NEXT: store i32 [[DIV]], i32* [[TE]], align 4 1240 // CHECK3-NEXT: store i32 128, i32* [[TH]], align 4 1241 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TE]], align 4 1242 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1243 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TH]], align 4 1244 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1245 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N]], align 4 1246 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32* 1247 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 1248 // CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 1249 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1250 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* 1251 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[CONV2]], align 4 1252 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 1253 // 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1254 // CHECK3-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED3]] to i32* 1255 // CHECK3-NEXT: store i32 [[TMP7]], i32* [[CONV4]], align 4 1256 // CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED3]], align 8 1257 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 1258 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* 1259 // CHECK3-NEXT: store i64 [[TMP4]], i64* [[TMP10]], align 8 1260 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 1261 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* 1262 // CHECK3-NEXT: store i64 [[TMP4]], i64* [[TMP12]], align 8 1263 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 1264 // CHECK3-NEXT: store i8* null, i8** [[TMP13]], align 8 1265 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 1266 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [1000 x i32]** 1267 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP15]], align 8 1268 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 1269 // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [1000 x i32]** 1270 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP17]], align 8 1271 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 1272 // CHECK3-NEXT: store i8* null, i8** [[TMP18]], align 8 1273 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 1274 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i64* 1275 // CHECK3-NEXT: store i64 [[TMP6]], i64* [[TMP20]], align 8 1276 // 
CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 1277 // CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64* 1278 // CHECK3-NEXT: store i64 [[TMP6]], i64* [[TMP22]], align 8 1279 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 1280 // CHECK3-NEXT: store i8* null, i8** [[TMP23]], align 8 1281 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 1282 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64* 1283 // CHECK3-NEXT: store i64 [[TMP8]], i64* [[TMP25]], align 8 1284 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 1285 // CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64* 1286 // CHECK3-NEXT: store i64 [[TMP8]], i64* [[TMP27]], align 8 1287 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 1288 // CHECK3-NEXT: store i8* null, i8** [[TMP28]], align 8 1289 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 1290 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 1291 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1292 // CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1293 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[N]], align 4 1294 // CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_5]], align 4 1295 // CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 1296 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0 1297 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB]], 1 1298 // CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[DIV7]], 1 1299 // CHECK3-NEXT: store i32 [[SUB8]], i32* [[DOTCAPTURE_EXPR_6]], align 4 1300 // 
CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_6]], align 4 1301 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], 1 1302 // CHECK3-NEXT: [[TMP36:%.*]] = zext i32 [[ADD]] to i64 1303 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP36]]) 1304 // CHECK3-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.region_id, i32 4, i8** [[TMP29]], i8** [[TMP30]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP31]], i32 [[TMP32]]) 1305 // CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 1306 // CHECK3-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] 1307 // CHECK3: omp_offload.failed: 1308 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i64 [[TMP4]], [1000 x i32]* [[A]], i64 [[TMP6]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] 1309 // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] 1310 // CHECK3: omp_offload.cont: 1311 // CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[N]], align 4 1312 // CHECK3-NEXT: [[CONV10:%.*]] = bitcast i64* [[N_CASTED9]] to i32* 1313 // CHECK3-NEXT: store i32 [[TMP39]], i32* [[CONV10]], align 4 1314 // CHECK3-NEXT: [[TMP40:%.*]] = load i64, i64* [[N_CASTED9]], align 8 1315 // CHECK3-NEXT: [[TMP41:%.*]] = load i32*, i32** [[G_ADDR]], align 8 1316 // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 1317 // CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64* 1318 // CHECK3-NEXT: store i64 [[TMP40]], i64* [[TMP43]], align 8 1319 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 1320 // CHECK3-NEXT: 
[[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64* 1321 // CHECK3-NEXT: store i64 [[TMP40]], i64* [[TMP45]], align 8 1322 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 0 1323 // CHECK3-NEXT: store i8* null, i8** [[TMP46]], align 8 1324 // CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 1 1325 // CHECK3-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to [1000 x i32]** 1326 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP48]], align 8 1327 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 1 1328 // CHECK3-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to [1000 x i32]** 1329 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP50]], align 8 1330 // CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 1 1331 // CHECK3-NEXT: store i8* null, i8** [[TMP51]], align 8 1332 // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 2 1333 // CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32** 1334 // CHECK3-NEXT: store i32* [[TMP41]], i32** [[TMP53]], align 8 1335 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 2 1336 // CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** 1337 // CHECK3-NEXT: store i32* [[TMP41]], i32** [[TMP55]], align 8 1338 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS13]], i64 0, i64 2 1339 // CHECK3-NEXT: store i8* null, i8** [[TMP56]], align 8 1340 // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 1341 // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 1342 // CHECK3-NEXT: 
[[TMP59:%.*]] = load i32, i32* [[N]], align 4 1343 // CHECK3-NEXT: store i32 [[TMP59]], i32* [[DOTCAPTURE_EXPR_15]], align 4 1344 // CHECK3-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_15]], align 4 1345 // CHECK3-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP60]], 0 1346 // CHECK3-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 1347 // CHECK3-NEXT: [[SUB19:%.*]] = sub nsw i32 [[DIV18]], 1 1348 // CHECK3-NEXT: store i32 [[SUB19]], i32* [[DOTCAPTURE_EXPR_16]], align 4 1349 // CHECK3-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_16]], align 4 1350 // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP61]], 1 1351 // CHECK3-NEXT: [[TMP62:%.*]] = zext i32 [[ADD20]] to i64 1352 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP62]]) 1353 // CHECK3-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.region_id, i32 3, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) 1354 // CHECK3-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 1355 // CHECK3-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED21:%.*]], label [[OMP_OFFLOAD_CONT22:%.*]] 1356 // CHECK3: omp_offload.failed21: 1357 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58(i64 [[TMP40]], [1000 x i32]* [[A]], i32* [[TMP41]]) #[[ATTR2]] 1358 // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT22]] 1359 // CHECK3: omp_offload.cont22: 1360 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[A]], i64 0, i64 0 1361 // CHECK3-NEXT: [[TMP65:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1362 // CHECK3-NEXT: ret i32 [[TMP65]] 1363 // 1364 // 1365 // CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 1366 // CHECK3-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { 1367 // CHECK3-NEXT: entry: 1368 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1369 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1370 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 1371 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 1372 // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 1373 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) 1374 // CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1375 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1376 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 1377 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 1378 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1379 // CHECK3-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 1380 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* 1381 // CHECK3-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* 1382 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 1383 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 1384 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 1385 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 1386 // CHECK3-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* 1387 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 1388 // CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 1389 // CHECK3-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) 1390 // CHECK3-NEXT: ret void 1391 // 1392 // 1393 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 1394 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 1395 // CHECK3-NEXT: entry: 1396 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1397 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1398 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1399 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1400 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1401 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 1402 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1403 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1404 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 1405 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 1406 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 1407 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1408 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1409 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 1410 // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 1411 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 1412 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1413 // CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1414 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1415 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1416 // CHECK3-NEXT: [[TMP0:%.*]] = load 
[1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 1417 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 1418 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1419 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1420 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 1421 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 1422 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 1423 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1424 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 1425 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1426 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 1427 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 1428 // CHECK3: omp.precond.then: 1429 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 1430 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1431 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 1432 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 1433 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 1434 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1435 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 1436 // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 1437 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1438 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1439 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 1440 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1441 // CHECK3: cond.true: 1442 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 1443 // CHECK3-NEXT: br label [[COND_END:%.*]] 1444 // CHECK3: cond.false: 1445 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1446 // CHECK3-NEXT: br label [[COND_END]] 1447 // CHECK3: cond.end: 1448 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 1449 // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 1450 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 1451 // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 1452 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1453 // CHECK3: omp.inner.for.cond: 1454 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1455 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1456 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 1457 // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1458 // CHECK3: omp.inner.for.body: 1459 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 1460 // CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 1461 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1462 // CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 1463 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 1464 // CHECK3-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 1465 // CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 1466 // CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 1467 // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) 1468 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1469 // CHECK3: omp.inner.for.inc: 1470 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1471 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 1472 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] 1473 // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 1474 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 1475 // CHECK3: omp.inner.for.end: 1476 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1477 // CHECK3: omp.loop.exit: 1478 // CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1479 // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 1480 // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) 1481 // CHECK3-NEXT: br label [[OMP_PRECOND_END]] 1482 // CHECK3: omp.precond.end: 1483 // CHECK3-NEXT: ret void 1484 // 1485 // 1486 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 1487 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 1488 // CHECK3-NEXT: entry: 1489 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1490 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1491 // CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 1492 // CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 1493 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1494 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1495 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 1496 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 1497 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1498 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1499 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 1500 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 1501 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 1502 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1503 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1504 // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 1505 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 1506 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1507 // CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 1508 // CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 1509 // CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1510 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1511 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1512 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 1513 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 1514 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1515 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1516 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 1517 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 1518 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 1519 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1520 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 1521 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1522 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 1523 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], 
label [[OMP_PRECOND_END:%.*]] 1524 // CHECK3: omp.precond.then: 1525 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 1526 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1527 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 1528 // CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 1529 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 1530 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 1531 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 1532 // CHECK3-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 1533 // CHECK3-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 1534 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 1535 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 1536 // CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1537 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 1538 // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 1539 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1540 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1541 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 1542 // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1543 // CHECK3: cond.true: 1544 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1545 // CHECK3-NEXT: br label [[COND_END:%.*]] 1546 // CHECK3: cond.false: 1547 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1548 // CHECK3-NEXT: br label [[COND_END]] 1549 // CHECK3: cond.end: 1550 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 1551 // CHECK3-NEXT: store i32 
[[COND]], i32* [[DOTOMP_UB]], align 4 1552 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 1553 // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 1554 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1555 // CHECK3: omp.inner.for.cond: 1556 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1557 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1558 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 1559 // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1560 // CHECK3: omp.inner.for.body: 1561 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1562 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 1563 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 1564 // CHECK3-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 1565 // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 1566 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 1567 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 1568 // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 1569 // CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1570 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 1571 // CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 1572 // CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 1573 // CHECK3-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 1574 // CHECK3: .cancel.exit: 1575 // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] 1576 // CHECK3: .cancel.continue: 1577 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 1578 // CHECK3: omp.body.continue: 1579 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1580 // CHECK3: omp.inner.for.inc: 1581 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 1582 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 1583 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 1584 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 1585 // CHECK3: omp.inner.for.end: 1586 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1587 // CHECK3: omp.loop.exit: 1588 // CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1589 // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 1590 // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 1591 // CHECK3-NEXT: br label [[OMP_PRECOND_END]] 1592 // CHECK3: cancel.exit: 1593 // CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1594 // CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 1595 // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 1596 // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] 1597 // CHECK3: omp.precond.end: 1598 // CHECK3-NEXT: br label [[CANCEL_CONT]] 1599 // CHECK3: cancel.cont: 1600 // CHECK3-NEXT: ret void 1601 // 1602 // 1603 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 1604 // CHECK3-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 1605 // CHECK3-NEXT: entry: 1606 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1607 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1608 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 1609 // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 1610 // CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1611 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1612 // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 1613 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1614 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x 
i32]** [[A_ADDR]], align 8 1615 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 1616 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* 1617 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 1618 // CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 1619 // CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 1620 // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 1621 // CHECK3-NEXT: ret void 1622 // 1623 // 1624 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 1625 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 1626 // CHECK3-NEXT: entry: 1627 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1628 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1629 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1630 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1631 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 1632 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1633 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 1634 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1635 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1636 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 1637 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 1638 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 1639 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1640 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1641 // CHECK3-NEXT: [[I3:%.*]] = 
alloca i32, align 4 1642 // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 1643 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 1644 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1645 // CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1646 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1647 // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 1648 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1649 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 1650 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 1651 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1652 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1653 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 1654 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 1655 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 1656 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1657 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 1658 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1659 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 1660 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 1661 // CHECK3: omp.precond.then: 1662 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 1663 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1664 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 1665 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 1666 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 1667 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1668 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 1669 // CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 1670 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1671 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1672 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 1673 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1674 // CHECK3: cond.true: 1675 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1676 // CHECK3-NEXT: br label [[COND_END:%.*]] 1677 // CHECK3: cond.false: 1678 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1679 // CHECK3-NEXT: br label [[COND_END]] 1680 // CHECK3: cond.end: 1681 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 1682 // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 1683 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 1684 // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 1685 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1686 // CHECK3: omp.inner.for.cond: 1687 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1688 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1689 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 1690 // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1691 // CHECK3: omp.inner.for.body: 1692 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 1693 // CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 1694 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 1695 // CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 1696 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 1697 // CHECK3-NEXT: 
[[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 1698 // CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 1699 // CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 1700 // CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 1701 // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) 1702 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1703 // CHECK3: omp.inner.for.inc: 1704 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1705 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 1706 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] 1707 // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 1708 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 1709 // CHECK3: omp.inner.for.end: 1710 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1711 // CHECK3: omp.loop.exit: 1712 // CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1713 // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 1714 // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 1715 // CHECK3-NEXT: br label [[OMP_PRECOND_END]] 1716 // CHECK3: omp.precond.end: 1717 // CHECK3-NEXT: ret void 1718 // 1719 // 1720 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 1721 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 1722 // CHECK3-NEXT: entry: 1723 // CHECK3-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1724 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1725 // CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 1726 // CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 1727 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 1728 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 1729 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 1730 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1731 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 1732 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1733 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1734 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 1735 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 1736 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 1737 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1738 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1739 // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 1740 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 1741 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 1742 // CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 1743 // CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 1744 // CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 1745 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 1746 // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 1747 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 1748 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 1749 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 1750 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1751 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 1752 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 1753 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 1754 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 1755 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1756 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 1757 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1758 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 1759 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 1760 // CHECK3: omp.precond.then: 1761 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 1762 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1763 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 1764 // CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 1765 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 1766 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 1767 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 1768 // CHECK3-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 1769 // CHECK3-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 1770 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 1771 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 1772 // CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1773 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 1774 // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 1775 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1776 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1777 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 1778 // 
CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1779 // CHECK3: cond.true: 1780 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1781 // CHECK3-NEXT: br label [[COND_END:%.*]] 1782 // CHECK3: cond.false: 1783 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1784 // CHECK3-NEXT: br label [[COND_END]] 1785 // CHECK3: cond.end: 1786 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 1787 // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 1788 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 1789 // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 1790 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1791 // CHECK3: omp.inner.for.cond: 1792 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1793 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 1794 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 1795 // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1796 // CHECK3: omp.inner.for.body: 1797 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1798 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 1799 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 1800 // CHECK3-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 1801 // CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 1802 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 1803 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1804 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 1805 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 1806 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 1807 // CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 
1808 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 1809 // CHECK3: omp.body.continue: 1810 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1811 // CHECK3: omp.inner.for.inc: 1812 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 1813 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 1814 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 1815 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 1816 // CHECK3: omp.inner.for.end: 1817 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1818 // CHECK3: omp.loop.exit: 1819 // CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1820 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 1821 // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 1822 // CHECK3-NEXT: br label [[OMP_PRECOND_END]] 1823 // CHECK3: omp.precond.end: 1824 // CHECK3-NEXT: ret void 1825 // 1826 // 1827 // CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg 1828 // CHECK3-SAME: () #[[ATTR3:[0-9]+]] { 1829 // CHECK3-NEXT: entry: 1830 // CHECK3-NEXT: call void @__tgt_register_requires(i64 1) 1831 // CHECK3-NEXT: ret void 1832 // 1833 // 1834 // CHECK4-LABEL: define {{[^@]+}}@_Z16target_teams_funPi 1835 // CHECK4-SAME: (i32* noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { 1836 // CHECK4-NEXT: entry: 1837 // CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 1838 // CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 1839 // CHECK4-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 1840 // CHECK4-NEXT: [[TE:%.*]] = alloca i32, align 4 1841 // CHECK4-NEXT: [[TH:%.*]] = alloca i32, align 4 1842 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1843 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1844 // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 1845 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 1846 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i32, align 4 1847 // CHECK4-NEXT: 
[[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 1848 // CHECK4-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 1849 // CHECK4-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 1850 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 1851 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 1852 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 1853 // CHECK4-NEXT: [[N_CASTED7:%.*]] = alloca i32, align 4 1854 // CHECK4-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x i8*], align 4 1855 // CHECK4-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x i8*], align 4 1856 // CHECK4-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [3 x i8*], align 4 1857 // CHECK4-NEXT: [[_TMP11:%.*]] = alloca i32, align 4 1858 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4 1859 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4 1860 // CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 1861 // CHECK4-NEXT: store i32 1000, i32* [[N]], align 4 1862 // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[N]], align 4 1863 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 1864 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TE]], align 4 1865 // CHECK4-NEXT: store i32 128, i32* [[TH]], align 4 1866 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TE]], align 4 1867 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 1868 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TH]], align 4 1869 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 1870 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N]], align 4 1871 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 1872 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 1873 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1874 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 1875 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 
1876 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1877 // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR__CASTED2]], align 4 1878 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED2]], align 4 1879 // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 1880 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* 1881 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[TMP10]], align 4 1882 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 1883 // CHECK4-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* 1884 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[TMP12]], align 4 1885 // CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 1886 // CHECK4-NEXT: store i8* null, i8** [[TMP13]], align 4 1887 // CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 1888 // CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [1000 x i32]** 1889 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP15]], align 4 1890 // CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 1891 // CHECK4-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [1000 x i32]** 1892 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP17]], align 4 1893 // CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 1894 // CHECK4-NEXT: store i8* null, i8** [[TMP18]], align 4 1895 // CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 1896 // CHECK4-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32* 1897 // CHECK4-NEXT: store i32 [[TMP6]], i32* [[TMP20]], align 4 1898 // CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x 
i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 1899 // CHECK4-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* 1900 // CHECK4-NEXT: store i32 [[TMP6]], i32* [[TMP22]], align 4 1901 // CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 1902 // CHECK4-NEXT: store i8* null, i8** [[TMP23]], align 4 1903 // CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 1904 // CHECK4-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32* 1905 // CHECK4-NEXT: store i32 [[TMP8]], i32* [[TMP25]], align 4 1906 // CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 1907 // CHECK4-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32* 1908 // CHECK4-NEXT: store i32 [[TMP8]], i32* [[TMP27]], align 4 1909 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 1910 // CHECK4-NEXT: store i8* null, i8** [[TMP28]], align 4 1911 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 1912 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 1913 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 1914 // CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 1915 // CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[N]], align 4 1916 // CHECK4-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_3]], align 4 1917 // CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 1918 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0 1919 // CHECK4-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB]], 1 1920 // CHECK4-NEXT: [[SUB6:%.*]] = sub nsw i32 [[DIV5]], 1 1921 // CHECK4-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 1922 // CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 
1923 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], 1 1924 // CHECK4-NEXT: [[TMP36:%.*]] = zext i32 [[ADD]] to i64 1925 // CHECK4-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP36]]) 1926 // CHECK4-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.region_id, i32 4, i8** [[TMP29]], i8** [[TMP30]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP31]], i32 [[TMP32]]) 1927 // CHECK4-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 1928 // CHECK4-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] 1929 // CHECK4: omp_offload.failed: 1930 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i32 [[TMP4]], [1000 x i32]* [[A]], i32 [[TMP6]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] 1931 // CHECK4-NEXT: br label [[OMP_OFFLOAD_CONT]] 1932 // CHECK4: omp_offload.cont: 1933 // CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[N]], align 4 1934 // CHECK4-NEXT: store i32 [[TMP39]], i32* [[N_CASTED7]], align 4 1935 // CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[N_CASTED7]], align 4 1936 // CHECK4-NEXT: [[TMP41:%.*]] = load i32*, i32** [[G_ADDR]], align 4 1937 // CHECK4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 1938 // CHECK4-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32* 1939 // CHECK4-NEXT: store i32 [[TMP40]], i32* [[TMP43]], align 4 1940 // CHECK4-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 1941 // CHECK4-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32* 1942 // CHECK4-NEXT: store i32 [[TMP40]], i32* [[TMP45]], align 4 1943 // CHECK4-NEXT: [[TMP46:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 1944 // CHECK4-NEXT: store i8* null, i8** [[TMP46]], align 4 1945 // CHECK4-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 1946 // CHECK4-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to [1000 x i32]** 1947 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP48]], align 4 1948 // CHECK4-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 1949 // CHECK4-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to [1000 x i32]** 1950 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP50]], align 4 1951 // CHECK4-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 1952 // CHECK4-NEXT: store i8* null, i8** [[TMP51]], align 4 1953 // CHECK4-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 2 1954 // CHECK4-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32** 1955 // CHECK4-NEXT: store i32* [[TMP41]], i32** [[TMP53]], align 4 1956 // CHECK4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 2 1957 // CHECK4-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** 1958 // CHECK4-NEXT: store i32* [[TMP41]], i32** [[TMP55]], align 4 1959 // CHECK4-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 2 1960 // CHECK4-NEXT: store i8* null, i8** [[TMP56]], align 4 1961 // CHECK4-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 1962 // CHECK4-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 1963 // CHECK4-NEXT: [[TMP59:%.*]] = load i32, i32* [[N]], align 4 1964 // CHECK4-NEXT: store i32 [[TMP59]], i32* [[DOTCAPTURE_EXPR_12]], align 4 1965 // CHECK4-NEXT: [[TMP60:%.*]] = 
load i32, i32* [[DOTCAPTURE_EXPR_12]], align 4 1966 // CHECK4-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP60]], 0 1967 // CHECK4-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 1968 // CHECK4-NEXT: [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1 1969 // CHECK4-NEXT: store i32 [[SUB16]], i32* [[DOTCAPTURE_EXPR_13]], align 4 1970 // CHECK4-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_13]], align 4 1971 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP61]], 1 1972 // CHECK4-NEXT: [[TMP62:%.*]] = zext i32 [[ADD17]] to i64 1973 // CHECK4-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP62]]) 1974 // CHECK4-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.region_id, i32 3, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) 1975 // CHECK4-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 1976 // CHECK4-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]] 1977 // CHECK4: omp_offload.failed18: 1978 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58(i32 [[TMP40]], [1000 x i32]* [[A]], i32* [[TMP41]]) #[[ATTR2]] 1979 // CHECK4-NEXT: br label [[OMP_OFFLOAD_CONT19]] 1980 // CHECK4: omp_offload.cont19: 1981 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[A]], i32 0, i32 0 1982 // CHECK4-NEXT: [[TMP65:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1983 // CHECK4-NEXT: ret i32 [[TMP65]] 1984 // 1985 // 1986 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 1987 // CHECK4-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) 
[[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { 1988 // CHECK4-NEXT: entry: 1989 // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 1990 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 1991 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 1992 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 1993 // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 1994 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) 1995 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 1996 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 1997 // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 1998 // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 1999 // CHECK4-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2000 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 2001 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 2002 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 2003 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 2004 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[N_CASTED]], align 4 2005 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4 2006 // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], [1000 x i32]* [[TMP1]]) 2007 // CHECK4-NEXT: ret void 2008 // 2009 // 2010 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
2011 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 2012 // CHECK4-NEXT: entry: 2013 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2014 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2015 // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2016 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2017 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2018 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 2019 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2020 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2021 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 2022 // CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 2023 // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 2024 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2025 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2026 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 2027 // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2028 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2029 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2030 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2031 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2032 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2033 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2034 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2035 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2036 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2037 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2038 // CHECK4-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 2039 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2040 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 2041 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2042 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2043 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2044 // CHECK4: omp.precond.then: 2045 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 2046 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2047 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 2048 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2049 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2050 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2051 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 2052 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2053 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2054 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2055 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 2056 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2057 // CHECK4: cond.true: 2058 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2059 // CHECK4-NEXT: br label [[COND_END:%.*]] 2060 // CHECK4: cond.false: 2061 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2062 // CHECK4-NEXT: br label [[COND_END]] 2063 // CHECK4: cond.end: 2064 // CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 2065 // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 2066 // 
CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2067 // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 2068 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2069 // CHECK4: omp.inner.for.cond: 2070 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2071 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2072 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 2073 // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2074 // CHECK4: omp.inner.for.body: 2075 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2076 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2077 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 2078 // CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 2079 // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 2080 // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]]) 2081 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2082 // CHECK4: omp.inner.for.inc: 2083 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2084 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 2085 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] 2086 // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 2087 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] 2088 // CHECK4: omp.inner.for.end: 2089 // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2090 // CHECK4: omp.loop.exit: 2091 // CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2092 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 2093 // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) 2094 // CHECK4-NEXT: br label [[OMP_PRECOND_END]] 2095 // CHECK4: omp.precond.end: 2096 // CHECK4-NEXT: ret void 2097 // 2098 // 2099 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1 2100 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 2101 // CHECK4-NEXT: entry: 2102 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2103 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2104 // CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 2105 // CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 2106 // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2107 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2108 // CHECK4-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 2109 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 2110 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2111 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2112 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 2113 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2114 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2115 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2116 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2117 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 2118 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2119 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2120 // CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2121 // CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2122 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2123 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2124 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2125 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2126 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2127 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2128 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2129 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2130 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 2131 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2132 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 2133 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2134 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2135 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2136 // CHECK4: omp.precond.then: 
2137 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 2138 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2139 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 2140 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2141 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2142 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 2143 // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 2144 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2145 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2146 // CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2147 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 2148 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2149 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2150 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2151 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 2152 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2153 // CHECK4: cond.true: 2154 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2155 // CHECK4-NEXT: br label [[COND_END:%.*]] 2156 // CHECK4: cond.false: 2157 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2158 // CHECK4-NEXT: br label [[COND_END]] 2159 // CHECK4: cond.end: 2160 // CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 2161 // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 2162 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 2163 // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 2164 // 
CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2165 // CHECK4: omp.inner.for.cond: 2166 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2167 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2168 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 2169 // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2170 // CHECK4: omp.inner.for.body: 2171 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2172 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 2173 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2174 // CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 2175 // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 2176 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP17]] 2177 // CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 2178 // CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2179 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 2180 // CHECK4-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 2181 // CHECK4-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 2182 // CHECK4-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 2183 // CHECK4: .cancel.exit: 2184 // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] 2185 // CHECK4: .cancel.continue: 2186 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2187 // CHECK4: omp.body.continue: 2188 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2189 // CHECK4: omp.inner.for.inc: 2190 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2191 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 2192 // CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 2193 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] 2194 // CHECK4: omp.inner.for.end: 2195 // CHECK4-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] 2196 // CHECK4: omp.loop.exit: 2197 // CHECK4-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2198 // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 2199 // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 2200 // CHECK4-NEXT: br label [[OMP_PRECOND_END]] 2201 // CHECK4: cancel.exit: 2202 // CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2203 // CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 2204 // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 2205 // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] 2206 // CHECK4: omp.precond.end: 2207 // CHECK4-NEXT: br label [[CANCEL_CONT]] 2208 // CHECK4: cancel.cont: 2209 // CHECK4-NEXT: ret void 2210 // 2211 // 2212 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 2213 // CHECK4-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 2214 // CHECK4-NEXT: entry: 2215 // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2216 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2217 // CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 2218 // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2219 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2220 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2221 // CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 2222 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2223 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2224 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 2225 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 2226 // CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 2227 // CHECK4-NEXT: 
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 2228 // CHECK4-NEXT: ret void 2229 // 2230 // 2231 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 2232 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 2233 // CHECK4-NEXT: entry: 2234 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2235 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2236 // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2237 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2238 // CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 2239 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2240 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 2241 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2242 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2243 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 2244 // CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 2245 // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 2246 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2247 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2248 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 2249 // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2250 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2251 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2252 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2253 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], 
align 4 2254 // CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 2255 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2256 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2257 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2258 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2259 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2260 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2261 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 2262 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2263 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 2264 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2265 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2266 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2267 // CHECK4: omp.precond.then: 2268 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 2269 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2270 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 2271 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2272 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2273 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2274 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 2275 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2276 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2277 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2278 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 2279 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] 2280 // CHECK4: cond.true: 2281 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2282 // CHECK4-NEXT: br label [[COND_END:%.*]] 2283 // CHECK4: cond.false: 2284 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2285 // CHECK4-NEXT: br label [[COND_END]] 2286 // CHECK4: cond.end: 2287 // CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 2288 // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 2289 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2290 // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 2291 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2292 // CHECK4: omp.inner.for.cond: 2293 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2294 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2295 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 2296 // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2297 // CHECK4: omp.inner.for.body: 2298 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2299 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2300 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 2301 // CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 2302 // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 2303 // CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 2304 // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) 2305 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2306 // CHECK4: omp.inner.for.inc: 2307 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2308 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 2309 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] 2310 // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 2311 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] 2312 // CHECK4: omp.inner.for.end: 2313 // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2314 // CHECK4: omp.loop.exit: 2315 // CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2316 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 2317 // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 2318 // CHECK4-NEXT: br label [[OMP_PRECOND_END]] 2319 // CHECK4: omp.precond.end: 2320 // CHECK4-NEXT: ret void 2321 // 2322 // 2323 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 2324 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 2325 // CHECK4-NEXT: entry: 2326 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2327 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2328 // CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 2329 // CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 2330 // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2331 // CHECK4-NEXT: [[A_ADDR:%.*]] = 
alloca [1000 x i32]*, align 4 2332 // CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 2333 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2334 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 2335 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2336 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2337 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 2338 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2339 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2340 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2341 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2342 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 2343 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2344 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2345 // CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2346 // CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2347 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2348 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2349 // CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 2350 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2351 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2352 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2353 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2354 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2355 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2356 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 2357 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2358 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 2359 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2360 // CHECK4-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2361 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2362 // CHECK4: omp.precond.then: 2363 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 2364 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2365 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 2366 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2367 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2368 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 2369 // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 2370 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2371 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2372 // CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2373 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 2374 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2375 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2376 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2377 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 2378 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2379 // CHECK4: cond.true: 2380 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2381 // CHECK4-NEXT: br label [[COND_END:%.*]] 2382 // CHECK4: cond.false: 2383 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2384 // CHECK4-NEXT: br label [[COND_END]] 2385 // CHECK4: cond.end: 2386 // CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 2387 // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], 
align 4 2388 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 2389 // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 2390 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2391 // CHECK4: omp.inner.for.cond: 2392 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2393 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2394 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 2395 // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2396 // CHECK4: omp.inner.for.body: 2397 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2398 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 2399 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2400 // CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 2401 // CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 2402 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 2403 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 2404 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 2405 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] 2406 // CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 2407 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2408 // CHECK4: omp.body.continue: 2409 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2410 // CHECK4: omp.inner.for.inc: 2411 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2412 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 2413 // CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 2414 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] 2415 // CHECK4: omp.inner.for.end: 2416 // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2417 // CHECK4: omp.loop.exit: 2418 // CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 4 2419 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 2420 // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 2421 // CHECK4-NEXT: br label [[OMP_PRECOND_END]] 2422 // CHECK4: omp.precond.end: 2423 // CHECK4-NEXT: ret void 2424 // 2425 // 2426 // CHECK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg 2427 // CHECK4-SAME: () #[[ATTR3:[0-9]+]] { 2428 // CHECK4-NEXT: entry: 2429 // CHECK4-NEXT: call void @__tgt_register_requires(i64 1) 2430 // CHECK4-NEXT: ret void 2431 // 2432 // 2433 // CHECK5-LABEL: define {{[^@]+}}@_Z16target_teams_funPi 2434 // CHECK5-SAME: (i32* noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { 2435 // CHECK5-NEXT: entry: 2436 // CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 2437 // CHECK5-NEXT: [[N:%.*]] = alloca i32, align 4 2438 // CHECK5-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 2439 // CHECK5-NEXT: [[TE:%.*]] = alloca i32, align 4 2440 // CHECK5-NEXT: [[TH:%.*]] = alloca i32, align 4 2441 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2442 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2443 // CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2444 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 2445 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i32, align 4 2446 // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 2447 // CHECK5-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 2448 // CHECK5-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 2449 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 2450 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 2451 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 2452 // CHECK5-NEXT: [[N_CASTED7:%.*]] = alloca i32, align 4 2453 // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x i8*], align 4 2454 // CHECK5-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x i8*], align 4 2455 // CHECK5-NEXT: 
[[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [3 x i8*], align 4 2456 // CHECK5-NEXT: [[_TMP11:%.*]] = alloca i32, align 4 2457 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4 2458 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4 2459 // CHECK5-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 2460 // CHECK5-NEXT: store i32 1000, i32* [[N]], align 4 2461 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[N]], align 4 2462 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 2463 // CHECK5-NEXT: store i32 [[DIV]], i32* [[TE]], align 4 2464 // CHECK5-NEXT: store i32 128, i32* [[TH]], align 4 2465 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TE]], align 4 2466 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2467 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TH]], align 4 2468 // CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2469 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[N]], align 4 2470 // CHECK5-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 2471 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 2472 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2473 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 2474 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 2475 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2476 // CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR__CASTED2]], align 4 2477 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED2]], align 4 2478 // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 2479 // CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* 2480 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[TMP10]], align 4 2481 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 2482 // 
CHECK5-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* 2483 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[TMP12]], align 4 2484 // CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 2485 // CHECK5-NEXT: store i8* null, i8** [[TMP13]], align 4 2486 // CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 2487 // CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [1000 x i32]** 2488 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP15]], align 4 2489 // CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 2490 // CHECK5-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [1000 x i32]** 2491 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP17]], align 4 2492 // CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 2493 // CHECK5-NEXT: store i8* null, i8** [[TMP18]], align 4 2494 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 2495 // CHECK5-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32* 2496 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[TMP20]], align 4 2497 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 2498 // CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* 2499 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[TMP22]], align 4 2500 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 2501 // CHECK5-NEXT: store i8* null, i8** [[TMP23]], align 4 2502 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 2503 // CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32* 2504 // CHECK5-NEXT: store i32 [[TMP8]], i32* [[TMP25]], align 4 2505 // CHECK5-NEXT: 
[[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 2506 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32* 2507 // CHECK5-NEXT: store i32 [[TMP8]], i32* [[TMP27]], align 4 2508 // CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 2509 // CHECK5-NEXT: store i8* null, i8** [[TMP28]], align 4 2510 // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 2511 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 2512 // CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2513 // CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2514 // CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[N]], align 4 2515 // CHECK5-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_3]], align 4 2516 // CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 2517 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0 2518 // CHECK5-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB]], 1 2519 // CHECK5-NEXT: [[SUB6:%.*]] = sub nsw i32 [[DIV5]], 1 2520 // CHECK5-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 2521 // CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 2522 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], 1 2523 // CHECK5-NEXT: [[TMP36:%.*]] = zext i32 [[ADD]] to i64 2524 // CHECK5-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP36]]) 2525 // CHECK5-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.region_id, i32 4, i8** [[TMP29]], i8** [[TMP30]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* 
@.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP31]], i32 [[TMP32]]) 2526 // CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 2527 // CHECK5-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] 2528 // CHECK5: omp_offload.failed: 2529 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i32 [[TMP4]], [1000 x i32]* [[A]], i32 [[TMP6]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] 2530 // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] 2531 // CHECK5: omp_offload.cont: 2532 // CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[N]], align 4 2533 // CHECK5-NEXT: store i32 [[TMP39]], i32* [[N_CASTED7]], align 4 2534 // CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[N_CASTED7]], align 4 2535 // CHECK5-NEXT: [[TMP41:%.*]] = load i32*, i32** [[G_ADDR]], align 4 2536 // CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 2537 // CHECK5-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32* 2538 // CHECK5-NEXT: store i32 [[TMP40]], i32* [[TMP43]], align 4 2539 // CHECK5-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 2540 // CHECK5-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32* 2541 // CHECK5-NEXT: store i32 [[TMP40]], i32* [[TMP45]], align 4 2542 // CHECK5-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 2543 // CHECK5-NEXT: store i8* null, i8** [[TMP46]], align 4 2544 // CHECK5-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 2545 // CHECK5-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to [1000 x i32]** 2546 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP48]], align 4 2547 // CHECK5-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 2548 // CHECK5-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to [1000 x 
i32]** 2549 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[TMP50]], align 4 2550 // CHECK5-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 2551 // CHECK5-NEXT: store i8* null, i8** [[TMP51]], align 4 2552 // CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 2 2553 // CHECK5-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32** 2554 // CHECK5-NEXT: store i32* [[TMP41]], i32** [[TMP53]], align 4 2555 // CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 2 2556 // CHECK5-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** 2557 // CHECK5-NEXT: store i32* [[TMP41]], i32** [[TMP55]], align 4 2558 // CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 2 2559 // CHECK5-NEXT: store i8* null, i8** [[TMP56]], align 4 2560 // CHECK5-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 2561 // CHECK5-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 2562 // CHECK5-NEXT: [[TMP59:%.*]] = load i32, i32* [[N]], align 4 2563 // CHECK5-NEXT: store i32 [[TMP59]], i32* [[DOTCAPTURE_EXPR_12]], align 4 2564 // CHECK5-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_12]], align 4 2565 // CHECK5-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP60]], 0 2566 // CHECK5-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 2567 // CHECK5-NEXT: [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1 2568 // CHECK5-NEXT: store i32 [[SUB16]], i32* [[DOTCAPTURE_EXPR_13]], align 4 2569 // CHECK5-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_13]], align 4 2570 // CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP61]], 1 2571 // CHECK5-NEXT: [[TMP62:%.*]] = zext i32 [[ADD17]] to i64 2572 // CHECK5-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* 
@[[GLOB3]], i64 -1, i64 [[TMP62]]) 2573 // CHECK5-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.region_id, i32 3, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) 2574 // CHECK5-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 2575 // CHECK5-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]] 2576 // CHECK5: omp_offload.failed18: 2577 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58(i32 [[TMP40]], [1000 x i32]* [[A]], i32* [[TMP41]]) #[[ATTR2]] 2578 // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT19]] 2579 // CHECK5: omp_offload.cont19: 2580 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[A]], i32 0, i32 0 2581 // CHECK5-NEXT: [[TMP65:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 2582 // CHECK5-NEXT: ret i32 [[TMP65]] 2583 // 2584 // 2585 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 2586 // CHECK5-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { 2587 // CHECK5-NEXT: entry: 2588 // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2589 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2590 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 2591 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 2592 // CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2593 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) 2594 // CHECK5-NEXT: store i32 [[N]], 
i32* [[N_ADDR]], align 4 2595 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2596 // CHECK5-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 2597 // CHECK5-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 2598 // CHECK5-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2599 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 2600 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 2601 // CHECK5-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 2602 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 2603 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[N_CASTED]], align 4 2604 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4 2605 // CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], [1000 x i32]* [[TMP1]]) 2606 // CHECK5-NEXT: ret void 2607 // 2608 // 2609 // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
2610 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 2611 // CHECK5-NEXT: entry: 2612 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2613 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2614 // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2615 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2616 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2617 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 2618 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2619 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2620 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 2621 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 2622 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 2623 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2624 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2625 // CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 2626 // CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2627 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2628 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2629 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2630 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2631 // CHECK5-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2632 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2633 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2634 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2635 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2636 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2637 // CHECK5-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 2638 // CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2639 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 2640 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2641 // CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2642 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2643 // CHECK5: omp.precond.then: 2644 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 2645 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2646 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 2647 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2648 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2649 // CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2650 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 2651 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2652 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2653 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2654 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 2655 // CHECK5-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2656 // CHECK5: cond.true: 2657 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2658 // CHECK5-NEXT: br label [[COND_END:%.*]] 2659 // CHECK5: cond.false: 2660 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2661 // CHECK5-NEXT: br label [[COND_END]] 2662 // CHECK5: cond.end: 2663 // CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 2664 // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 2665 // 
CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2666 // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 2667 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2668 // CHECK5: omp.inner.for.cond: 2669 // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2670 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2671 // CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 2672 // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2673 // CHECK5: omp.inner.for.body: 2674 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2675 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2676 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 2677 // CHECK5-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 2678 // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 2679 // CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]]) 2680 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2681 // CHECK5: omp.inner.for.inc: 2682 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2683 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 2684 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] 2685 // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 2686 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] 2687 // CHECK5: omp.inner.for.end: 2688 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2689 // CHECK5: omp.loop.exit: 2690 // CHECK5-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2691 // CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 2692 // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) 2693 // CHECK5-NEXT: br label [[OMP_PRECOND_END]] 2694 // CHECK5: omp.precond.end: 2695 // CHECK5-NEXT: ret void 2696 // 2697 // 2698 // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 2699 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { 2700 // CHECK5-NEXT: entry: 2701 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2702 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2703 // CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 2704 // CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 2705 // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2706 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2707 // CHECK5-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 2708 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 2709 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2710 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2711 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 2712 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2713 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2714 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2715 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2716 // CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 2717 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2718 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2719 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2720 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2721 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2722 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2723 // CHECK5-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2724 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2725 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2726 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2727 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2728 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2729 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 2730 // CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2731 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 2732 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2733 // CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2734 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2735 // CHECK5: omp.precond.then: 
2736 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 2737 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2738 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 2739 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2740 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2741 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 2742 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 2743 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2744 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2745 // CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2746 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 2747 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2748 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2749 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2750 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 2751 // CHECK5-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2752 // CHECK5: cond.true: 2753 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2754 // CHECK5-NEXT: br label [[COND_END:%.*]] 2755 // CHECK5: cond.false: 2756 // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2757 // CHECK5-NEXT: br label [[COND_END]] 2758 // CHECK5: cond.end: 2759 // CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 2760 // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 2761 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 2762 // CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 2763 // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2764 // CHECK5: omp.inner.for.cond: 2765 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2766 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2767 // CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 2768 // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2769 // CHECK5: omp.inner.for.body: 2770 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2771 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 2772 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2773 // CHECK5-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 2774 // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 2775 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP17]] 2776 // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 2777 // CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2778 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 2779 // CHECK5-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 2780 // CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 2781 // CHECK5-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 2782 // CHECK5: .cancel.exit: 2783 // CHECK5-NEXT: br label [[CANCEL_EXIT:%.*]] 2784 // CHECK5: .cancel.continue: 2785 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2786 // CHECK5: omp.body.continue: 2787 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2788 // CHECK5: omp.inner.for.inc: 2789 // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2790 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 2791 // CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 2792 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] 2793 // CHECK5: omp.inner.for.end: 2794 // CHECK5-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] 2795 // CHECK5: omp.loop.exit: 2796 // CHECK5-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2797 // CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 2798 // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 2799 // CHECK5-NEXT: br label [[OMP_PRECOND_END]] 2800 // CHECK5: cancel.exit: 2801 // CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2802 // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 2803 // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 2804 // CHECK5-NEXT: br label [[CANCEL_CONT:%.*]] 2805 // CHECK5: omp.precond.end: 2806 // CHECK5-NEXT: br label [[CANCEL_CONT]] 2807 // CHECK5: cancel.cont: 2808 // CHECK5-NEXT: ret void 2809 // 2810 // 2811 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 2812 // CHECK5-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 2813 // CHECK5-NEXT: entry: 2814 // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2815 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2816 // CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 2817 // CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2818 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2819 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2820 // CHECK5-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 2821 // CHECK5-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2822 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2823 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 2824 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 2825 // CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 2826 // CHECK5-NEXT: 
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 2827 // CHECK5-NEXT: ret void 2828 // 2829 // 2830 // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 2831 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 2832 // CHECK5-NEXT: entry: 2833 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2834 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2835 // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2836 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 2837 // CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 2838 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2839 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 2840 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2841 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2842 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 2843 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 2844 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 2845 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2846 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2847 // CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 2848 // CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 2849 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2850 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2851 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2852 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], 
align 4 2853 // CHECK5-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 2854 // CHECK5-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2855 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2856 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2857 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2858 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2859 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2860 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 2861 // CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2862 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 2863 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2864 // CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2865 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2866 // CHECK5: omp.precond.then: 2867 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 2868 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2869 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 2870 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2871 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2872 // CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2873 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 2874 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2875 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2876 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2877 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 2878 // CHECK5-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] 2879 // CHECK5: cond.true: 2880 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2881 // CHECK5-NEXT: br label [[COND_END:%.*]] 2882 // CHECK5: cond.false: 2883 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2884 // CHECK5-NEXT: br label [[COND_END]] 2885 // CHECK5: cond.end: 2886 // CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 2887 // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 2888 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2889 // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 2890 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2891 // CHECK5: omp.inner.for.cond: 2892 // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2893 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2894 // CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 2895 // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2896 // CHECK5: omp.inner.for.body: 2897 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 2898 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 2899 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 2900 // CHECK5-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 2901 // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 2902 // CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 2903 // CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) 2904 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2905 // CHECK5: omp.inner.for.inc: 2906 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2907 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 2908 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] 2909 // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 2910 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] 2911 // CHECK5: omp.inner.for.end: 2912 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2913 // CHECK5: omp.loop.exit: 2914 // CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2915 // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 2916 // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 2917 // CHECK5-NEXT: br label [[OMP_PRECOND_END]] 2918 // CHECK5: omp.precond.end: 2919 // CHECK5-NEXT: ret void 2920 // 2921 // 2922 // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 2923 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { 2924 // CHECK5-NEXT: entry: 2925 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 2926 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 2927 // CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 2928 // CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 2929 // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 2930 // CHECK5-NEXT: [[A_ADDR:%.*]] = 
alloca [1000 x i32]*, align 4 2931 // CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 2932 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2933 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 2934 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2935 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2936 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 2937 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2938 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2939 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2940 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2941 // CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 2942 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 2943 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 2944 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2945 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2946 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 2947 // CHECK5-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 2948 // CHECK5-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 2949 // CHECK5-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 2950 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 2951 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 2952 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2953 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 2954 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 2955 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 2956 // CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 2957 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 2958 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 2959 // CHECK5-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 2960 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2961 // CHECK5: omp.precond.then: 2962 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 2963 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2964 // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 2965 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 2966 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 2967 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 2968 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 2969 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 2970 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 2971 // CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 2972 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 2973 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2974 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2975 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2976 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 2977 // CHECK5-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2978 // CHECK5: cond.true: 2979 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 2980 // CHECK5-NEXT: br label [[COND_END:%.*]] 2981 // CHECK5: cond.false: 2982 // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2983 // CHECK5-NEXT: br label [[COND_END]] 2984 // CHECK5: cond.end: 2985 // CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 2986 // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], 
align 4 2987 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 2988 // CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 2989 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2990 // CHECK5: omp.inner.for.cond: 2991 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2992 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 2993 // CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 2994 // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2995 // CHECK5: omp.inner.for.body: 2996 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 2997 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 2998 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2999 // CHECK5-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 3000 // CHECK5-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 3001 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 3002 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 3003 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 3004 // CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] 3005 // CHECK5-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 3006 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 3007 // CHECK5: omp.body.continue: 3008 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3009 // CHECK5: omp.inner.for.inc: 3010 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3011 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 3012 // CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 3013 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] 3014 // CHECK5: omp.inner.for.end: 3015 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3016 // CHECK5: omp.loop.exit: 3017 // CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 4 3018 // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 3019 // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 3020 // CHECK5-NEXT: br label [[OMP_PRECOND_END]] 3021 // CHECK5: omp.precond.end: 3022 // CHECK5-NEXT: ret void 3023 // 3024 // 3025 // CHECK5-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg 3026 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { 3027 // CHECK5-NEXT: entry: 3028 // CHECK5-NEXT: call void @__tgt_register_requires(i64 1) 3029 // CHECK5-NEXT: ret void 3030 // 3031 // 3032 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 3033 // CHECK10-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { 3034 // CHECK10-NEXT: entry: 3035 // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3036 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3037 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 3038 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 3039 // CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3040 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) 3041 // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3042 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3043 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 3044 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 3045 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3046 // CHECK10-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3047 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* 3048 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* 
[[DOTCAPTURE_EXPR__ADDR2]] to i32* 3049 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 3050 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 3051 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 3052 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 3053 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3054 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 3055 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 3056 // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) 3057 // CHECK10-NEXT: ret void 3058 // 3059 // 3060 // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. 3061 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 3062 // CHECK10-NEXT: entry: 3063 // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3064 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3065 // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3066 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3067 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3068 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 3069 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3070 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3071 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 3072 // CHECK10-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 3073 // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 3074 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = 
alloca i32, align 4 3075 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3076 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 3077 // CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3078 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3079 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3080 // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3081 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3082 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3083 // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3084 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3085 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3086 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3087 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3088 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3089 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3090 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3091 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 3092 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3093 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3094 // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3095 // CHECK10: omp.precond.then: 3096 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 3097 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3098 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 3099 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3100 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3101 // CHECK10-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3102 // CHECK10-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[TMP5]], align 4 3103 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3104 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3105 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3106 // CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 3107 // CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3108 // CHECK10: cond.true: 3109 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3110 // CHECK10-NEXT: br label [[COND_END:%.*]] 3111 // CHECK10: cond.false: 3112 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3113 // CHECK10-NEXT: br label [[COND_END]] 3114 // CHECK10: cond.end: 3115 // CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 3116 // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 3117 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3118 // CHECK10-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 3119 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3120 // CHECK10: omp.inner.for.cond: 3121 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3122 // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3123 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 3124 // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3125 // CHECK10: omp.inner.for.body: 3126 // CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3127 // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 3128 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3129 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to 
i64 3130 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 3131 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3132 // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 3133 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 3134 // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) 3135 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3136 // CHECK10: omp.inner.for.inc: 3137 // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3138 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 3139 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] 3140 // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 3141 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] 3142 // CHECK10: omp.inner.for.end: 3143 // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3144 // CHECK10: omp.loop.exit: 3145 // CHECK10-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3146 // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 3147 // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) 3148 // CHECK10-NEXT: br label [[OMP_PRECOND_END]] 3149 // CHECK10: omp.precond.end: 3150 // CHECK10-NEXT: ret void 3151 // 3152 // 3153 // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 3154 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 3155 // CHECK10-NEXT: entry: 3156 // CHECK10-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3157 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3158 // CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 3159 // CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 3160 // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3161 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3162 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3163 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 3164 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3165 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3166 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 3167 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 3168 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 3169 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3170 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3171 // CHECK10-NEXT: [[I5:%.*]] = alloca i32, align 4 3172 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3173 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3174 // CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3175 // CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3176 // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3177 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3178 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3179 // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3180 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3181 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3182 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3183 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3184 // 
CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3185 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3186 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3187 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 3188 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3189 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3190 // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3191 // CHECK10: omp.precond.then: 3192 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 3193 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3194 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 3195 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3196 // CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 3197 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3198 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 3199 // CHECK10-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 3200 // CHECK10-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 3201 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3202 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3203 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3204 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 3205 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3206 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3207 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3208 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 3209 // CHECK10-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] 3210 // CHECK10: cond.true: 3211 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3212 // CHECK10-NEXT: br label [[COND_END:%.*]] 3213 // CHECK10: cond.false: 3214 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3215 // CHECK10-NEXT: br label [[COND_END]] 3216 // CHECK10: cond.end: 3217 // CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 3218 // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 3219 // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 3220 // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 3221 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3222 // CHECK10: omp.inner.for.cond: 3223 // CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3224 // CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3225 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 3226 // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3227 // CHECK10: omp.inner.for.body: 3228 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3229 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 3230 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 3231 // CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 3232 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 3233 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 3234 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 3235 // CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 3236 // CHECK10-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3237 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 3238 // CHECK10-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 3239 // 
CHECK10-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 3240 // CHECK10-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 3241 // CHECK10: .cancel.exit: 3242 // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] 3243 // CHECK10: .cancel.continue: 3244 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 3245 // CHECK10: omp.body.continue: 3246 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3247 // CHECK10: omp.inner.for.inc: 3248 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3249 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 3250 // CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 3251 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] 3252 // CHECK10: omp.inner.for.end: 3253 // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3254 // CHECK10: omp.loop.exit: 3255 // CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3256 // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 3257 // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 3258 // CHECK10-NEXT: br label [[OMP_PRECOND_END]] 3259 // CHECK10: cancel.exit: 3260 // CHECK10-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3261 // CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 3262 // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 3263 // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] 3264 // CHECK10: omp.precond.end: 3265 // CHECK10-NEXT: br label [[CANCEL_CONT]] 3266 // CHECK10: cancel.cont: 3267 // CHECK10-NEXT: ret void 3268 // 3269 // 3270 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 3271 // CHECK10-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 3272 // CHECK10-NEXT: entry: 3273 // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, 
align 8 3274 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3275 // CHECK10-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 3276 // CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3277 // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3278 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3279 // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 3280 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3281 // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3282 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3283 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3284 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 3285 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 3286 // CHECK10-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 3287 // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 3288 // CHECK10-NEXT: ret void 3289 // 3290 // 3291 // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 3292 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 3293 // CHECK10-NEXT: entry: 3294 // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3295 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3296 // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3297 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3298 // CHECK10-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 3299 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3300 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 3301 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3302 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3303 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 3304 // CHECK10-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 3305 // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 3306 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3307 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3308 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 3309 // CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3310 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3311 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3312 // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3313 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3314 // CHECK10-NEXT: store i32* 
[[G]], i32** [[G_ADDR]], align 8 3315 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3316 // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3317 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3318 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3319 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3320 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3321 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3322 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3323 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3324 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 3325 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3326 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3327 // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3328 // CHECK10: omp.precond.then: 3329 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 3330 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3331 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 3332 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3333 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3334 // CHECK10-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3335 // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 3336 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3337 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3338 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3339 // CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 3340 // 
CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3341 // CHECK10: cond.true: 3342 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3343 // CHECK10-NEXT: br label [[COND_END:%.*]] 3344 // CHECK10: cond.false: 3345 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3346 // CHECK10-NEXT: br label [[COND_END]] 3347 // CHECK10: cond.end: 3348 // CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 3349 // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 3350 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3351 // CHECK10-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 3352 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3353 // CHECK10: omp.inner.for.cond: 3354 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3355 // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3356 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 3357 // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3358 // CHECK10: omp.inner.for.body: 3359 // CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3360 // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 3361 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3362 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 3363 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 3364 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3365 // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 3366 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 3367 // CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 3368 // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) 3369 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3370 // CHECK10: omp.inner.for.inc: 3371 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3372 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 3373 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] 3374 // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 3375 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] 3376 // CHECK10: omp.inner.for.end: 3377 // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3378 // CHECK10: omp.loop.exit: 3379 // CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3380 // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 3381 // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 3382 // CHECK10-NEXT: br label [[OMP_PRECOND_END]] 3383 // CHECK10: omp.precond.end: 3384 // CHECK10-NEXT: ret void 3385 // 3386 // 3387 // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 3388 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 3389 // CHECK10-NEXT: entry: 3390 // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3391 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3392 // CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 3393 // CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 3394 // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3395 // 
CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3396 // CHECK10-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 3397 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3398 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 3399 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3400 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3401 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 3402 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 3403 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 3404 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3405 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3406 // CHECK10-NEXT: [[I5:%.*]] = alloca i32, align 4 3407 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3408 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3409 // CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3410 // CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3411 // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3412 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3413 // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 3414 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3415 // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3416 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3417 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3418 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3419 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3420 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3421 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3422 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3423 // CHECK10-NEXT: store i32 
0, i32* [[I]], align 4 3424 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3425 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3426 // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3427 // CHECK10: omp.precond.then: 3428 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 3429 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3430 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 3431 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3432 // CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 3433 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3434 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 3435 // CHECK10-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 3436 // CHECK10-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 3437 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3438 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3439 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3440 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 3441 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3442 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3443 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3444 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 3445 // CHECK10-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3446 // CHECK10: cond.true: 3447 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3448 // CHECK10-NEXT: br label [[COND_END:%.*]] 3449 // CHECK10: cond.false: 3450 // CHECK10-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3451 // CHECK10-NEXT: br label [[COND_END]] 3452 // CHECK10: cond.end: 3453 // CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 3454 // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 3455 // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 3456 // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 3457 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3458 // CHECK10: omp.inner.for.cond: 3459 // CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3460 // CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3461 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 3462 // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3463 // CHECK10: omp.inner.for.body: 3464 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3465 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 3466 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 3467 // CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 3468 // CHECK10-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 3469 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 3470 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 3471 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 3472 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 3473 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 3474 // CHECK10-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 3475 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 3476 // CHECK10: omp.body.continue: 3477 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3478 // CHECK10: omp.inner.for.inc: 3479 // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 
3480 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 3481 // CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 3482 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] 3483 // CHECK10: omp.inner.for.end: 3484 // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3485 // CHECK10: omp.loop.exit: 3486 // CHECK10-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3487 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 3488 // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 3489 // CHECK10-NEXT: br label [[OMP_PRECOND_END]] 3490 // CHECK10: omp.precond.end: 3491 // CHECK10-NEXT: ret void 3492 // 3493 // 3494 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 3495 // CHECK11-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { 3496 // CHECK11-NEXT: entry: 3497 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3498 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3499 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 3500 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 3501 // CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3502 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) 3503 // CHECK11-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3504 // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3505 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 3506 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 3507 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3508 // CHECK11-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3509 
// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* 3510 // CHECK11-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* 3511 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 3512 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 3513 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 3514 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 3515 // CHECK11-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3516 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 3517 // CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 3518 // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) 3519 // CHECK11-NEXT: ret void 3520 // 3521 // 3522 // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
3523 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 3524 // CHECK11-NEXT: entry: 3525 // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3526 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3527 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3528 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3529 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3530 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 3531 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3532 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3533 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 3534 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 3535 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 3536 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3537 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3538 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 3539 // CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3540 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3541 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3542 // CHECK11-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3543 // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3544 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3545 // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3546 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3547 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3548 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3549 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3550 
// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3551 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3552 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3553 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 3554 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3555 // CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3556 // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3557 // CHECK11: omp.precond.then: 3558 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 3559 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3560 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 3561 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3562 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3563 // CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3564 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 3565 // CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3566 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3567 // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3568 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 3569 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3570 // CHECK11: cond.true: 3571 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3572 // CHECK11-NEXT: br label [[COND_END:%.*]] 3573 // CHECK11: cond.false: 3574 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3575 // CHECK11-NEXT: br label [[COND_END]] 3576 // CHECK11: cond.end: 3577 // CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] 
], [ [[TMP10]], [[COND_FALSE]] ] 3578 // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 3579 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3580 // CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 3581 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3582 // CHECK11: omp.inner.for.cond: 3583 // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3584 // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3585 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 3586 // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3587 // CHECK11: omp.inner.for.body: 3588 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3589 // CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 3590 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3591 // CHECK11-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 3592 // CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 3593 // CHECK11-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3594 // CHECK11-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 3595 // CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 3596 // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) 3597 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3598 // CHECK11: omp.inner.for.inc: 3599 // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3600 // CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 3601 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] 3602 // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 3603 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] 3604 // CHECK11: omp.inner.for.end: 3605 // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3606 // CHECK11: omp.loop.exit: 3607 // CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3608 // CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 3609 // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) 3610 // CHECK11-NEXT: br label [[OMP_PRECOND_END]] 3611 // CHECK11: omp.precond.end: 3612 // CHECK11-NEXT: ret void 3613 // 3614 // 3615 // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 3616 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 3617 // CHECK11-NEXT: entry: 3618 // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3619 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3620 // CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 3621 // CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 3622 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3623 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, 
align 8 3624 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3625 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 3626 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3627 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3628 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 3629 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 3630 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 3631 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3632 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3633 // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 3634 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3635 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3636 // CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3637 // CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3638 // CHECK11-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3639 // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3640 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3641 // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3642 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3643 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3644 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3645 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3646 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3647 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3648 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3649 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 3650 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3651 // CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3652 // 
CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3653 // CHECK11: omp.precond.then: 3654 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 3655 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3656 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 3657 // CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3658 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 3659 // CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3660 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 3661 // CHECK11-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 3662 // CHECK11-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 3663 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3664 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3665 // CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3666 // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 3667 // CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3668 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3669 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3670 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 3671 // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3672 // CHECK11: cond.true: 3673 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3674 // CHECK11-NEXT: br label [[COND_END:%.*]] 3675 // CHECK11: cond.false: 3676 // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3677 // CHECK11-NEXT: br label [[COND_END]] 3678 // CHECK11: cond.end: 3679 // CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 3680 // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 3681 // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 3682 // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 3683 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3684 // CHECK11: omp.inner.for.cond: 3685 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3686 // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3687 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 3688 // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3689 // CHECK11: omp.inner.for.body: 3690 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3691 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 3692 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 3693 // CHECK11-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 3694 // CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 3695 // CHECK11-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 3696 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 3697 // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 3698 // CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3699 // CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 3700 // CHECK11-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 3701 // CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 3702 // CHECK11-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 3703 // CHECK11: .cancel.exit: 3704 // CHECK11-NEXT: br label [[CANCEL_EXIT:%.*]] 3705 // CHECK11: .cancel.continue: 3706 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 3707 // CHECK11: omp.body.continue: 3708 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] 3709 // CHECK11: omp.inner.for.inc: 3710 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3711 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 3712 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 3713 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] 3714 // CHECK11: omp.inner.for.end: 3715 // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3716 // CHECK11: omp.loop.exit: 3717 // CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3718 // CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 3719 // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 3720 // CHECK11-NEXT: br label [[OMP_PRECOND_END]] 3721 // CHECK11: cancel.exit: 3722 // CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3723 // CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 3724 // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 3725 // CHECK11-NEXT: br label [[CANCEL_CONT:%.*]] 3726 // CHECK11: omp.precond.end: 3727 // CHECK11-NEXT: br label [[CANCEL_CONT]] 3728 // CHECK11: cancel.cont: 3729 // CHECK11-NEXT: ret void 3730 // 3731 // 3732 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 3733 // CHECK11-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 3734 // CHECK11-NEXT: entry: 3735 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3736 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3737 // CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 3738 // CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3739 // CHECK11-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3740 // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3741 // CHECK11-NEXT: store i32* [[G]], i32** 
[[G_ADDR]], align 8 3742 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3743 // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3744 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3745 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3746 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 3747 // CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 3748 // CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 3749 // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 3750 // CHECK11-NEXT: ret void 3751 // 3752 // 3753 // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 3754 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 3755 // CHECK11-NEXT: entry: 3756 // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3757 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3758 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3759 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3760 // CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 3761 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3762 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 3763 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3764 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3765 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 3766 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 3767 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca 
i32, align 4 3768 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3769 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3770 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 3771 // CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 3772 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3773 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3774 // CHECK11-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3775 // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3776 // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 3777 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3778 // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3779 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3780 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3781 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3782 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3783 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3784 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3785 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3786 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 3787 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3788 // CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3789 // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3790 // CHECK11: omp.precond.then: 3791 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 3792 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3793 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 3794 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3795 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3796 
// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3797 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 3798 // CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3799 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3800 // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3801 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 3802 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3803 // CHECK11: cond.true: 3804 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3805 // CHECK11-NEXT: br label [[COND_END:%.*]] 3806 // CHECK11: cond.false: 3807 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3808 // CHECK11-NEXT: br label [[COND_END]] 3809 // CHECK11: cond.end: 3810 // CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 3811 // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 3812 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3813 // CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 3814 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3815 // CHECK11: omp.inner.for.cond: 3816 // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3817 // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3818 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 3819 // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3820 // CHECK11: omp.inner.for.body: 3821 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 3822 // CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 3823 // CHECK11-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 3824 // CHECK11-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 3825 // CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 3826 // CHECK11-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* 3827 // CHECK11-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 3828 // CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 3829 // CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 3830 // CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) 3831 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3832 // CHECK11: omp.inner.for.inc: 3833 // CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3834 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 3835 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] 3836 // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 3837 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] 3838 // CHECK11: omp.inner.for.end: 3839 // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3840 // CHECK11: omp.loop.exit: 3841 // CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3842 // CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 3843 // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 3844 // CHECK11-NEXT: br label [[OMP_PRECOND_END]] 3845 // CHECK11: omp.precond.end: 3846 // CHECK11-NEXT: ret void 3847 // 3848 // 3849 // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 3850 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 3851 // CHECK11-NEXT: entry: 3852 // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3853 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3854 // CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 3855 // CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 3856 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 3857 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 3858 // CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 3859 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3860 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 3861 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3862 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3863 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 3864 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 3865 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 3866 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3867 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3868 // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 3869 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 3870 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 3871 // CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3872 // CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3873 // CHECK11-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 3874 // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 3875 // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 3876 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 3877 // CHECK11-NEXT: 
[[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 3878 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 3879 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 3880 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3881 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 3882 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 3883 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 3884 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 3885 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 3886 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 3887 // CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 3888 // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3889 // CHECK11: omp.precond.then: 3890 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 3891 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3892 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 3893 // CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 3894 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 3895 // CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 3896 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 3897 // CHECK11-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 3898 // CHECK11-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 3899 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 3900 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 3901 // CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3902 // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 3903 // CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* 
[[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3904 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3905 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3906 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 3907 // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3908 // CHECK11: cond.true: 3909 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 3910 // CHECK11-NEXT: br label [[COND_END:%.*]] 3911 // CHECK11: cond.false: 3912 // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3913 // CHECK11-NEXT: br label [[COND_END]] 3914 // CHECK11: cond.end: 3915 // CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 3916 // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 3917 // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 3918 // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 3919 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3920 // CHECK11: omp.inner.for.cond: 3921 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3922 // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 3923 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 3924 // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 3925 // CHECK11: omp.inner.for.body: 3926 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3927 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 3928 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 3929 // CHECK11-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 3930 // CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 3931 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 3932 // CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 3933 // 
CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 3934 // CHECK11-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 3935 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] 3936 // CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 3937 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 3938 // CHECK11: omp.body.continue: 3939 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3940 // CHECK11: omp.inner.for.inc: 3941 // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 3942 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 3943 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 3944 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] 3945 // CHECK11: omp.inner.for.end: 3946 // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3947 // CHECK11: omp.loop.exit: 3948 // CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3949 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 3950 // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 3951 // CHECK11-NEXT: br label [[OMP_PRECOND_END]] 3952 // CHECK11: omp.precond.end: 3953 // CHECK11-NEXT: ret void 3954 // 3955 // 3956 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 3957 // CHECK12-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { 3958 // CHECK12-NEXT: entry: 3959 // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 3960 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 3961 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 3962 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 3963 // CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 3964 // CHECK12-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) 3965 // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 3966 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 3967 // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 3968 // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 3969 // CHECK12-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 3970 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 3971 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 3972 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 3973 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 3974 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[N_CASTED]], align 4 3975 // CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4 3976 // CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], [1000 x i32]* [[TMP1]]) 3977 // CHECK12-NEXT: ret void 3978 // 3979 // 3980 // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. 
3981 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 3982 // CHECK12-NEXT: entry: 3983 // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 3984 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 3985 // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 3986 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 3987 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3988 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 3989 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3990 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3991 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 3992 // CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 3993 // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 3994 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3995 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3996 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 3997 // CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 3998 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 3999 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 4000 // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4001 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4002 // CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4003 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4004 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4005 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4006 // CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4007 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4008 // 
CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4009 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4010 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 4011 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4012 // CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4013 // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 4014 // CHECK12: omp.precond.then: 4015 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 4016 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4017 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 4018 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4019 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4020 // CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4021 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 4022 // CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4023 // CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4024 // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4025 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 4026 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4027 // CHECK12: cond.true: 4028 // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4029 // CHECK12-NEXT: br label [[COND_END:%.*]] 4030 // CHECK12: cond.false: 4031 // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4032 // CHECK12-NEXT: br label [[COND_END]] 4033 // CHECK12: cond.end: 4034 // CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 4035 // CHECK12-NEXT: 
store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 4036 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4037 // CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 4038 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4039 // CHECK12: omp.inner.for.cond: 4040 // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4041 // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4042 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 4043 // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4044 // CHECK12: omp.inner.for.body: 4045 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4046 // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4047 // CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 4048 // CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 4049 // CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 4050 // CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]]) 4051 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4052 // CHECK12: omp.inner.for.inc: 4053 // CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4054 // CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 4055 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] 4056 // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 4057 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] 4058 // CHECK12: omp.inner.for.end: 4059 // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 4060 // CHECK12: omp.loop.exit: 4061 // CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4062 // CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 4063 // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) 4064 // CHECK12-NEXT: br label [[OMP_PRECOND_END]] 4065 // CHECK12: omp.precond.end: 4066 // CHECK12-NEXT: ret void 4067 // 4068 // 4069 // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..1 4070 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 4071 // CHECK12-NEXT: entry: 4072 // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 4073 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 4074 // CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 4075 // CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 4076 // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4077 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, 
align 4 4078 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 4079 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 4080 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 4081 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 4082 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 4083 // CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 4084 // CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 4085 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 4086 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 4087 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 4088 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 4089 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 4090 // CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4091 // CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4092 // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4093 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4094 // CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4095 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4096 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4097 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4098 // CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4099 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4100 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4101 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4102 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 4103 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4104 // CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4105 // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] 4106 // CHECK12: omp.precond.then: 4107 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 4108 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4109 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 4110 // CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4111 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4112 // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 4113 // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 4114 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4115 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4116 // CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4117 // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 4118 // CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4119 // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4120 // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4121 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 4122 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4123 // CHECK12: cond.true: 4124 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4125 // CHECK12-NEXT: br label [[COND_END:%.*]] 4126 // CHECK12: cond.false: 4127 // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4128 // CHECK12-NEXT: br label [[COND_END]] 4129 // CHECK12: cond.end: 4130 // CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 4131 // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 4132 // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 
4 4133 // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 4134 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4135 // CHECK12: omp.inner.for.cond: 4136 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4137 // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4138 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 4139 // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4140 // CHECK12: omp.inner.for.body: 4141 // CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4142 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 4143 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 4144 // CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 4145 // CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 4146 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP17]] 4147 // CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 4148 // CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4149 // CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 4150 // CHECK12-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 4151 // CHECK12-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 4152 // CHECK12-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 4153 // CHECK12: .cancel.exit: 4154 // CHECK12-NEXT: br label [[CANCEL_EXIT:%.*]] 4155 // CHECK12: .cancel.continue: 4156 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 4157 // CHECK12: omp.body.continue: 4158 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4159 // CHECK12: omp.inner.for.inc: 4160 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4161 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 4162 // CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 4163 
// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] 4164 // CHECK12: omp.inner.for.end: 4165 // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 4166 // CHECK12: omp.loop.exit: 4167 // CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4168 // CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 4169 // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 4170 // CHECK12-NEXT: br label [[OMP_PRECOND_END]] 4171 // CHECK12: cancel.exit: 4172 // CHECK12-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4173 // CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 4174 // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 4175 // CHECK12-NEXT: br label [[CANCEL_CONT:%.*]] 4176 // CHECK12: omp.precond.end: 4177 // CHECK12-NEXT: br label [[CANCEL_CONT]] 4178 // CHECK12: cancel.cont: 4179 // CHECK12-NEXT: ret void 4180 // 4181 // 4182 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 4183 // CHECK12-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 4184 // CHECK12-NEXT: entry: 4185 // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4186 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4187 // CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 4188 // CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 4189 // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4190 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4191 // CHECK12-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 4192 // CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4193 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4194 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 4195 // CHECK12-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 4196 // CHECK12-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 4197 // CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 4198 // CHECK12-NEXT: ret void 4199 // 4200 // 4201 // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..2 4202 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 4203 // CHECK12-NEXT: entry: 4204 // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 4205 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 4206 // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4207 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4208 // CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 4209 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 4210 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 4211 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 4212 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 4213 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 4214 // CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 4215 // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 4216 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 4217 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 4218 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 4219 // CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 4220 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 4221 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 4222 // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4223 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4224 // CHECK12-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 4225 // CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4226 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4227 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4228 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4229 // CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4230 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4231 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4232 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4233 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 4234 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4235 // CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4236 // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 4237 // CHECK12: omp.precond.then: 4238 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 4239 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4240 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 4241 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4242 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4243 // CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4244 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 4245 // CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4246 // CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4247 // CHECK12-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4248 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 4249 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4250 // CHECK12: cond.true: 4251 // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4252 // CHECK12-NEXT: br label [[COND_END:%.*]] 4253 // CHECK12: cond.false: 4254 // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4255 // CHECK12-NEXT: br label [[COND_END]] 4256 // CHECK12: cond.end: 4257 // CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 4258 // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 4259 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4260 // CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 4261 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4262 // CHECK12: omp.inner.for.cond: 4263 // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4264 // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4265 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 4266 // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4267 // CHECK12: omp.inner.for.body: 4268 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4269 // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4270 // CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 4271 // CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 4272 // CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 4273 // CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 4274 // CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) 4275 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4276 // CHECK12: omp.inner.for.inc: 4277 // CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4278 // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 4279 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] 4280 // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 4281 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] 4282 // CHECK12: omp.inner.for.end: 4283 // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 4284 // CHECK12: omp.loop.exit: 4285 // CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4286 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 4287 // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 4288 // CHECK12-NEXT: br label [[OMP_PRECOND_END]] 4289 // CHECK12: omp.precond.end: 4290 // CHECK12-NEXT: ret void 4291 // 4292 // 4293 // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3 4294 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 4295 // CHECK12-NEXT: entry: 4296 // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 4297 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 4298 // CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 4299 // CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 4300 // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4301 // 
CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4302 // CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 4303 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 4304 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 4305 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 4306 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 4307 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 4308 // CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 4309 // CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 4310 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 4311 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 4312 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 4313 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 4314 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 4315 // CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4316 // CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4317 // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4318 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4319 // CHECK12-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 4320 // CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4321 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4322 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4323 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4324 // CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4325 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4326 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4327 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4328 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 4329 // CHECK12-NEXT: [[TMP3:%.*]] = load 
i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4330 // CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4331 // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 4332 // CHECK12: omp.precond.then: 4333 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 4334 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4335 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 4336 // CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4337 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4338 // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 4339 // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 4340 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4341 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4342 // CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4343 // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 4344 // CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4345 // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4346 // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4347 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 4348 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4349 // CHECK12: cond.true: 4350 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4351 // CHECK12-NEXT: br label [[COND_END:%.*]] 4352 // CHECK12: cond.false: 4353 // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4354 // CHECK12-NEXT: br label [[COND_END]] 4355 // CHECK12: cond.end: 4356 // CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ 
[[TMP12]], [[COND_FALSE]] ] 4357 // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 4358 // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 4359 // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 4360 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4361 // CHECK12: omp.inner.for.cond: 4362 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4363 // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4364 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 4365 // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4366 // CHECK12: omp.inner.for.body: 4367 // CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4368 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 4369 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 4370 // CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 4371 // CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 4372 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 4373 // CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 4374 // CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 4375 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] 4376 // CHECK12-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 4377 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 4378 // CHECK12: omp.body.continue: 4379 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4380 // CHECK12: omp.inner.for.inc: 4381 // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4382 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 4383 // CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 4384 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] 4385 // CHECK12: omp.inner.for.end: 4386 // CHECK12-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] 4387 // CHECK12: omp.loop.exit: 4388 // CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4389 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 4390 // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 4391 // CHECK12-NEXT: br label [[OMP_PRECOND_END]] 4392 // CHECK12: omp.precond.end: 4393 // CHECK12-NEXT: ret void 4394 // 4395 // 4396 // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 4397 // CHECK13-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { 4398 // CHECK13-NEXT: entry: 4399 // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4400 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4401 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 4402 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 4403 // CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 4404 // CHECK13-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) 4405 // CHECK13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4406 // CHECK13-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4407 // CHECK13-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 4408 // CHECK13-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 4409 // CHECK13-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4410 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 4411 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 4412 // CHECK13-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) 4413 // CHECK13-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 4414 // CHECK13-NEXT: store i32 [[TMP4]], i32* [[N_CASTED]], align 4 4415 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4 4416 // CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], [1000 x i32]* [[TMP1]]) 4417 // CHECK13-NEXT: ret void 4418 // 4419 // 4420 // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 4421 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 4422 // CHECK13-NEXT: entry: 4423 // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 4424 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 4425 // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4426 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4427 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 4428 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 4429 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 4430 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 4431 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 4432 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 4433 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 4434 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 4435 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 4436 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 4437 // CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 4438 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 4439 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 4440 // 
CHECK13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4441 // CHECK13-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4442 // CHECK13-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4443 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4444 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4445 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4446 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4447 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4448 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4449 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4450 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 4451 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4452 // CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4453 // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 4454 // CHECK13: omp.precond.then: 4455 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 4456 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4457 // CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 4458 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4459 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4460 // CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4461 // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 4462 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4463 // CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4464 // CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4465 // CHECK13-NEXT: [[CMP4:%.*]] = 
icmp sgt i32 [[TMP7]], [[TMP8]] 4466 // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4467 // CHECK13: cond.true: 4468 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4469 // CHECK13-NEXT: br label [[COND_END:%.*]] 4470 // CHECK13: cond.false: 4471 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4472 // CHECK13-NEXT: br label [[COND_END]] 4473 // CHECK13: cond.end: 4474 // CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 4475 // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 4476 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4477 // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 4478 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4479 // CHECK13: omp.inner.for.cond: 4480 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4481 // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4482 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 4483 // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4484 // CHECK13: omp.inner.for.body: 4485 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4486 // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4487 // CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 4488 // CHECK13-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 4489 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 4490 // CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]]) 4491 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4492 // CHECK13: omp.inner.for.inc: 4493 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4494 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 4495 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] 4496 // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 4497 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] 4498 // CHECK13: omp.inner.for.end: 4499 // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 4500 // CHECK13: omp.loop.exit: 4501 // CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4502 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 4503 // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) 4504 // CHECK13-NEXT: br label [[OMP_PRECOND_END]] 4505 // CHECK13: omp.precond.end: 4506 // CHECK13-NEXT: ret void 4507 // 4508 // 4509 // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 4510 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { 4511 // CHECK13-NEXT: entry: 4512 // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 4513 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 4514 // CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 4515 // CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 4516 // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4517 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, 
align 4 4518 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 4519 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 4520 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 4521 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 4522 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 4523 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 4524 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 4525 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 4526 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 4527 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 4528 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 4529 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 4530 // CHECK13-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4531 // CHECK13-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4532 // CHECK13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4533 // CHECK13-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4534 // CHECK13-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4535 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4536 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4537 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4538 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4539 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4540 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4541 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4542 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 4543 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4544 // CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4545 // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] 4546 // CHECK13: omp.precond.then: 4547 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 4548 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4549 // CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 4550 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4551 // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4552 // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 4553 // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 4554 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4555 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4556 // CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4557 // CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 4558 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4559 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4560 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4561 // CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 4562 // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4563 // CHECK13: cond.true: 4564 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4565 // CHECK13-NEXT: br label [[COND_END:%.*]] 4566 // CHECK13: cond.false: 4567 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4568 // CHECK13-NEXT: br label [[COND_END]] 4569 // CHECK13: cond.end: 4570 // CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] 4571 // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 4572 // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 
4 4573 // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 4574 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4575 // CHECK13: omp.inner.for.cond: 4576 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4577 // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4578 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 4579 // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4580 // CHECK13: omp.inner.for.body: 4581 // CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4582 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 4583 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 4584 // CHECK13-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 4585 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 4586 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP17]] 4587 // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 4588 // CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4589 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 4590 // CHECK13-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) 4591 // CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 4592 // CHECK13-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] 4593 // CHECK13: .cancel.exit: 4594 // CHECK13-NEXT: br label [[CANCEL_EXIT:%.*]] 4595 // CHECK13: .cancel.continue: 4596 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 4597 // CHECK13: omp.body.continue: 4598 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4599 // CHECK13: omp.inner.for.inc: 4600 // CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4601 // CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 4602 // CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 4603 
// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] 4604 // CHECK13: omp.inner.for.end: 4605 // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 4606 // CHECK13: omp.loop.exit: 4607 // CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4608 // CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 4609 // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) 4610 // CHECK13-NEXT: br label [[OMP_PRECOND_END]] 4611 // CHECK13: cancel.exit: 4612 // CHECK13-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4613 // CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 4614 // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) 4615 // CHECK13-NEXT: br label [[CANCEL_CONT:%.*]] 4616 // CHECK13: omp.precond.end: 4617 // CHECK13-NEXT: br label [[CANCEL_CONT]] 4618 // CHECK13: cancel.cont: 4619 // CHECK13-NEXT: ret void 4620 // 4621 // 4622 // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58 4623 // CHECK13-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 4624 // CHECK13-NEXT: entry: 4625 // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4626 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4627 // CHECK13-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 4628 // CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 4629 // CHECK13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4630 // CHECK13-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4631 // CHECK13-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 4632 // CHECK13-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4633 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4634 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 4635 // CHECK13-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 4636 // CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 4637 // CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) 4638 // CHECK13-NEXT: ret void 4639 // 4640 // 4641 // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 4642 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 4643 // CHECK13-NEXT: entry: 4644 // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 4645 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 4646 // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4647 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4648 // CHECK13-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 4649 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 4650 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 4651 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 4652 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 4653 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 4654 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 4655 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 4656 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 4657 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 4658 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 4659 // CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 4660 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 4661 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 4662 // CHECK13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4663 // CHECK13-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4664 // CHECK13-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 4665 // CHECK13-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4666 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4667 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4668 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4669 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4670 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4671 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4672 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4673 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 4674 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4675 // CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4676 // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 4677 // CHECK13: omp.precond.then: 4678 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 4679 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4680 // CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 4681 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4682 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4683 // CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4684 // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 4685 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4686 // CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4687 // CHECK13-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4688 // CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] 4689 // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4690 // CHECK13: cond.true: 4691 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4692 // CHECK13-NEXT: br label [[COND_END:%.*]] 4693 // CHECK13: cond.false: 4694 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4695 // CHECK13-NEXT: br label [[COND_END]] 4696 // CHECK13: cond.end: 4697 // CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] 4698 // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 4699 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4700 // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 4701 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4702 // CHECK13: omp.inner.for.cond: 4703 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4704 // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4705 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 4706 // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4707 // CHECK13: omp.inner.for.body: 4708 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 4709 // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 4710 // CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 4711 // CHECK13-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 4712 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 4713 // CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 4714 // CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) 4715 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4716 // CHECK13: omp.inner.for.inc: 4717 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4718 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 4719 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] 4720 // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 4721 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] 4722 // CHECK13: omp.inner.for.end: 4723 // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 4724 // CHECK13: omp.loop.exit: 4725 // CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4726 // CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 4727 // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 4728 // CHECK13-NEXT: br label [[OMP_PRECOND_END]] 4729 // CHECK13: omp.precond.end: 4730 // CHECK13-NEXT: ret void 4731 // 4732 // 4733 // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 4734 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { 4735 // CHECK13-NEXT: entry: 4736 // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 4737 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 4738 // CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 4739 // CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 4740 // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 4741 // 
CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 4742 // CHECK13-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 4743 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 4744 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 4745 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 4746 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 4747 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 4748 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 4749 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 4750 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 4751 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 4752 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 4753 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 4754 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 4755 // CHECK13-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4756 // CHECK13-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4757 // CHECK13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 4758 // CHECK13-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 4759 // CHECK13-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 4760 // CHECK13-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 4761 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 4762 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 4763 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4764 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 4765 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 4766 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 4767 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 4768 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 4769 // CHECK13-NEXT: [[TMP3:%.*]] = load 
i32, i32* [[DOTCAPTURE_EXPR_]], align 4 4770 // CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 4771 // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 4772 // CHECK13: omp.precond.then: 4773 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 4774 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4775 // CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 4776 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 4777 // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 4778 // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 4779 // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 4780 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 4781 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 4782 // CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4783 // CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 4784 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 4785 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4786 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4787 // CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] 4788 // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 4789 // CHECK13: cond.true: 4790 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 4791 // CHECK13-NEXT: br label [[COND_END:%.*]] 4792 // CHECK13: cond.false: 4793 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4794 // CHECK13-NEXT: br label [[COND_END]] 4795 // CHECK13: cond.end: 4796 // CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ 
[[TMP12]], [[COND_FALSE]] ] 4797 // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 4798 // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 4799 // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 4800 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 4801 // CHECK13: omp.inner.for.cond: 4802 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4803 // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 4804 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] 4805 // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 4806 // CHECK13: omp.inner.for.body: 4807 // CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4808 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 4809 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 4810 // CHECK13-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 4811 // CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 4812 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 4813 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 4814 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 4815 // CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] 4816 // CHECK13-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 4817 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 4818 // CHECK13: omp.body.continue: 4819 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 4820 // CHECK13: omp.inner.for.inc: 4821 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 4822 // CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 4823 // CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 4824 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] 4825 // CHECK13: omp.inner.for.end: 4826 // CHECK13-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] 4827 // CHECK13: omp.loop.exit: 4828 // CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 4829 // CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 4830 // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) 4831 // CHECK13-NEXT: br label [[OMP_PRECOND_END]] 4832 // CHECK13: omp.precond.end: 4833 // CHECK13-NEXT: ret void 4834 // 4835