1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // Test target codegen - host bc file has to be created first.
3 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
4 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
5 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
6 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
7 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
8
9 // expected-no-diagnostics
10 #ifndef HEADER
11 #define HEADER
12
13 #define N 1000
14 #define M 10
15
16 template<typename tx> // Codegen test input: four combined 'target teams distribute parallel for simd' regions, each exercising a different clause mix.
ftemplate(int n)17 tx ftemplate(int n) { // n bounds the first two loops, feeds the if(target:) condition and seeds f; the function returns a[0].
18 tx a[N]; // written by region 1, mapped again in region 3; a[0] is the return value
19 short aa[N]; // incremented by region 2, mapped again in region 3
20 tx b[10]; // incremented by region 3
21 tx c[M][M]; // filled by the collapsed loop nest of region 4
22 tx f = n; // firstprivate in region 4
23 tx l; // lastprivate in region 1
24 int k; // private in region 4
25 // Region 1: lastprivate(l) with dist_schedule chunk 128 and schedule chunk 32; the assertions below show 128 and 32 as the trailing arguments of __kmpc_distribute_static_init_4 and __kmpc_for_static_init_4. Keep this pragma on its current line: the asserted kernel name ends in _l26 (presumably the source line is encoded in the name).
26 #pragma omp target teams distribute parallel for simd lastprivate(l) dist_schedule(static,128) schedule(static,32)
27 for(int i = 0; i < n; i++) { // NOTE(review): nothing here checks n <= N; presumably acceptable because the visible RUN lines only emit IR and never execute it
28 a[i] = 1;
29 l = i; // l is the lastprivate variable of this region
30 }
31 // Region 2: explicit tofrom map of aa with num_teams(M) and thread_limit(64); the asserted kernel name for this region ends in _l32, so this pragma must also stay on its current line.
32 #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
33 for(int i = 0; i < n; i++) {
34 aa[i] += 1;
35 }
36 // Region 3: maps a, aa and b tofrom; the if(target: n>40) clause makes the target part conditional on n, and proc_bind(spread) is specified for the parallel part.
37 #pragma omp target teams distribute parallel for simd map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
38 for(int i = 0; i < 10; i++) { // constant trip count, equal to the declared extent of b
39 b[i] += 1;
40 }
41 // Region 4: collapse(2) over the i/j nest; f is firstprivate and k is private.
42 #pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k)
43 for(int i = 0; i < M; i++) {
44 for(int j = 0; j < M; j++) {
45 k = M; // the private k is assigned in every iteration before it is read on the next line
46 c[i][j] = i+j*f+k;
47 }
48 }
49
50 return a[0]; // first element of the array written by region 1
51 }
52
bar(int n)53 int bar(int n){ // Non-template caller: the only visible instantiation point of ftemplate, with tx = int.
54 int a = 0;
55 // The kernel names asserted below contain 'ftemplateIiET_i', presumably the mangled form of this int instantiation.
56 a += ftemplate<int>(n);
57
58 return a; // a started at 0, so this is the value ftemplate<int>(n) returned
59 }
60
61 #endif
62 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
63 // CHECK1-SAME: (i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
64 // CHECK1-NEXT: entry:
65 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
66 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8
67 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
68 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
69 // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
70 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
71 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
72 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
73 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
74 // CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8
75 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
76 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8
77 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32*
78 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false)
79 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
80 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
81 // CHECK1: user_code.entry:
82 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]])
83 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
84 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32*
85 // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4
86 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
87 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4
88 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32*
89 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4
90 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8
91 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
92 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
93 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
94 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
95 // CHECK1-NEXT: ret void
96 // CHECK1: worker.exit:
97 // CHECK1-NEXT: ret void
98 //
99 //
100 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__
101 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
102 // CHECK1-NEXT: entry:
103 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
104 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
105 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
106 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8
107 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
108 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
109 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
110 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
111 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
112 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
113 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
114 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
115 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
116 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
117 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4
118 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
119 // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
120 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8
121 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
122 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
123 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
124 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
125 // CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8
126 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
127 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8
128 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32*
129 // CHECK1-NEXT: [[L2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4)
130 // CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32*
131 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4
132 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
133 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
134 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
135 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
136 // CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
137 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4
138 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4
139 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
140 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
141 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
142 // CHECK1: omp.precond.then:
143 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
144 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
145 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
146 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
147 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
148 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
149 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
150 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128)
151 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
152 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
153 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
154 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
155 // CHECK1: cond.true:
156 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
157 // CHECK1-NEXT: br label [[COND_END:%.*]]
158 // CHECK1: cond.false:
159 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
160 // CHECK1-NEXT: br label [[COND_END]]
161 // CHECK1: cond.end:
162 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
163 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
164 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
165 // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
166 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
167 // CHECK1: omp.inner.for.cond:
168 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
169 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12
170 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
171 // CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
172 // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
173 // CHECK1: omp.inner.for.body:
174 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
175 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
176 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
177 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
178 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12
179 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32*
180 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !12
181 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12
182 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !12
183 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32*
184 // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !12
185 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8, !llvm.access.group !12
186 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
187 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8*
188 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !12
189 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
190 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8*
191 // CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !12
192 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
193 // CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8*
194 // CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !12
195 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
196 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8*
197 // CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8, !llvm.access.group !12
198 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
199 // CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8*
200 // CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8, !llvm.access.group !12
201 // CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !12
202 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4, !llvm.access.group !12
203 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
204 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5), !llvm.access.group !12
205 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
206 // CHECK1: omp.inner.for.inc:
207 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
208 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12
209 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
210 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
211 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
212 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12
213 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
214 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
215 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
216 // CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12
217 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]]
218 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
219 // CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
220 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12
221 // CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]]
222 // CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]]
223 // CHECK1: cond.true14:
224 // CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12
225 // CHECK1-NEXT: br label [[COND_END16:%.*]]
226 // CHECK1: cond.false15:
227 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
228 // CHECK1-NEXT: br label [[COND_END16]]
229 // CHECK1: cond.end16:
230 // CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ]
231 // CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
232 // CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
233 // CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
234 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
235 // CHECK1: omp.inner.for.end:
236 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
237 // CHECK1: omp.loop.exit:
238 // CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
239 // CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
240 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]])
241 // CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
242 // CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
243 // CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
244 // CHECK1: .omp.final.then:
245 // CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
246 // CHECK1-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP50]], 0
247 // CHECK1-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1
248 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1
249 // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]]
250 // CHECK1-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4
251 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
252 // CHECK1: .omp.final.done:
253 // CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
254 // CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0
255 // CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
256 // CHECK1: .omp.lastprivate.then:
257 // CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[CONV1]], align 4
258 // CHECK1-NEXT: store i32 [[TMP53]], i32* [[CONV1]], align 4
259 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
260 // CHECK1: .omp.lastprivate.done:
261 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
262 // CHECK1: omp.precond.end:
263 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]], i64 4)
264 // CHECK1-NEXT: ret void
265 //
266 //
267 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
268 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] {
269 // CHECK1-NEXT: entry:
270 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
271 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
272 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
273 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
274 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
275 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8
276 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
277 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
278 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
279 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
280 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
281 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
282 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
283 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
284 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
285 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
286 // CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4
287 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
288 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
289 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
290 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
291 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
292 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8
293 // CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8
294 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
295 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8
296 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32*
297 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4
298 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
299 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
300 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
301 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
302 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
303 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
304 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4
305 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
306 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
307 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
308 // CHECK1: omp.precond.then:
309 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
310 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
311 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
312 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
313 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32
314 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
315 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32
316 // CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4
317 // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4
318 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
319 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
320 // CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
321 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
322 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32)
323 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
324 // CHECK1: omp.dispatch.cond:
325 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
326 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
327 // CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32
328 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]]
329 // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
330 // CHECK1: cond.true:
331 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
332 // CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32
333 // CHECK1-NEXT: br label [[COND_END:%.*]]
334 // CHECK1: cond.false:
335 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
336 // CHECK1-NEXT: br label [[COND_END]]
337 // CHECK1: cond.end:
338 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
339 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
340 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
341 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
342 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
343 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
344 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
345 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
346 // CHECK1: omp.dispatch.body:
347 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
348 // CHECK1: omp.inner.for.cond:
349 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
350 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16
351 // CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
352 // CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
353 // CHECK1: omp.inner.for.body:
354 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
355 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
356 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
357 // CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !16
358 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16
359 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
360 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]]
361 // CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16
362 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16
363 // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4, !llvm.access.group !16
364 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
365 // CHECK1: omp.body.continue:
366 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
367 // CHECK1: omp.inner.for.inc:
368 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
369 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1
370 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
371 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
372 // CHECK1: omp.inner.for.end:
373 // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
374 // CHECK1: omp.dispatch.inc:
375 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
376 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
377 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
378 // CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4
379 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
380 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
381 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
382 // CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4
383 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
384 // CHECK1: omp.dispatch.end:
385 // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
386 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4
387 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]])
388 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
389 // CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
390 // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
391 // CHECK1: .omp.final.then:
392 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
393 // CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP30]], 0
394 // CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
395 // CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1
396 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]]
397 // CHECK1-NEXT: store i32 [[ADD18]], i32* [[I6]], align 4
398 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
399 // CHECK1: .omp.final.done:
400 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
401 // CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
402 // CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
403 // CHECK1: .omp.lastprivate.then:
404 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 4
405 // CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4
406 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
407 // CHECK1: .omp.lastprivate.done:
408 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
409 // CHECK1: omp.precond.end:
410 // CHECK1-NEXT: ret void
411 //
412 //
413 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
414 // CHECK1-SAME: (i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR6:[0-9]+]] {
415 // CHECK1-NEXT: entry:
416 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
417 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8
418 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
419 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
420 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
421 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
422 // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8
423 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
424 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8
425 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
426 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
427 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
428 // CHECK1: user_code.entry:
429 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
430 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
431 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
432 // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
433 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
434 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
435 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
436 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR3]]
437 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
438 // CHECK1-NEXT: ret void
439 // CHECK1: worker.exit:
440 // CHECK1-NEXT: ret void
441 //
442 //
443 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2
444 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
445 // CHECK1-NEXT: entry:
446 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
447 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
448 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
449 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8
450 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
451 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
452 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
453 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
454 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
455 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
456 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
457 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
458 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
459 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
460 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
461 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8
462 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
463 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
464 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
465 // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8
466 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
467 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8
468 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4
469 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
470 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
471 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
472 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
473 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
474 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
475 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4
476 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
477 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
478 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
479 // CHECK1: omp.precond.then:
480 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
481 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
482 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
483 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
484 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
485 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
486 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
487 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
488 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
489 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
490 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
491 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
492 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
493 // CHECK1: cond.true:
494 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
495 // CHECK1-NEXT: br label [[COND_END:%.*]]
496 // CHECK1: cond.false:
497 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
498 // CHECK1-NEXT: br label [[COND_END]]
499 // CHECK1: cond.end:
500 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
501 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
502 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
503 // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
504 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
505 // CHECK1: omp.inner.for.cond:
506 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
507 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19
508 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
509 // CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
510 // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
511 // CHECK1: omp.inner.for.body:
512 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
513 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
514 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
515 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
516 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !19
517 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
518 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !19
519 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !19
520 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
521 // CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8*
522 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !llvm.access.group !19
523 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
524 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8*
525 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !19
526 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
527 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8*
528 // CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !19
529 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
530 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8*
531 // CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !19
532 // CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19
533 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4, !llvm.access.group !19
534 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
535 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4), !llvm.access.group !19
536 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
537 // CHECK1: omp.inner.for.inc:
538 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
539 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
540 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
541 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
542 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
543 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
544 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
545 // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
546 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
547 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
548 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
549 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
550 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
551 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19
552 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
553 // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
554 // CHECK1: cond.true11:
555 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19
556 // CHECK1-NEXT: br label [[COND_END13:%.*]]
557 // CHECK1: cond.false12:
558 // CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
559 // CHECK1-NEXT: br label [[COND_END13]]
560 // CHECK1: cond.end13:
561 // CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ]
562 // CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
563 // CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
564 // CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
565 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
566 // CHECK1: omp.inner.for.end:
567 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
568 // CHECK1: omp.loop.exit:
569 // CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
570 // CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4
571 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]])
572 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
573 // CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
574 // CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
575 // CHECK1: .omp.final.then:
576 // CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
577 // CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0
578 // CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
579 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1
580 // CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]]
581 // CHECK1-NEXT: store i32 [[ADD17]], i32* [[I3]], align 4
582 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
583 // CHECK1: .omp.final.done:
584 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
585 // CHECK1: omp.precond.end:
586 // CHECK1-NEXT: ret void
587 //
588 //
589 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3
590 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
591 // CHECK1-NEXT: entry:
592 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
593 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
594 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
595 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
596 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
597 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8
598 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
599 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
600 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
601 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
602 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
603 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
604 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
605 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
606 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
607 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4
608 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
609 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
610 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
611 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
612 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8
613 // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8
614 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
615 // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8
616 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4
617 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
618 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
619 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
620 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
621 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
622 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
623 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4
624 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
625 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
626 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
627 // CHECK1: omp.precond.then:
628 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
629 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
630 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
631 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
632 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32
633 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
634 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32
635 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4
636 // CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4
637 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
638 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
639 // CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
640 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
641 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
642 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
643 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4
644 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
645 // CHECK1: omp.inner.for.cond:
646 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
647 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64
648 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !22
649 // CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]]
650 // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
651 // CHECK1: omp.inner.for.body:
652 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
653 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
654 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
655 // CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22
656 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22
657 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
658 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]]
659 // CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22
660 // CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32
661 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1
662 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16
663 // CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22
664 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
665 // CHECK1: omp.body.continue:
666 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
667 // CHECK1: omp.inner.for.inc:
668 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
669 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22
670 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
671 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
672 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
673 // CHECK1: omp.inner.for.end:
674 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
675 // CHECK1: omp.loop.exit:
676 // CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
677 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
678 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]])
679 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
680 // CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
681 // CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
682 // CHECK1: .omp.final.then:
683 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
684 // CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP21]], 0
685 // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
686 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
687 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
688 // CHECK1-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4
689 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
690 // CHECK1: .omp.final.done:
691 // CHECK1-NEXT: br label [[OMP_PRECOND_END]]
692 // CHECK1: omp.precond.end:
693 // CHECK1-NEXT: ret void
694 //
695 //
696 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37
697 // CHECK1-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
698 // CHECK1-NEXT: entry:
699 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
700 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
701 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
702 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
703 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
704 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
705 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
706 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
707 // CHECK1: user_code.entry:
708 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
709 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
710 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
711 // CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR3]]
712 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
713 // CHECK1-NEXT: ret void
714 // CHECK1: worker.exit:
715 // CHECK1-NEXT: ret void
716 //
717 //
718 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4
719 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
720 // CHECK1-NEXT: entry:
721 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
722 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
723 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
724 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
725 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
726 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
727 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
728 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
729 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
730 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
731 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8
732 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
733 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
734 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
735 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
736 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
737 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4
738 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
739 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
740 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
741 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
742 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
743 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
744 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
745 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
746 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
747 // CHECK1: cond.true:
748 // CHECK1-NEXT: br label [[COND_END:%.*]]
749 // CHECK1: cond.false:
750 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
751 // CHECK1-NEXT: br label [[COND_END]]
752 // CHECK1: cond.end:
753 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
754 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
755 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
756 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
757 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
758 // CHECK1: omp.inner.for.cond:
759 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
760 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
761 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
762 // CHECK1: omp.inner.for.body:
763 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
764 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
765 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
766 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
767 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
768 // CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8*
769 // CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group !25
770 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
771 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8*
772 // CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !25
773 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
774 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
775 // CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !25
776 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
777 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3), !llvm.access.group !25
778 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
779 // CHECK1: omp.inner.for.inc:
780 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
781 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
782 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
783 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
784 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
785 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
786 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
787 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
788 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
789 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
790 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
791 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
792 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
793 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9
794 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
795 // CHECK1: cond.true5:
796 // CHECK1-NEXT: br label [[COND_END7:%.*]]
797 // CHECK1: cond.false6:
798 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
799 // CHECK1-NEXT: br label [[COND_END7]]
800 // CHECK1: cond.end7:
801 // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ]
802 // CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
803 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
804 // CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
805 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
806 // CHECK1: omp.inner.for.end:
807 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
808 // CHECK1: omp.loop.exit:
809 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
810 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
811 // CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
812 // CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
813 // CHECK1: .omp.final.then:
814 // CHECK1-NEXT: store i32 10, i32* [[I]], align 4
815 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
816 // CHECK1: .omp.final.done:
817 // CHECK1-NEXT: ret void
818 //
819 //
820 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5
821 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
822 // CHECK1-NEXT: entry:
823 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
824 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
825 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
826 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
827 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
828 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
829 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
830 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
831 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
832 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
833 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
834 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
835 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
836 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
837 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
838 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
839 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
840 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
841 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
842 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4
843 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
844 // CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
845 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
846 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
847 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
848 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
849 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
850 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
851 // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
852 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
853 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
854 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
855 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
856 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
857 // CHECK1: omp.inner.for.cond:
858 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
859 // CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
860 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !28
861 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
862 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
863 // CHECK1: omp.inner.for.body:
864 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
865 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
866 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
867 // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28
868 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28
869 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
870 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]]
871 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28
872 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
873 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28
874 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
875 // CHECK1: omp.body.continue:
876 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
877 // CHECK1: omp.inner.for.inc:
878 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
879 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28
880 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
881 // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
882 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
883 // CHECK1: omp.inner.for.end:
884 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
885 // CHECK1: omp.loop.exit:
886 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]])
887 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
888 // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
889 // CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
890 // CHECK1: .omp.final.then:
891 // CHECK1-NEXT: store i32 10, i32* [[I]], align 4
892 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
893 // CHECK1: .omp.final.done:
894 // CHECK1-NEXT: ret void
895 //
896 //
897 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
898 // CHECK1-SAME: ([10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR0]] {
899 // CHECK1-NEXT: entry:
900 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
901 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
902 // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
903 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
904 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
905 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
906 // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8
907 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8
908 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32*
909 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
910 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
911 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
912 // CHECK1: user_code.entry:
913 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
914 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
915 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32*
916 // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4
917 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8
918 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
919 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
920 // CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
921 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
922 // CHECK1-NEXT: ret void
923 // CHECK1: worker.exit:
924 // CHECK1-NEXT: ret void
925 //
926 //
927 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6
928 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
929 // CHECK1-NEXT: entry:
930 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
931 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
932 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
933 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
934 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
935 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
936 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
937 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
938 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
939 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
940 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
941 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
942 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
943 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
944 // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
945 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8
946 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
947 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
948 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
949 // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8
950 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8
951 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32*
952 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
953 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4
954 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
955 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
956 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
957 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
958 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
959 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
960 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
961 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
962 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
963 // CHECK1: cond.true:
964 // CHECK1-NEXT: br label [[COND_END:%.*]]
965 // CHECK1: cond.false:
966 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
967 // CHECK1-NEXT: br label [[COND_END]]
968 // CHECK1: cond.end:
969 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
970 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
971 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
972 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
973 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
974 // CHECK1: omp.inner.for.cond:
975 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
976 // CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
977 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
978 // CHECK1: omp.inner.for.body:
979 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
980 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
981 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
982 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
983 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !31
984 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32*
985 // CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4, !llvm.access.group !31
986 // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8, !llvm.access.group !31
987 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
988 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8*
989 // CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !31
990 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
991 // CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8*
992 // CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !31
993 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
994 // CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8*
995 // CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group !31
996 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
997 // CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8*
998 // CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8, !llvm.access.group !31
999 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1000 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4), !llvm.access.group !31
1001 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1002 // CHECK1: omp.inner.for.inc:
1003 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
1004 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31
1005 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1006 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
1007 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
1008 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31
1009 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
1010 // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
1011 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
1012 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31
1013 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
1014 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
1015 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
1016 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99
1017 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
1018 // CHECK1: cond.true7:
1019 // CHECK1-NEXT: br label [[COND_END9:%.*]]
1020 // CHECK1: cond.false8:
1021 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
1022 // CHECK1-NEXT: br label [[COND_END9]]
1023 // CHECK1: cond.end9:
1024 // CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ]
1025 // CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
1026 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
1027 // CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
1028 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
1029 // CHECK1: omp.inner.for.end:
1030 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1031 // CHECK1: omp.loop.exit:
1032 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
1033 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1034 // CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
1035 // CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1036 // CHECK1: .omp.final.then:
1037 // CHECK1-NEXT: store i32 10, i32* [[I]], align 4
1038 // CHECK1-NEXT: store i32 10, i32* [[J]], align 4
1039 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
1040 // CHECK1: .omp.final.done:
1041 // CHECK1-NEXT: ret void
1042 //
1043 //
1044 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7
1045 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
1046 // CHECK1-NEXT: entry:
1047 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1048 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1049 // CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
1050 // CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
1051 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8
1052 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
1053 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1054 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
1055 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
1056 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1057 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1058 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1059 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1060 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
1061 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
1062 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
1063 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1064 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1065 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
1066 // CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
1067 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8
1068 // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8
1069 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8
1070 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32*
1071 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
1072 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4
1073 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
1074 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32
1075 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
1076 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32
1077 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4
1078 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4
1079 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1080 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1081 // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
1082 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
1083 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
1084 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
1085 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
1086 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1087 // CHECK1: omp.inner.for.cond:
1088 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
1089 // CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64
1090 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !34
1091 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]]
1092 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1093 // CHECK1: omp.inner.for.body:
1094 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
1095 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
1096 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
1097 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1098 // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34
1099 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
1100 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
1101 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10
1102 // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10
1103 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]]
1104 // CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1
1105 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
1106 // CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4, !llvm.access.group !34
1107 // CHECK1-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34
1108 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34
1109 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34
1110 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34
1111 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
1112 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]]
1113 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34
1114 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]]
1115 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34
1116 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
1117 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]]
1118 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34
1119 // CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64
1120 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]]
1121 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4, !llvm.access.group !34
1122 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1123 // CHECK1: omp.body.continue:
1124 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1125 // CHECK1: omp.inner.for.inc:
1126 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
1127 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34
1128 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
1129 // CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
1130 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
1131 // CHECK1: omp.inner.for.end:
1132 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1133 // CHECK1: omp.loop.exit:
1134 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]])
1135 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1136 // CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
1137 // CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1138 // CHECK1: .omp.final.then:
1139 // CHECK1-NEXT: store i32 10, i32* [[I]], align 4
1140 // CHECK1-NEXT: store i32 10, i32* [[J]], align 4
1141 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
1142 // CHECK1: .omp.final.done:
1143 // CHECK1-NEXT: ret void
1144 //
1145 //
1146 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
1147 // CHECK2-SAME: (i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
1148 // CHECK2-NEXT: entry:
1149 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1150 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4
1151 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1152 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1153 // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
1154 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1155 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1156 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
1157 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
1158 // CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4
1159 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4
1160 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false)
1161 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1162 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1163 // CHECK2: user_code.entry:
1164 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]])
1165 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
1166 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
1167 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
1168 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4
1169 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4
1170 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4
1171 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
1172 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
1173 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
1174 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
1175 // CHECK2-NEXT: ret void
1176 // CHECK2: worker.exit:
1177 // CHECK2-NEXT: ret void
1178 //
1179 //
1180 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__
1181 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
1182 // CHECK2-NEXT: entry:
1183 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1184 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1185 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1186 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4
1187 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1188 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1189 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1190 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1191 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
1192 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1193 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1194 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1195 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1196 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1197 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
1198 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1199 // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
1200 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4
1201 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1202 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1203 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
1204 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
1205 // CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4
1206 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4
1207 // CHECK2-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4)
1208 // CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32*
1209 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
1210 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
1211 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1212 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1213 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1214 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
1215 // CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
1216 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4
1217 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1218 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1219 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1220 // CHECK2: omp.precond.then:
1221 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
1222 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
1223 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
1224 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1225 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1226 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1227 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
1228 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128)
1229 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1230 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
1231 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
1232 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1233 // CHECK2: cond.true:
1234 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
1235 // CHECK2-NEXT: br label [[COND_END:%.*]]
1236 // CHECK2: cond.false:
1237 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1238 // CHECK2-NEXT: br label [[COND_END]]
1239 // CHECK2: cond.end:
1240 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
1241 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
1242 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
1243 // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
1244 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1245 // CHECK2: omp.inner.for.cond:
1246 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
1247 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12
1248 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
1249 // CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
1250 // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1251 // CHECK2: omp.inner.for.body:
1252 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
1253 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
1254 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12
1255 // CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12
1256 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12
1257 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12
1258 // CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12
1259 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12
1260 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
1261 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8*
1262 // CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12
1263 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
1264 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8*
1265 // CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12
1266 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
1267 // CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8*
1268 // CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12
1269 // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
1270 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8*
1271 // CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12
1272 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
1273 // CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8*
1274 // CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12
1275 // CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12
1276 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12
1277 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1278 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12
1279 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1280 // CHECK2: omp.inner.for.inc:
1281 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
1282 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12
1283 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
1284 // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
1285 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
1286 // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12
1287 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
1288 // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
1289 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
1290 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12
1291 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
1292 // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
1293 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
1294 // CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12
1295 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
1296 // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
1297 // CHECK2: cond.true11:
1298 // CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12
1299 // CHECK2-NEXT: br label [[COND_END13:%.*]]
1300 // CHECK2: cond.false12:
1301 // CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
1302 // CHECK2-NEXT: br label [[COND_END13]]
1303 // CHECK2: cond.end13:
1304 // CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ]
1305 // CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12
1306 // CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12
1307 // CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12
1308 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
1309 // CHECK2: omp.inner.for.end:
1310 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1311 // CHECK2: omp.loop.exit:
1312 // CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1313 // CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
1314 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
1315 // CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1316 // CHECK2-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
1317 // CHECK2-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1318 // CHECK2: .omp.final.then:
1319 // CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1320 // CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP48]], 0
1321 // CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
1322 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1
1323 // CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]]
1324 // CHECK2-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4
1325 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1326 // CHECK2: .omp.final.done:
1327 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1328 // CHECK2-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
1329 // CHECK2-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
1330 // CHECK2: .omp.lastprivate.then:
1331 // CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[L_ADDR]], align 4
1332 // CHECK2-NEXT: store i32 [[TMP51]], i32* [[L_ADDR]], align 4
1333 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
1334 // CHECK2: .omp.lastprivate.done:
1335 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1336 // CHECK2: omp.precond.end:
1337 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i32 4)
1338 // CHECK2-NEXT: ret void
1339 //
1340 //
1341 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
1342 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] {
1343 // CHECK2-NEXT: entry:
1344 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1345 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1346 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
1347 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
1348 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1349 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4
1350 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
1351 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1352 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1353 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1354 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1355 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1356 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1357 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1358 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1359 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1360 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
1361 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1362 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1363 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1364 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1365 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
1366 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4
1367 // CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4
1368 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4
1369 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
1370 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
1371 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1372 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1373 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1374 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1375 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
1376 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4
1377 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1378 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1379 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1380 // CHECK2: omp.precond.then:
1381 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
1382 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
1383 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
1384 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1385 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1386 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4
1387 // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
1388 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1389 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1390 // CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1391 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
1392 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32)
1393 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1394 // CHECK2: omp.dispatch.cond:
1395 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1396 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1397 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
1398 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1399 // CHECK2: cond.true:
1400 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1401 // CHECK2-NEXT: br label [[COND_END:%.*]]
1402 // CHECK2: cond.false:
1403 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1404 // CHECK2-NEXT: br label [[COND_END]]
1405 // CHECK2: cond.end:
1406 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
1407 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
1408 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
1409 // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
1410 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1411 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1412 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
1413 // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1414 // CHECK2: omp.dispatch.body:
1415 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1416 // CHECK2: omp.inner.for.cond:
1417 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
1418 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16
1419 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
1420 // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1421 // CHECK2: omp.inner.for.body:
1422 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
1423 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
1424 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1425 // CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16
1426 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16
1427 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]]
1428 // CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16
1429 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16
1430 // CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16
1431 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1432 // CHECK2: omp.body.continue:
1433 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1434 // CHECK2: omp.inner.for.inc:
1435 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
1436 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1
1437 // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16
1438 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]]
1439 // CHECK2: omp.inner.for.end:
1440 // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1441 // CHECK2: omp.dispatch.inc:
1442 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
1443 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
1444 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1445 // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4
1446 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1447 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
1448 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
1449 // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4
1450 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
1451 // CHECK2: omp.dispatch.end:
1452 // CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1453 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4
1454 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]])
1455 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1456 // CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
1457 // CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1458 // CHECK2: .omp.final.then:
1459 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1460 // CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0
1461 // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
1462 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
1463 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
1464 // CHECK2-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4
1465 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1466 // CHECK2: .omp.final.done:
1467 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1468 // CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
1469 // CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
1470 // CHECK2: .omp.lastprivate.then:
1471 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4
1472 // CHECK2-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4
1473 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
1474 // CHECK2: .omp.lastprivate.done:
1475 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1476 // CHECK2: omp.precond.end:
1477 // CHECK2-NEXT: ret void
1478 //
1479 //
1480 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
1481 // CHECK2-SAME: (i32 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR6:[0-9]+]] {
1482 // CHECK2-NEXT: entry:
1483 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1484 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4
1485 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1486 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1487 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1488 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
1489 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4
1490 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4
1491 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
1492 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1493 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1494 // CHECK2: user_code.entry:
1495 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
1496 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
1497 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4
1498 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4
1499 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
1500 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
1501 // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR3]]
1502 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
1503 // CHECK2-NEXT: ret void
1504 // CHECK2: worker.exit:
1505 // CHECK2-NEXT: ret void
1506 //
1507 //
1508 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2
1509 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
1510 // CHECK2-NEXT: entry:
1511 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1512 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1513 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1514 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4
1515 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1516 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1517 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1518 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1519 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1520 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1521 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1522 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1523 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1524 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
1525 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
1526 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4
1527 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1528 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1529 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
1530 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4
1531 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4
1532 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
1533 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
1534 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1535 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1536 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1537 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1538 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
1539 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4
1540 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1541 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1542 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1543 // CHECK2: omp.precond.then:
1544 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
1545 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
1546 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4
1547 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1548 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1549 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1550 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1551 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
1552 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1553 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1554 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
1555 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
1556 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1557 // CHECK2: cond.true:
1558 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
1559 // CHECK2-NEXT: br label [[COND_END:%.*]]
1560 // CHECK2: cond.false:
1561 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1562 // CHECK2-NEXT: br label [[COND_END]]
1563 // CHECK2: cond.end:
1564 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
1565 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
1566 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
1567 // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4
1568 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1569 // CHECK2: omp.inner.for.cond:
1570 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
1571 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19
1572 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
1573 // CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
1574 // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1575 // CHECK2: omp.inner.for.body:
1576 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
1577 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
1578 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19
1579 // CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19
1580 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19
1581 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
1582 // CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8*
1583 // CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19
1584 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
1585 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8*
1586 // CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19
1587 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
1588 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8*
1589 // CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19
1590 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
1591 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8*
1592 // CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19
1593 // CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19
1594 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19
1595 // CHECK2-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1596 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19
1597 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1598 // CHECK2: omp.inner.for.inc:
1599 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
1600 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
1601 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
1602 // CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
1603 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
1604 // CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
1605 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
1606 // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
1607 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
1608 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19
1609 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
1610 // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
1611 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
1612 // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19
1613 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
1614 // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
1615 // CHECK2: cond.true10:
1616 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19
1617 // CHECK2-NEXT: br label [[COND_END12:%.*]]
1618 // CHECK2: cond.false11:
1619 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
1620 // CHECK2-NEXT: br label [[COND_END12]]
1621 // CHECK2: cond.end12:
1622 // CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
1623 // CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19
1624 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19
1625 // CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19
1626 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]]
1627 // CHECK2: omp.inner.for.end:
1628 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1629 // CHECK2: omp.loop.exit:
1630 // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1631 // CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
1632 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]])
1633 // CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1634 // CHECK2-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
1635 // CHECK2-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1636 // CHECK2: .omp.final.then:
1637 // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1638 // CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0
1639 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
1640 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
1641 // CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
1642 // CHECK2-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4
1643 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1644 // CHECK2: .omp.final.done:
1645 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1646 // CHECK2: omp.precond.end:
1647 // CHECK2-NEXT: ret void
1648 //
1649 //
1650 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3
1651 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
1652 // CHECK2-NEXT: entry:
1653 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1654 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1655 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
1656 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
1657 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
1658 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4
1659 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1660 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1661 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
1662 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
1663 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1664 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1665 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1666 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1667 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1668 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
1669 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1670 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1671 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1672 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1673 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
1674 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4
1675 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4
1676 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
1677 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
1678 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1679 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
1680 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1681 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1682 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
1683 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4
1684 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1685 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
1686 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1687 // CHECK2: omp.precond.then:
1688 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
1689 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
1690 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4
1691 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1692 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1693 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4
1694 // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
1695 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1696 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1697 // CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1698 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
1699 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
1700 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
1701 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4
1702 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1703 // CHECK2: omp.inner.for.cond:
1704 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
1705 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22
1706 // CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
1707 // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1708 // CHECK2: omp.inner.for.body:
1709 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
1710 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
1711 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1712 // CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22
1713 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22
1714 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]]
1715 // CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22
1716 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
1717 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1
1718 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16
1719 // CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22
1720 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1721 // CHECK2: omp.body.continue:
1722 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1723 // CHECK2: omp.inner.for.inc:
1724 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
1725 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22
1726 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
1727 // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22
1728 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
1729 // CHECK2: omp.inner.for.end:
1730 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1731 // CHECK2: omp.loop.exit:
1732 // CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1733 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
1734 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]])
1735 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1736 // CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
1737 // CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1738 // CHECK2: .omp.final.then:
1739 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
1740 // CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0
1741 // CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
1742 // CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1
1743 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
1744 // CHECK2-NEXT: store i32 [[ADD11]], i32* [[I3]], align 4
1745 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1746 // CHECK2: .omp.final.done:
1747 // CHECK2-NEXT: br label [[OMP_PRECOND_END]]
1748 // CHECK2: omp.precond.end:
1749 // CHECK2-NEXT: ret void
1750 //
1751 //
1752 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37
1753 // CHECK2-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
1754 // CHECK2-NEXT: entry:
1755 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
1756 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1757 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1758 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
1759 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
1760 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
1761 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1762 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1763 // CHECK2: user_code.entry:
1764 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
1765 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
1766 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
1767 // CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR3]]
1768 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
1769 // CHECK2-NEXT: ret void
1770 // CHECK2: worker.exit:
1771 // CHECK2-NEXT: ret void
1772 //
1773 //
1774 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4
1775 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
1776 // CHECK2-NEXT: entry:
1777 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1778 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1779 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
1780 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1781 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1782 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1783 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1784 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1785 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1786 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1787 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4
1788 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1789 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1790 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
1791 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
1792 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
1793 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4
1794 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1795 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1796 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
1797 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1798 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
1799 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
1800 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1801 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
1802 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1803 // CHECK2: cond.true:
1804 // CHECK2-NEXT: br label [[COND_END:%.*]]
1805 // CHECK2: cond.false:
1806 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1807 // CHECK2-NEXT: br label [[COND_END]]
1808 // CHECK2: cond.end:
1809 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1810 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
1811 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
1812 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
1813 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1814 // CHECK2: omp.inner.for.cond:
1815 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
1816 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
1817 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1818 // CHECK2: omp.inner.for.body:
1819 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
1820 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
1821 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
1822 // CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8*
1823 // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25
1824 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
1825 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8*
1826 // CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25
1827 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
1828 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
1829 // CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25
1830 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1831 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25
1832 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1833 // CHECK2: omp.inner.for.inc:
1834 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
1835 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
1836 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
1837 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
1838 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
1839 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
1840 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
1841 // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
1842 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
1843 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25
1844 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
1845 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
1846 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
1847 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9
1848 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
1849 // CHECK2: cond.true5:
1850 // CHECK2-NEXT: br label [[COND_END7:%.*]]
1851 // CHECK2: cond.false6:
1852 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
1853 // CHECK2-NEXT: br label [[COND_END7]]
1854 // CHECK2: cond.end7:
1855 // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ]
1856 // CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25
1857 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25
1858 // CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
1859 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
1860 // CHECK2: omp.inner.for.end:
1861 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1862 // CHECK2: omp.loop.exit:
1863 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
1864 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1865 // CHECK2-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
1866 // CHECK2-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1867 // CHECK2: .omp.final.then:
1868 // CHECK2-NEXT: store i32 10, i32* [[I]], align 4
1869 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1870 // CHECK2: .omp.final.done:
1871 // CHECK2-NEXT: ret void
1872 //
1873 //
1874 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5
1875 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
1876 // CHECK2-NEXT: entry:
1877 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1878 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1879 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
1880 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
1881 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
1882 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1883 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1884 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
1885 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
1886 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1887 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1888 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1889 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1890 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1891 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1892 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1893 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
1894 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
1895 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
1896 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4
1897 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1898 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1899 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4
1900 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4
1901 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1902 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1903 // CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1904 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
1905 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
1906 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
1907 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
1908 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1909 // CHECK2: omp.inner.for.cond:
1910 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
1911 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28
1912 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
1913 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1914 // CHECK2: omp.inner.for.body:
1915 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
1916 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
1917 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1918 // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28
1919 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28
1920 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]]
1921 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28
1922 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1
1923 // CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28
1924 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1925 // CHECK2: omp.body.continue:
1926 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1927 // CHECK2: omp.inner.for.inc:
1928 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
1929 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28
1930 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
1931 // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
1932 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
1933 // CHECK2: omp.inner.for.end:
1934 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1935 // CHECK2: omp.loop.exit:
1936 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]])
1937 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
1938 // CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
1939 // CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
1940 // CHECK2: .omp.final.then:
1941 // CHECK2-NEXT: store i32 10, i32* [[I]], align 4
1942 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
1943 // CHECK2: .omp.final.done:
1944 // CHECK2-NEXT: ret void
1945 //
1946 //
1947 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
1948 // CHECK2-SAME: ([10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR0]] {
1949 // CHECK2-NEXT: entry:
1950 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4
1951 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1952 // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
1953 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
1954 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
1955 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
1956 // CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4
1957 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
1958 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
1959 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
1960 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
1961 // CHECK2: user_code.entry:
1962 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]])
1963 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4
1964 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4
1965 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4
1966 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
1967 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
1968 // CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
1969 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
1970 // CHECK2-NEXT: ret void
1971 // CHECK2: worker.exit:
1972 // CHECK2-NEXT: ret void
1973 //
1974 //
1975 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6
1976 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
1977 // CHECK2-NEXT: entry:
1978 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1979 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1980 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4
1981 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
1982 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
1983 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
1984 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
1985 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1986 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1987 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1988 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1989 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
1990 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
1991 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
1992 // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
1993 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4
1994 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1995 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1996 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
1997 // CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4
1998 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
1999 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
2000 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4
2001 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
2002 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
2003 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
2004 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
2005 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
2006 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
2007 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
2008 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
2009 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2010 // CHECK2: cond.true:
2011 // CHECK2-NEXT: br label [[COND_END:%.*]]
2012 // CHECK2: cond.false:
2013 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
2014 // CHECK2-NEXT: br label [[COND_END]]
2015 // CHECK2: cond.end:
2016 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
2017 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
2018 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
2019 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
2020 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2021 // CHECK2: omp.inner.for.cond:
2022 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
2023 // CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
2024 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2025 // CHECK2: omp.inner.for.body:
2026 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
2027 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
2028 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31
2029 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31
2030 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31
2031 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
2032 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8*
2033 // CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31
2034 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
2035 // CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8*
2036 // CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31
2037 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
2038 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8*
2039 // CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31
2040 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
2041 // CHECK2-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8*
2042 // CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31
2043 // CHECK2-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
2044 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31
2045 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2046 // CHECK2: omp.inner.for.inc:
2047 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
2048 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31
2049 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
2050 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
2051 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
2052 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31
2053 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
2054 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
2055 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
2056 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31
2057 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
2058 // CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
2059 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
2060 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99
2061 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
2062 // CHECK2: cond.true6:
2063 // CHECK2-NEXT: br label [[COND_END8:%.*]]
2064 // CHECK2: cond.false7:
2065 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
2066 // CHECK2-NEXT: br label [[COND_END8]]
2067 // CHECK2: cond.end8:
2068 // CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ]
2069 // CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
2070 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
2071 // CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
2072 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
2073 // CHECK2: omp.inner.for.end:
2074 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2075 // CHECK2: omp.loop.exit:
2076 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
2077 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
2078 // CHECK2-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
2079 // CHECK2-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2080 // CHECK2: .omp.final.then:
2081 // CHECK2-NEXT: store i32 10, i32* [[I]], align 4
2082 // CHECK2-NEXT: store i32 10, i32* [[J]], align 4
2083 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
2084 // CHECK2: .omp.final.done:
2085 // CHECK2-NEXT: ret void
2086 //
2087 //
2088 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7
2089 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
2090 // CHECK2-NEXT: entry:
2091 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
2092 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
2093 // CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
2094 // CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
2095 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4
2096 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
2097 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
2098 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
2099 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
2100 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
2101 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
2102 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2103 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2104 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
2105 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
2106 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
2107 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
2108 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
2109 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
2110 // CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
2111 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4
2112 // CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4
2113 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4
2114 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
2115 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4
2116 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
2117 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
2118 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4
2119 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4
2120 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
2121 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
2122 // CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
2123 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
2124 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
2125 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
2126 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
2127 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
2128 // CHECK2: omp.inner.for.cond:
2129 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
2130 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34
2131 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
2132 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2133 // CHECK2: omp.inner.for.body:
2134 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
2135 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
2136 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
2137 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2138 // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34
2139 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
2140 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
2141 // CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10
2142 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10
2143 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]]
2144 // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
2145 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
2146 // CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34
2147 // CHECK2-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34
2148 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34
2149 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34
2150 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34
2151 // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
2152 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]]
2153 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34
2154 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]]
2155 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34
2156 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]]
2157 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34
2158 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]]
2159 // CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34
2160 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
2161 // CHECK2: omp.body.continue:
2162 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
2163 // CHECK2: omp.inner.for.inc:
2164 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
2165 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34
2166 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
2167 // CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
2168 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
2169 // CHECK2: omp.inner.for.end:
2170 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
2171 // CHECK2: omp.loop.exit:
2172 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]])
2173 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
2174 // CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
2175 // CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
2176 // CHECK2: .omp.final.then:
2177 // CHECK2-NEXT: store i32 10, i32* [[I]], align 4
2178 // CHECK2-NEXT: store i32 10, i32* [[J]], align 4
2179 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
2180 // CHECK2: .omp.final.done:
2181 // CHECK2-NEXT: ret void
2182 //
2183