// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK4
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5
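//
// CHECK4 checks the nvptx64 device IR built against the ppc64le host module;
// CHECK5 checks the 32-bit nvptx device IR built against the i386 host
// module, and the final RUN line reuses the CHECK5 prefix with exceptions
// enabled.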

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

int a;

int foo(int *a);

int main(int argc, char **argv) {
  int b[10], c[10], d[10];
#pragma omp target teams map(tofrom:a)
#pragma omp distribute parallel for firstprivate(b) lastprivate(c) if(a)
  for (int i = 0; i < argc; ++i)
    a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]);
  return 0;
}

#endif
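//
// The CHECK lines below track the privatization traffic for this construct.
// A rough sequential analogue of what each team does (illustrative sketch
// only; the *_priv and executed_last_iteration names are hypothetical, not
// part of the generated IR):
//
//   int b_priv[10], c_priv[10];
//   __builtin_memcpy(b_priv, b, 40);          // firstprivate(b) copy-in
//   for (int i = lb; i <= ub; ++i)            // chunk assigned by the runtime
//     a = foo(&i) + foo(&a) + foo(&b_priv[i]) + foo(&c_priv[i]) + foo(&d[i]);
//   if (executed_last_iteration)
//     __builtin_memcpy(c, c_priv, 40);        // lastprivate(c) copy-out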
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19
// CHECK4-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8
// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]])
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32*
// CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
//
//
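// Device entry pattern: threads for which __kmpc_target_init (mode argument
// i8 2, the SPMD flag in this runtime revision) returns -1 fall through to
// user_code.entry, run the teams-level outlined function below, and tear
// down with __kmpc_target_deinit; all other threads leave via worker.exit.
//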
// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8
// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8
// CHECK4-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40)
// CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]*
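// 'c' is lastprivate at the distribute level but must also be visible to the
// nested parallel region, so instead of a plain alloca it is globalized: the
// 40-byte team-local copy comes from __kmpc_alloc_shared above and is
// released by the matching __kmpc_free_shared at the end of this function.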
// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK4-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: store i32 0, i32* [[I]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK4: omp.precond.then:
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8*
// CHECK4-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
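// Schedule kind 91 is distribute static chunked in this runtime's numbering,
// with the chunk set to the hardware thread count of the block; each team
// therefore takes block-sized chunks in rounds, which is why the code below
// keeps bumping COMB_LB/COMB_UB by the stride and re-clamping the upper bound.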
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4: cond.true:
// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: br label [[COND_END:%.*]]
// CHECK4: cond.false:
// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: br label [[COND_END]]
// CHECK4: cond.end:
// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]]
// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to i8*
// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8
// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to i8*
// CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8
// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK4-NEXT: [[TMP28:%.*]] = bitcast i32* [[CONV]] to i8*
// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8
// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
// CHECK4-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8*
// CHECK4-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8
// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
// CHECK4-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8*
// CHECK4-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8
// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
// CHECK4-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8*
// CHECK4-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8
// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6
// CHECK4-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8*
// CHECK4-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8
// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
// CHECK4-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK4-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4
// CHECK4-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i64 7)
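// The if(a) clause surfaces here: the load of 'a', the icmp ne 0, and the
// zext feed the if_expr argument of __kmpc_parallel_51, while the seven
// captured pointers/bounds are marshalled through CAPTURED_VARS_ADDRS for
// __omp_outlined__1.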
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]]
// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]]
// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]]
// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]]
// CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
// CHECK4: cond.true12:
// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK4-NEXT: br label [[COND_END14:%.*]]
// CHECK4: cond.false13:
// CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: br label [[COND_END14]]
// CHECK4: cond.end14:
// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ]
// CHECK4-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4
// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]])
// CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0
// CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK4: .omp.lastprivate.then:
// CHECK4-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8*
// CHECK4-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8*
// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i64 40, i1 false)
// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK4: .omp.lastprivate.done:
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i64 40)
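// Distribute-level lastprivate copy-out: if this team executed the last
// chunk (.omp.is_last != 0), the globalized copy of 'c' is memcpy'd back
// into the mapped array before the shared allocation is freed above.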
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
// CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4
// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8
// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK4-NEXT: store i32 0, i32* [[I]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK4: omp.precond.then:
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32
// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32
// CHECK4-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8*
// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8*
// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false)
// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
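// The parallel-level outlined function re-derives the trip count, overwrites
// its bounds with the chunk handed down by distribute (.previous.lb./.ub.),
// and workshares the loop across threads with __kmpc_for_static_init_4 using
// schedule kind 33 (static chunked, chunk 1), i.e. a cyclic distribution.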
// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64
// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]]
// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT: store i32 [[ADD]], i32* [[I6]], align 4
// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I6]]) #[[ATTR8:[0-9]+]]
// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP1]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]]
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4
// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]]
// CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]]
// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4
// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64
// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]]
// CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX14]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]]
// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4
// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64
// CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]]
// CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX18]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]]
// CHECK4-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
// CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]])
// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK4: .omp.lastprivate.then:
// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8*
// CHECK4-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8*
// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false)
// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK4: .omp.lastprivate.done:
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: ret void
//
//
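// CHECK5 mirrors the CHECK4 structure for the 32-bit device: pointers and
// __kmpc_alloc_shared/__kmpc_free_shared sizes are i32, argc is passed
// without the i64 round-trip, and the distribute bounds reach
// __omp_outlined__1 directly as i32 values.
//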
// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19
// CHECK5-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4
// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4
// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false)
// CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK5: user_code.entry:
// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]])
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4
// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]]
// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK5-NEXT: ret void
// CHECK5: worker.exit:
// CHECK5-NEXT: ret void
//
//
// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4
// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4
// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4
// CHECK5-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 40)
// CHECK5-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]*
// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK5-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: store i32 0, i32* [[I]], align 4
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK5: omp.precond.then:
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8*
// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK5: cond.true:
// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: br label [[COND_END:%.*]]
// CHECK5: cond.false:
// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: br label [[COND_END]]
// CHECK5: cond.end:
// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1
// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]]
// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to i8*
// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4
// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to i8*
// CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4
// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK5-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8*
// CHECK5-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4
// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
// CHECK5-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8*
// CHECK5-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4
// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[B4]] to i8*
// CHECK5-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4
// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5
// CHECK5-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8*
// CHECK5-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4
// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6
// CHECK5-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8*
// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4
// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0
// CHECK5-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK5-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4
// CHECK5-NEXT: [[TMP39:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i32 7)
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]]
// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]]
// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]]
// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]]
// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
// CHECK5: cond.true12:
// CHECK5-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: br label [[COND_END14:%.*]]
// CHECK5: cond.false13:
// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: br label [[COND_END14]]
// CHECK5: cond.end14:
// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ]
// CHECK5-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: omp.loop.exit:
// CHECK5-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4
// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]])
// CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0
// CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK5: .omp.lastprivate.then:
// CHECK5-NEXT: [[TMP55:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8*
// CHECK5-NEXT: [[TMP56:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8*
// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i32 40, i1 false)
// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK5: .omp.lastprivate.done:
// CHECK5-NEXT: br label [[OMP_PRECOND_END]]
// CHECK5: omp.precond.end:
// CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 40)
// CHECK5-NEXT: ret void
//
//
// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] {
// CHECK5-NEXT: entry:
// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4
// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4
// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4
// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4
// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK5-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4
// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4
// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4
// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4
// CHECK5-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
// CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK5-NEXT: store i32 0, i32* [[I]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK5: omp.precond.then:
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4
// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4
// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8*
// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8*
// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false)
// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]]
// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4
// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I5]]) #[[ATTR8:[0-9]+]]
// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP1]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4
// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]]
// CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4
// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]]
// CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX11]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]]
// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4
// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]]
// CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX14]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]]
// CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4
// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
// CHECK5-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: omp.loop.exit:
// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]])
// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK5: .omp.lastprivate.then:
// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8*
// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8*
// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false)
// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK5: .omp.lastprivate.done:
// CHECK5-NEXT: br label [[OMP_PRECOND_END]]
// CHECK5: omp.precond.end:
// CHECK5-NEXT: ret void
//