1 // Check code generation
2 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
3 
4 // Check same results after serialization round-trip
5 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
6 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
7 // expected-no-diagnostics
8 
9 #ifndef HEADER
10 #define HEADER
11 
12 // placeholder for loop body code.
// C linkage keeps the symbol unmangled, so the checks can refer to it as
// @body. The variadic parameter list lets the loops in this file pass either
// one or two loop counters to the same function.
13 extern "C" void body(...) {}
14 
15 // IR: define {{.*}}void @_ZN1SC2Ev(%struct.S*
16 // IR:         [[THIS_ADDR:%.+]] = alloca %struct.S*, align 8
17 // IR-NEXT:    [[I_REF:%.+]] = alloca i32*, align 8
18 // IR-NEXT:    [[FLOOR:%.+]] = alloca i32, align 4
19 // IR-NEXT:    [[TILE:%.+]] = alloca i32, align 4
20 // IR-NEXT:    store %struct.S* %{{.+}}, %struct.S** [[THIS_ADDR]], align 8
21 // IR-NEXT:    [[THIS:%.+]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
22 // IR-NEXT:    [[I:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 0
23 // IR-NEXT:    store i32* [[I]], i32** [[I_REF]], align 8
24 // IR-NEXT:    store i32 0, i32* [[FLOOR]], align 4
25 // IR-NEXT:    br label %[[FOR_COND:.+]]
26 // IR:         [[FOR_COND]]:
27 // IR-NEXT:    [[TMP0:%.+]] = load i32, i32* [[FLOOR]], align 4
28 // IR-NEXT:    [[CMP:%.+]] = icmp slt i32 [[TMP0]], 4
29 // IR-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END11:.+]]
30 // IR:         [[FOR_BODY]]:
31 // IR-NEXT:    [[TMP1:%.+]] = load i32, i32* [[FLOOR]], align 4
32 // IR-NEXT:    store i32 [[TMP1]], i32* [[TILE]], align 4
33 // IR-NEXT:    br label %[[FOR_COND3:.+]]
34 // IR:         [[FOR_COND3]]:
35 // IR-NEXT:    [[TMP2:%.+]] = load i32, i32* [[TILE]], align 4
36 // IR-NEXT:    [[TMP3:%.+]] = load i32, i32* [[FLOOR]], align 4
37 // IR-NEXT:    [[ADD:%.+]] = add nsw i32 [[TMP3]], 5
38 // IR-NEXT:    [[CMP4:%.+]] = icmp slt i32 4, [[ADD]]
39 // IR-NEXT:    br i1 [[CMP4]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
40 // IR:         [[COND_TRUE]]:
41 // IR-NEXT:    br label %[[COND_END:.+]]
42 // IR:         [[COND_FALSE]]:
43 // IR-NEXT:    [[TMP4:%.+]] = load i32, i32* [[FLOOR]], align 4
44 // IR-NEXT:    [[ADD5:%.+]] = add nsw i32 [[TMP4]], 5
45 // IR-NEXT:    br label %[[COND_END]]
46 // IR:         [[COND_END]]:
47 // IR-NEXT:    [[COND:%.+]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD5]], %[[COND_FALSE]] ]
48 // IR-NEXT:    [[CMP6:%.+]] = icmp slt i32 [[TMP2]], [[COND]]
49 // IR-NEXT:    br i1 [[CMP6]], label %[[FOR_BODY7:.+]], label %[[FOR_END:.+]]
50 // IR:         [[FOR_BODY7]]:
51 // IR-NEXT:    [[TMP5:%.+]] = load i32, i32* [[TILE]], align 4
52 // IR-NEXT:    [[MUL:%.+]] = mul nsw i32 [[TMP5]], 3
53 // IR-NEXT:    [[ADD8:%.+]] = add nsw i32 7, [[MUL]]
54 // IR-NEXT:    [[TMP6:%.+]] = load i32*, i32** [[I_REF]], align 8
55 // IR-NEXT:    store i32 [[ADD8]], i32* [[TMP6]], align 4
56 // IR-NEXT:    [[TMP7:%.+]] = load i32*, i32** [[I_REF]], align 8
57 // IR-NEXT:    [[TMP8:%.+]] = load i32, i32* [[TMP7]], align 4
58 // IR-NEXT:    call void (...) @body(i32 [[TMP8]])
59 // IR-NEXT:    br label %[[FOR_INC:.+]]
60 // IR:         [[FOR_INC]]:
61 // IR-NEXT:    [[TMP9:%.+]] = load i32, i32* [[TILE]], align 4
62 // IR-NEXT:    [[INC:%.+]] = add nsw i32 [[TMP9]], 1
63 // IR-NEXT:    store i32 [[INC]], i32* [[TILE]], align 4
64 // IR-NEXT:    br label %[[FOR_COND3]]
65 // IR:         [[FOR_END]]:
66 // IR-NEXT:    br label %[[FOR_INC9:.+]]
67 // IR:         [[FOR_INC9]]:
68 // IR-NEXT:    [[TMP10:%.+]] = load i32, i32* [[FLOOR]], align 4
69 // IR-NEXT:    [[ADD10:%.+]] = add nsw i32 [[TMP10]], 5
70 // IR-NEXT:    store i32 [[ADD10]], i32* [[FLOOR]], align 4
71 // IR-NEXT:    br label %[[FOR_COND]]
72 // IR:         [[FOR_END11]]:
73 // IR-NEXT:    ret void
74 
// Tiling of a loop whose iteration variable is the data member `i`, written
// inside a constructor. The loop visits i = 7, 10, 13, 16 (four logical
// iterations) and is tiled with a tile size of 5. The global instance `s`
// invokes the constructor; presumably that is what causes its definition to
// be emitted so the checks above have something to match.
75 struct S {
76   int i; // loop counter; the checks expect it to be accessed through `this`
77   S() {
78 #pragma omp tile sizes(5)
79     for (i = 7; i < 17; i += 3)
80       body(i);
81   }
82 } s;
83 
84 // IR-LABEL: define {{.*}}void @foo1(
85 // IR:         [[START_ADDR:%.*]] = alloca i32, align 4
86 // IR-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
87 // IR-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
88 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
89 // IR-NEXT:    [[CAP_EXPR:%.+]] = alloca i32, align 4
90 // IR-NEXT:    [[CAP_EXPR1:%.+]] = alloca i32, align 4
91 // IR-NEXT:    [[CAP_EXPR2:%.+]] = alloca i32, align 4
92 // IR-NEXT:    [[CAP_EXPR3:%.+]] = alloca i32, align 4
93 // IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
94 // IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
95 // IR-NEXT:    store i32 [[START:%.*]], i32* [[START_ADDR]], align 4
96 // IR-NEXT:    store i32 [[END:%.*]], i32* [[END_ADDR]], align 4
97 // IR-NEXT:    store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4
98 // IR-NEXT:    [[TMP0:%.+]] = load i32, i32* [[START_ADDR]], align 4
99 // IR-NEXT:    store i32 [[TMP0]], i32* [[CAP_EXPR]], align 4
100 // IR-NEXT:    [[TMP1:%.+]] = load i32, i32* [[END_ADDR]], align 4
101 // IR-NEXT:    store i32 [[TMP1]], i32* [[CAP_EXPR1]], align 4
102 // IR-NEXT:    [[TMP2:%.+]] = load i32, i32* [[STEP_ADDR]], align 4
103 // IR-NEXT:    store i32 [[TMP2]], i32* [[CAP_EXPR2]], align 4
104 // IR-NEXT:    [[TMP3:%.+]] = load i32, i32* [[CAP_EXPR1]], align 4
105 // IR-NEXT:    [[TMP4:%.+]] = load i32, i32* [[CAP_EXPR]], align 4
106 // IR-NEXT:    [[SUB:%.+]] = sub i32 [[TMP3]], [[TMP4]]
107 // IR-NEXT:    [[SUB4:%.+]] = sub i32 [[SUB]], 1
108 // IR-NEXT:    [[TMP5:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4
109 // IR-NEXT:    [[ADD:%.+]] = add i32 [[SUB4]], [[TMP5]]
110 // IR-NEXT:    [[TMP6:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4
111 // IR-NEXT:    [[DIV:%.+]] = udiv i32 [[ADD]], [[TMP6]]
112 // IR-NEXT:    [[SUB5:%.+]] = sub i32 [[DIV]], 1
113 // IR-NEXT:    store i32 [[SUB5]], i32* [[CAP_EXPR3]], align 4
114 // IR-NEXT:    store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4
115 // IR-NEXT:    br label %[[FOR_COND:.*]]
116 // IR:         [[FOR_COND]]:
117 // IR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
118 // IR-NEXT:    [[TMP8:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4
119 // IR-NEXT:    [[ADD3:%.*]] = add i32 [[TMP8]], 1
120 // IR-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]]
121 // IR-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END25:.*]]
122 // IR:         [[FOR_BODY]]:
123 // IR-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
124 // IR-NEXT:    store i32 [[TMP5]], i32* [[DOTTILE_0_IV_I]], align 4
125 // IR-NEXT:    br label %[[FOR_COND4:.*]]
126 // IR:         [[FOR_COND4]]:
127 // IR-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
128 // IR-NEXT:    [[TMP11:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4
129 // IR-NEXT:    [[ADD10:%.*]] = add i32 [[TMP11]], 1
130 // IR-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
131 // IR-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP11]], 5
132 // IR-NEXT:    [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]]
133 // IR-NEXT:    br i1 [[CMP12]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
134 // IR:         [[COND_TRUE]]:
135 // IR-NEXT:    [[TMP13:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4
136 // IR-NEXT:    [[ADD18:%.*]] = add i32 [[TMP13]], 1
137 // IR-NEXT:    br label %[[COND_END:.*]]
138 // IR:         [[COND_FALSE]]:
139 // IR-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
140 // IR-NEXT:    [[ADD19:%.*]] = add nsw i32 [[TMP16]], 5
141 // IR-NEXT:    br label %[[COND_END]]
142 // IR:         [[COND_END]]:
143 // IR-NEXT:    [[COND:%.*]] = phi i32 [ [[ADD18]], %[[COND_TRUE]] ], [ [[ADD19]], %[[COND_FALSE]] ]
144 // IR-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[TMP6]], [[COND]]
145 // IR-NEXT:    br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END:.*]]
146 // IR:         [[FOR_BODY21]]:
147 // IR-NEXT:    [[TMP15:%.+]] = load i32, i32* [[CAP_EXPR]], align 4
148 // IR-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
149 // IR-NEXT:    [[TMP17:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4
150 // IR-NEXT:    [[MUL:%.*]] = mul i32 [[TMP19]], [[TMP17]]
151 // IR-NEXT:    [[ADD22:%.*]] = add i32 [[TMP15]], [[MUL]]
152 // IR-NEXT:    store i32 [[ADD22]], i32* [[I]], align 4
153 // IR-NEXT:    [[TMP21:%.*]] = load i32, i32* [[I]], align 4
154 // IR-NEXT:    call void (...) @body(i32 [[TMP21]])
155 // IR-NEXT:    br label %[[FOR_INC:.*]]
156 // IR:         [[FOR_INC]]:
157 // IR-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
158 // IR-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP22]], 1
159 // IR-NEXT:    store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4
160 // IR-NEXT:    br label %[[FOR_COND4]]
161 // IR:         [[FOR_END]]:
162 // IR-NEXT:    br label %[[FOR_INC23:.*]]
163 // IR:         [[FOR_INC23]]:
164 // IR-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
165 // IR-NEXT:    [[ADD24:%.*]] = add nsw i32 [[TMP23]], 5
166 // IR-NEXT:    store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4
167 // IR-NEXT:    br label %[[FOR_COND]]
168 // IR:        [[FOR_END25]]:
169 // IR-NEXT:    ret void
170 //
// One-dimensional tiling where start, end and step are function parameters,
// so the trip count is not a compile-time constant; the checks above expect
// it to be computed from captured copies of the three values. The iteration
// variable `i` is declared outside the loop statement rather than in its
// init clause.
171 extern "C" void foo1(int start, int end, int step) {
172   int i;
173 #pragma omp tile sizes(5)
174   for (i = start; i < end; i += step)
175     body(i);
176 }
177 
178 // IR-LABEL: define {{.*}}void @foo2(
179 // IR-NEXT:  entry:
180 // IR-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
181 // IR-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
182 // IR-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
183 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
184 // IR-NEXT:    [[J:%.*]] = alloca i32, align 4
185 // IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
186 // IR-NEXT:    [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4
187 // IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
188 // IR-NEXT:    [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4
189 // IR-NEXT:    store i32 [[START:%.*]], i32* [[START_ADDR]], align 4
190 // IR-NEXT:    store i32 [[END:%.*]], i32* [[END_ADDR]], align 4
191 // IR-NEXT:    store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4
192 // IR-NEXT:    store i32 7, i32* [[I]], align 4
193 // IR-NEXT:    store i32 7, i32* [[J]], align 4
194 // IR-NEXT:    store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4
195 // IR-NEXT:    br label %[[FOR_COND:.*]]
196 // IR:         [[FOR_COND]]:
197 // IR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
198 // IR-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
199 // IR-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END30:.*]]
200 // IR:         [[FOR_BODY]]:
201 // IR-NEXT:    store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4
202 // IR-NEXT:    br label %[[FOR_COND1:.*]]
203 // IR:         [[FOR_COND1]]:
204 // IR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
205 // IR-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 4
206 // IR-NEXT:    br i1 [[CMP2]], label %[[FOR_BODY3:.*]], label %[[FOR_END27:.*]]
207 // IR:         [[FOR_BODY3]]:
208 // IR-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
209 // IR-NEXT:    store i32 [[TMP2]], i32* [[DOTTILE_0_IV_I]], align 4
210 // IR-NEXT:    br label %[[FOR_COND4:.*]]
211 // IR:         [[FOR_COND4]]:
212 // IR-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
213 // IR-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
214 // IR-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 5
215 // IR-NEXT:    [[CMP5:%.*]] = icmp slt i32 4, [[ADD]]
216 // IR-NEXT:    br i1 [[CMP5]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
217 // IR:         [[COND_TRUE]]:
218 // IR-NEXT:    br label %[[COND_END:.*]]
219 // IR:         [[COND_FALSE]]:
220 // IR-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
221 // IR-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 5
222 // IR-NEXT:    br label %[[COND_END]]
223 // IR:         [[COND_END]]:
224 // IR-NEXT:    [[COND:%.*]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD6]], %[[COND_FALSE]] ]
225 // IR-NEXT:    [[CMP7:%.*]] = icmp slt i32 [[TMP3]], [[COND]]
226 // IR-NEXT:    br i1 [[CMP7]], label %[[FOR_BODY8:.*]], label %[[FOR_END24:.*]]
227 // IR:         [[FOR_BODY8]]:
228 // IR-NEXT:    [[TMP6:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
229 // IR-NEXT:    [[MUL:%.+]] = mul nsw i32 [[TMP6]], 3
230 // IR-NEXT:    [[ADD9:%.+]] = add nsw i32 7, [[MUL]]
231 // IR-NEXT:    store i32 [[ADD9]], i32* [[I]], align 4
232 // IR-NEXT:    [[TMP7:%.+]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
233 // IR-NEXT:    store i32 [[TMP7]], i32* [[DOTTILE_1_IV_J]], align 4
234 // IR-NEXT:    br label %[[FOR_COND10:.+]]
235 // IR:         [[FOR_COND10]]:
236 // IR-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
237 // IR-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
238 // IR-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP8]], 5
239 // IR-NEXT:    [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]]
240 // IR-NEXT:    br i1 [[CMP11]], label %[[COND_TRUE12:.*]], label %[[COND_FALSE13:.*]]
241 // IR:         [[COND_TRUE12]]:
242 // IR-NEXT:    br label %[[COND_END15:.*]]
243 // IR:         [[COND_FALSE13]]:
244 // IR-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
245 // IR-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP9]], 5
246 // IR-NEXT:    br label %[[COND_END15]]
247 // IR:         [[COND_END15]]:
248 // IR-NEXT:    [[COND16:%.*]] = phi i32 [ 4, %[[COND_TRUE12]] ], [ [[ADD14]], %[[COND_FALSE13]] ]
249 // IR-NEXT:    [[CMP17:%.*]] = icmp slt i32 [[TMP7]], [[COND16]]
250 // IR-NEXT:    br i1 [[CMP17]], label %[[FOR_BODY18:.*]], label %[[FOR_END:.*]]
251 // IR:         [[FOR_BODY18]]:
252 // IR-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
253 // IR-NEXT:    [[MUL20:%.*]] = mul nsw i32 [[TMP11]], 3
254 // IR-NEXT:    [[ADD21:%.*]] = add nsw i32 7, [[MUL20]]
255 // IR-NEXT:    store i32 [[ADD21]], i32* [[J]], align 4
256 // IR-NEXT:    [[TMP12:%.*]] = load i32, i32* [[I]], align 4
257 // IR-NEXT:    [[TMP13:%.*]] = load i32, i32* [[J]], align 4
258 // IR-NEXT:    call void (...) @body(i32 [[TMP12]], i32 [[TMP13]])
259 // IR-NEXT:    br label %[[FOR_INC:.*]]
260 // IR:         [[FOR_INC]]:
261 // IR-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
262 // IR-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP14]], 1
263 // IR-NEXT:    store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4
264 // IR-NEXT:    br label %[[FOR_COND10]]
265 // IR:         [[FOR_END]]:
266 // IR-NEXT:    br label %[[FOR_INC22:.*]]
267 // IR:         [[FOR_INC22]]:
268 // IR-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
269 // IR-NEXT:    [[INC23:%.*]] = add nsw i32 [[TMP15]], 1
270 // IR-NEXT:    store i32 [[INC23]], i32* [[DOTTILE_0_IV_I]], align 4
271 // IR-NEXT:    br label %[[FOR_COND4]]
272 // IR:         [[FOR_END24]]:
273 // IR-NEXT:    br label %[[FOR_INC25:.*]]
274 // IR:         [[FOR_INC25]]:
275 // IR-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
276 // IR-NEXT:    [[ADD26:%.*]] = add nsw i32 [[TMP16]], 5
277 // IR-NEXT:    store i32 [[ADD26]], i32* [[DOTFLOOR_1_IV_J]], align 4
278 // IR-NEXT:    br label %[[FOR_COND1]]
279 // IR:         [[FOR_END27]]:
280 // IR-NEXT:    br label %[[FOR_INC28:.*]]
281 // IR:         [[FOR_INC28]]:
282 // IR-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
283 // IR-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP17]], 5
284 // IR-NEXT:    store i32 [[ADD29]], i32* [[DOTFLOOR_0_IV_I]], align 4
285 // IR-NEXT:    br label %[[FOR_COND]]
286 // IR:         [[FOR_END30]]:
287 // IR-NEXT:    ret void
288 //
// Two-dimensional tiling (tile sizes 5 and 5) of a perfectly nested loop
// pair with constant bounds; each loop has four logical iterations
// (7, 10, 13, 16). The parameters start, end and step are not used by the
// loops, but the checks above still expect their allocas and stores.
289 extern "C" void foo2(int start, int end, int step) {
290 #pragma omp tile sizes(5,5)
291   for (int i = 7; i < 17; i+=3)
292     for (int j = 7; j < 17; j+=3)
293       body(i,j);
294 }
295 
296 // IR-LABEL: @foo3(
297 // IR-NEXT:  entry:
298 // IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
299 // IR-NEXT:    [[TMP:%.*]] = alloca i32, align 4
300 // IR-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
301 // IR-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
302 // IR-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
303 // IR-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
304 // IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
305 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
306 // IR-NEXT:    [[J:%.*]] = alloca i32, align 4
307 // IR-NEXT:    [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4
308 // IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
309 // IR-NEXT:    [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4
310 // IR-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2:@.*]])
311 // IR-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
312 // IR-NEXT:    store i32 0, i32* [[DOTOMP_UB]], align 4
313 // IR-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
314 // IR-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
315 // IR-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1:@.*]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
316 // IR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
317 // IR-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 0
318 // IR-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
319 // IR:         [[COND_TRUE]]:
320 // IR-NEXT:    br label %[[COND_END:.*]]
321 // IR:         [[COND_FALSE]]:
322 // IR-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
323 // IR-NEXT:    br label %[[COND_END]]
324 // IR:         [[COND_END]]:
325 // IR-NEXT:    [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP2]], %[[COND_FALSE]] ]
326 // IR-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
327 // IR-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
328 // IR-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4
329 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.*]]
330 // IR:         [[OMP_INNER_FOR_COND]]:
331 // IR-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
332 // IR-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
333 // IR-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
334 // IR-NEXT:    br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
335 // IR:         [[OMP_INNER_FOR_BODY]]:
336 // IR-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
337 // IR-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP6]], 5
338 // IR-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
339 // IR-NEXT:    store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I]], align 4
340 // IR-NEXT:    store i32 7, i32* [[I]], align 4
341 // IR-NEXT:    store i32 7, i32* [[J]], align 4
342 // IR-NEXT:    store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4
343 // IR-NEXT:    br label %[[FOR_COND:.*]]
344 // IR:         [[FOR_COND]]:
345 // IR-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
346 // IR-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[TMP7]], 4
347 // IR-NEXT:    br i1 [[CMP3]], label %[[FOR_BODY:.*]], label %[[FOR_END33:.*]]
348 // IR:         [[FOR_BODY]]:
349 // IR-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
350 // IR-NEXT:    store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4
351 // IR-NEXT:    br label %[[FOR_COND4:.*]]
352 // IR:         [[FOR_COND4]]:
353 // IR-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
354 // IR-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
355 // IR-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP10]], 5
356 // IR-NEXT:    [[CMP6:%.*]] = icmp slt i32 4, [[ADD5]]
357 // IR-NEXT:    br i1 [[CMP6]], label %[[COND_TRUE7:.*]], label %[[COND_FALSE8:.*]]
358 // IR:         [[COND_TRUE7]]:
359 // IR-NEXT:    br label %[[COND_END10:.*]]
360 // IR:         [[COND_FALSE8]]:
361 // IR-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
362 // IR-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP11]], 5
363 // IR-NEXT:    br label %[[COND_END10]]
364 // IR:         [[COND_END10]]:
365 // IR-NEXT:    [[COND11:%.*]] = phi i32 [ 4, %[[COND_TRUE7]] ], [ [[ADD9]], %[[COND_FALSE8]] ]
366 // IR-NEXT:    [[CMP12:%.*]] = icmp slt i32 [[TMP9]], [[COND11]]
367 // IR-NEXT:    br i1 [[CMP12]], label %[[FOR_BODY13:.*]], label %[[FOR_END30:.*]]
368 // IR:         [[FOR_BODY13]]:
369 // IR-NEXT:    [[TMP12:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
370 // IR-NEXT:    [[MUL13:%.+]] = mul nsw i32 [[TMP12]], 3
371 // IR-NEXT:    [[ADD14:%.+]] = add nsw i32 7, [[MUL13]]
372 // IR-NEXT:    store i32 [[ADD14]], i32* [[I]], align 4
373 // IR-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
374 // IR-NEXT:    store i32 [[TMP12]], i32* [[DOTTILE_1_IV_J]], align 4
375 // IR-NEXT:    br label %[[FOR_COND14:.*]]
376 // IR:         [[FOR_COND14]]:
377 // IR-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
378 // IR-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
379 // IR-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP14]], 5
380 // IR-NEXT:    [[CMP16:%.*]] = icmp slt i32 4, [[ADD15]]
381 // IR-NEXT:    br i1 [[CMP16]], label %[[COND_TRUE17:.*]], label %[[COND_FALSE18:.*]]
382 // IR:         [[COND_TRUE17]]:
383 // IR-NEXT:    br label %[[COND_END20:.*]]
384 // IR:         [[COND_FALSE18]]:
385 // IR-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
386 // IR-NEXT:    [[ADD19:%.*]] = add nsw i32 [[TMP15]], 5
387 // IR-NEXT:    br label %[[COND_END20]]
388 // IR:         [[COND_END20]]:
389 // IR-NEXT:    [[COND21:%.*]] = phi i32 [ 4, %[[COND_TRUE17]] ], [ [[ADD19]], %[[COND_FALSE18]] ]
390 // IR-NEXT:    [[CMP22:%.*]] = icmp slt i32 [[TMP13]], [[COND21]]
391 // IR-NEXT:    br i1 [[CMP22]], label %[[FOR_BODY23:.*]], label %[[FOR_END:.*]]
392 // IR:         [[FOR_BODY23]]:
393 // IR-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
394 // IR-NEXT:    [[MUL26:%.*]] = mul nsw i32 [[TMP17]], 3
395 // IR-NEXT:    [[ADD27:%.*]] = add nsw i32 7, [[MUL26]]
396 // IR-NEXT:    store i32 [[ADD27]], i32* [[J]], align 4
397 // IR-NEXT:    [[TMP18:%.*]] = load i32, i32* [[I]], align 4
398 // IR-NEXT:    [[TMP19:%.*]] = load i32, i32* [[J]], align 4
399 // IR-NEXT:    call void (...) @body(i32 [[TMP18]], i32 [[TMP19]])
400 // IR-NEXT:    br label %[[FOR_INC:.*]]
401 // IR:         [[FOR_INC]]:
402 // IR-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
403 // IR-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP20]], 1
404 // IR-NEXT:    store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4
405 // IR-NEXT:    br label %[[FOR_COND14]]
406 // IR:         [[FOR_END]]:
407 // IR-NEXT:    br label %[[FOR_INC28:.*]]
408 // IR:         [[FOR_INC28]]:
409 // IR-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
410 // IR-NEXT:    [[INC29:%.*]] = add nsw i32 [[TMP21]], 1
411 // IR-NEXT:    store i32 [[INC29]], i32* [[DOTTILE_0_IV_I]], align 4
412 // IR-NEXT:    br label %[[FOR_COND4]]
413 // IR:         [[FOR_END30]]:
414 // IR-NEXT:    br label %[[FOR_INC31:.*]]
415 // IR:         [[FOR_INC31]]:
416 // IR-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
417 // IR-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP22]], 5
418 // IR-NEXT:    store i32 [[ADD32]], i32* [[DOTFLOOR_1_IV_J]], align 4
419 // IR-NEXT:    br label %[[FOR_COND]]
420 // IR:         [[FOR_END33]]:
421 // IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.*]]
422 // IR:         [[OMP_BODY_CONTINUE]]:
423 // IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.*]]
424 // IR:         [[OMP_INNER_FOR_INC]]:
425 // IR-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
426 // IR-NEXT:    [[ADD34:%.*]] = add nsw i32 [[TMP23]], 1
427 // IR-NEXT:    store i32 [[ADD34]], i32* [[DOTOMP_IV]], align 4
428 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
429 // IR:         [[OMP_INNER_FOR_END]]:
430 // IR-NEXT:    br label %[[OMP_LOOP_EXIT:.*]]
431 // IR:         [[OMP_LOOP_EXIT]]:
432 // IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]])
433 // IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB3:@.*]], i32 [[TMP0]])
434 // IR-NEXT:    ret void
435 //
// A worksharing-loop directive stacked on top of a two-dimensional tile
// directive: `omp for` is applied to the loop nest produced by the tiling.
// The checks above expect the worksharing induction variable, scaled by 5,
// to be stored into the floor counter of `i`, while the remaining floor and
// tile loops are emitted as ordinary for-loops.
436 extern "C" void foo3() {
437 #pragma omp for
438 #pragma omp tile sizes(5,5)
439     for (int i = 7; i < 17; i += 3)
440       for (int j = 7; j < 17; j += 3)
441         body(i, j);
442 }
443 
444 // IR-LABEL: @foo4(
445 // IR-NEXT:  entry:
446 // IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
447 // IR-NEXT:    [[TMP:%.*]] = alloca i32, align 4
448 // IR-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
449 // IR-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
450 // IR-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
451 // IR-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
452 // IR-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
453 // IR-NEXT:    [[K:%.*]] = alloca i32, align 4
454 // IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
455 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
456 // IR-NEXT:    [[J:%.*]] = alloca i32, align 4
457 // IR-NEXT:    [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4
458 // IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
459 // IR-NEXT:    [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4
460 // IR-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2]])
461 // IR-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
462 // IR-NEXT:    store i32 3, i32* [[DOTOMP_UB]], align 4
463 // IR-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
464 // IR-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
465 // IR-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
466 // IR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
467 // IR-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3
468 // IR-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
469 // IR:         [[COND_TRUE]]:
470 // IR-NEXT:    br label %[[COND_END:.*]]
471 // IR:         [[COND_FALSE]]:
472 // IR-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
473 // IR-NEXT:    br label %[[COND_END]]
474 // IR:         [[COND_END]]:
475 // IR-NEXT:    [[COND:%.*]] = phi i32 [ 3, %[[COND_TRUE]] ], [ [[TMP2]], %[[COND_FALSE]] ]
476 // IR-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
477 // IR-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
478 // IR-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4
479 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.*]]
480 // IR:         [[OMP_INNER_FOR_COND]]:
481 // IR-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
482 // IR-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
483 // IR-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
484 // IR-NEXT:    br i1 [[CMP3]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
485 // IR:         [[OMP_INNER_FOR_BODY]]:
486 // IR-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
487 // IR-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP6]], 1
488 // IR-NEXT:    [[MUL:%.*]] = mul nsw i32 [[DIV]], 3
489 // IR-NEXT:    [[ADD:%.*]] = add nsw i32 7, [[MUL]]
490 // IR-NEXT:    store i32 [[ADD]], i32* [[K]], align 4
491 // IR-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
492 // IR-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
493 // IR-NEXT:    [[DIV4:%.*]] = sdiv i32 [[TMP8]], 1
494 // IR-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 1
495 // IR-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]]
496 // IR-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[SUB]], 5
497 // IR-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
498 // IR-NEXT:    store i32 [[ADD7]], i32* [[DOTFLOOR_0_IV_I]], align 4
499 // IR-NEXT:    store i32 7, i32* [[I]], align 4
500 // IR-NEXT:    store i32 7, i32* [[J]], align 4
501 // IR-NEXT:    store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4
502 // IR-NEXT:    br label %[[FOR_COND:.*]]
503 // IR:         [[FOR_COND]]:
504 // IR-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
505 // IR-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[TMP9]], 4
506 // IR-NEXT:    br i1 [[CMP8]], label %[[FOR_BODY:.*]], label %[[FOR_END38:.*]]
507 // IR:         [[FOR_BODY]]:
508 // IR-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
509 // IR-NEXT:    store i32 [[TMP10]], i32* [[DOTTILE_0_IV_I]], align 4
510 // IR-NEXT:    br label %[[FOR_COND9:.*]]
511 // IR:         [[FOR_COND9]]:
512 // IR-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
513 // IR-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
514 // IR-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP12]], 5
515 // IR-NEXT:    [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]]
516 // IR-NEXT:    br i1 [[CMP11]], label %[[COND_TRUE12:.*]], label %[[COND_FALSE13:.*]]
517 // IR:         [[COND_TRUE12]]:
518 // IR-NEXT:    br label %[[COND_END15:.*]]
519 // IR:         [[COND_FALSE13]]:
520 // IR-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
521 // IR-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP13]], 5
522 // IR-NEXT:    br label %[[COND_END15]]
523 // IR:         [[COND_END15]]:
524 // IR-NEXT:    [[COND16:%.*]] = phi i32 [ 4, %[[COND_TRUE12]] ], [ [[ADD14]], %[[COND_FALSE13]] ]
525 // IR-NEXT:    [[CMP17:%.*]] = icmp slt i32 [[TMP11]], [[COND16]]
526 // IR-NEXT:    br i1 [[CMP17]], label %[[FOR_BODY18:.*]], label %[[FOR_END35:.*]]
527 // IR:         [[FOR_BODY18]]:
528 // IR-NEXT:    [[TMP14:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
529 // IR-NEXT:    [[MUL18:%.+]] = mul nsw i32 [[TMP14]], 3
530 // IR-NEXT:    [[ADD19:%.+]] = add nsw i32 7, [[MUL18]]
531 // IR-NEXT:    store i32 [[ADD19]], i32* [[I]], align 4
532 // IR-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
533 // IR-NEXT:    store i32 [[TMP14]], i32* [[DOTTILE_1_IV_J]], align 4
534 // IR-NEXT:    br label %[[FOR_COND19:.*]]
535 // IR:         [[FOR_COND19]]:
536 // IR-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
537 // IR-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
538 // IR-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP16]], 5
539 // IR-NEXT:    [[CMP21:%.*]] = icmp slt i32 4, [[ADD20]]
540 // IR-NEXT:    br i1 [[CMP21]], label %[[COND_TRUE22:.*]], label %[[COND_FALSE23:.*]]
541 // IR:         [[COND_TRUE22]]:
542 // IR-NEXT:    br label %[[COND_END25:.*]]
543 // IR:         [[COND_FALSE23]]:
544 // IR-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
545 // IR-NEXT:    [[ADD24:%.*]] = add nsw i32 [[TMP17]], 5
546 // IR-NEXT:    br label %[[COND_END25]]
547 // IR:         [[COND_END25]]:
548 // IR-NEXT:    [[COND26:%.*]] = phi i32 [ 4, %[[COND_TRUE22]] ], [ [[ADD24]], %[[COND_FALSE23]] ]
549 // IR-NEXT:    [[CMP27:%.*]] = icmp slt i32 [[TMP15]], [[COND26]]
550 // IR-NEXT:    br i1 [[CMP27]], label %[[FOR_BODY28:.*]], label %[[FOR_END:.*]]
551 // IR:         [[FOR_BODY28]]:
552 // IR-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
553 // IR-NEXT:    [[MUL31:%.*]] = mul nsw i32 [[TMP19]], 3
554 // IR-NEXT:    [[ADD32:%.*]] = add nsw i32 7, [[MUL31]]
555 // IR-NEXT:    store i32 [[ADD32]], i32* [[J]], align 4
556 // IR-NEXT:    [[TMP20:%.*]] = load i32, i32* [[I]], align 4
557 // IR-NEXT:    [[TMP21:%.*]] = load i32, i32* [[J]], align 4
558 // IR-NEXT:    call void (...) @body(i32 [[TMP20]], i32 [[TMP21]])
559 // IR-NEXT:    br label %[[FOR_INC:.*]]
560 // IR:         [[FOR_INC]]:
561 // IR-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
562 // IR-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP22]], 1
563 // IR-NEXT:    store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4
564 // IR-NEXT:    br label %[[FOR_COND19]]
565 // IR:         [[FOR_END]]:
566 // IR-NEXT:    br label %[[FOR_INC33:.*]]
567 // IR:         [[FOR_INC33]]:
568 // IR-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
569 // IR-NEXT:    [[INC34:%.*]] = add nsw i32 [[TMP23]], 1
570 // IR-NEXT:    store i32 [[INC34]], i32* [[DOTTILE_0_IV_I]], align 4
571 // IR-NEXT:    br label %[[FOR_COND9]]
572 // IR:         [[FOR_END35]]:
573 // IR-NEXT:    br label %[[FOR_INC36:.*]]
574 // IR:         [[FOR_INC36]]:
575 // IR-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
576 // IR-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP24]], 5
577 // IR-NEXT:    store i32 [[ADD37]], i32* [[DOTFLOOR_1_IV_J]], align 4
578 // IR-NEXT:    br label %[[FOR_COND]]
579 // IR:         [[FOR_END38]]:
580 // IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.*]]
581 // IR:         [[OMP_BODY_CONTINUE]]:
582 // IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.*]]
583 // IR:         [[OMP_INNER_FOR_INC]]:
584 // IR-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
585 // IR-NEXT:    [[ADD39:%.*]] = add nsw i32 [[TMP25]], 1
586 // IR-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV]], align 4
587 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
588 // IR:         [[OMP_INNER_FOR_END]]:
589 // IR-NEXT:    br label %[[OMP_LOOP_EXIT:.*]]
590 // IR:         [[OMP_LOOP_EXIT]]:
591 // IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]])
592 // IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB3]], i32 [[TMP0]])
593 // IR-NEXT:    ret void
594 //
// foo4: `omp for collapse(2)` over a k loop that directly encloses an
// `omp tile sizes(5,5)` construct on the i/j nest. Every source loop runs
// over 7, 10, 13, 16 (four iterations); k is never passed to body().
// The tail of the expectations above shows the j floor loop (step 5) and the
// i and j tile loops (step 1) remaining ordinary for loops inside the
// worksharing loop body.
595 extern "C" void foo4() {
596 #pragma omp for collapse(2)
597   for (int k = 7; k < 17; k += 3)
// Tiles both i and j, with a tile size of 5 in each dimension.
598 #pragma omp tile sizes(5,5)
599   for (int i = 7; i < 17; i += 3)
600     for (int j = 7; j < 17; j += 3)
601       body(i, j);
602 }
603 
604 
605 // IR-LABEL: @foo5(
606 // IR-NEXT:  entry:
607 // IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
608 // IR-NEXT:    [[TMP:%.*]] = alloca i32, align 4
609 // IR-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
610 // IR-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
611 // IR-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
612 // IR-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
613 // IR-NEXT:    [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
614 // IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
615 // IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
616 // IR-NEXT:    [[J:%.*]] = alloca i32, align 4
617 // IR-NEXT:    [[DOTOMP_LB:%.*]] = alloca i64, align 8
618 // IR-NEXT:    [[DOTOMP_UB:%.*]] = alloca i64, align 8
619 // IR-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
620 // IR-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
621 // IR-NEXT:    [[DOTFLOOR_0_IV_I10:%.*]] = alloca i32, align 4
622 // IR-NEXT:    [[DOTTILE_0_IV_I11:%.*]] = alloca i32, align 4
623 // IR-NEXT:    [[J15:%.*]] = alloca i32, align 4
624 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
625 // IR-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2]])
626 // IR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
627 // IR-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
628 // IR-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP]], align 4
629 // IR-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 5
630 // IR-NEXT:    [[CMP:%.*]] = icmp slt i32 4, [[ADD]]
631 // IR-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
632 // IR:         [[COND_TRUE]]:
633 // IR-NEXT:    br label %[[COND_END:.*]]
634 // IR:         [[COND_FALSE]]:
635 // IR-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP]], align 4
636 // IR-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 5
637 // IR-NEXT:    br label %[[COND_END]]
638 // IR:         [[COND_END]]:
639 // IR-NEXT:    [[COND:%.*]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD4]], %[[COND_FALSE]] ]
640 // IR-NEXT:    store i32 [[COND]], i32* [[DOTCAPTURE_EXPR_3]], align 4
641 // IR-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
642 // IR-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
643 // IR-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
644 // IR-NEXT:    [[SUB6:%.*]] = sub i32 [[SUB]], 1
645 // IR-NEXT:    [[ADD7:%.*]] = add i32 [[SUB6]], 1
646 // IR-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD7]], 1
647 // IR-NEXT:    [[CONV:%.*]] = zext i32 [[DIV]] to i64
648 // IR-NEXT:    [[MUL:%.*]] = mul nsw i64 1, [[CONV]]
649 // IR-NEXT:    [[MUL8:%.*]] = mul nsw i64 [[MUL]], 4
650 // IR-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL8]], 1
651 // IR-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8
652 // IR-NEXT:    store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4
653 // IR-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
654 // IR-NEXT:    store i32 [[TMP6]], i32* [[DOTTILE_0_IV_I]], align 4
655 // IR-NEXT:    store i32 7, i32* [[J]], align 4
656 // IR-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
657 // IR-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
658 // IR-NEXT:    [[CMP12:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]]
659 // IR-NEXT:    br i1 [[CMP12]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]]
660 // IR:         [[OMP_PRECOND_THEN]]:
661 // IR-NEXT:    store i64 0, i64* [[DOTOMP_LB]], align 8
662 // IR-NEXT:    [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8
663 // IR-NEXT:    store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8
664 // IR-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
665 // IR-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
666 // IR-NEXT:    call void @__kmpc_for_static_init_8(%struct.ident_t* [[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1)
667 // IR-NEXT:    [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
668 // IR-NEXT:    [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8
669 // IR-NEXT:    [[CMP16:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]]
670 // IR-NEXT:    br i1 [[CMP16]], label %[[COND_TRUE17:.*]], label %[[COND_FALSE18:.*]]
671 // IR:         [[COND_TRUE17]]:
672 // IR-NEXT:    [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8
673 // IR-NEXT:    br label %[[COND_END19:.*]]
674 // IR:         [[COND_FALSE18]]:
675 // IR-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
676 // IR-NEXT:    br label %[[COND_END19]]
677 // IR:         [[COND_END19]]:
678 // IR-NEXT:    [[COND20:%.*]] = phi i64 [ [[TMP12]], %[[COND_TRUE17]] ], [ [[TMP13]], %[[COND_FALSE18]] ]
679 // IR-NEXT:    store i64 [[COND20]], i64* [[DOTOMP_UB]], align 8
680 // IR-NEXT:    [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8
681 // IR-NEXT:    store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8
682 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.*]]
683 // IR:         [[OMP_INNER_FOR_COND]]:
684 // IR-NEXT:    [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
685 // IR-NEXT:    [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
686 // IR-NEXT:    [[CMP21:%.*]] = icmp sle i64 [[TMP15]], [[TMP16]]
687 // IR-NEXT:    br i1 [[CMP21]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
688 // IR:         [[OMP_INNER_FOR_BODY]]:
689 // IR-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
690 // IR-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
691 // IR-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
692 // IR-NEXT:    [[SUB22:%.*]] = sub i32 [[TMP18]], [[TMP19]]
693 // IR-NEXT:    [[SUB23:%.*]] = sub i32 [[SUB22]], 1
694 // IR-NEXT:    [[ADD24:%.*]] = add i32 [[SUB23]], 1
695 // IR-NEXT:    [[DIV25:%.*]] = udiv i32 [[ADD24]], 1
696 // IR-NEXT:    [[MUL26:%.*]] = mul i32 1, [[DIV25]]
697 // IR-NEXT:    [[MUL27:%.*]] = mul i32 [[MUL26]], 4
698 // IR-NEXT:    [[CONV28:%.*]] = zext i32 [[MUL27]] to i64
699 // IR-NEXT:    [[DIV29:%.*]] = sdiv i64 [[TMP17]], [[CONV28]]
700 // IR-NEXT:    [[MUL30:%.*]] = mul nsw i64 [[DIV29]], 5
701 // IR-NEXT:    [[ADD31:%.*]] = add nsw i64 0, [[MUL30]]
702 // IR-NEXT:    [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32
703 // IR-NEXT:    store i32 [[CONV32]], i32* [[DOTFLOOR_0_IV_I10]], align 4
704 // IR-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
705 // IR-NEXT:    [[CONV33:%.*]] = sext i32 [[TMP20]] to i64
706 // IR-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
707 // IR-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
708 // IR-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
709 // IR-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
710 // IR-NEXT:    [[SUB34:%.*]] = sub i32 [[TMP23]], [[TMP24]]
711 // IR-NEXT:    [[SUB35:%.*]] = sub i32 [[SUB34]], 1
712 // IR-NEXT:    [[ADD36:%.*]] = add i32 [[SUB35]], 1
713 // IR-NEXT:    [[DIV37:%.*]] = udiv i32 [[ADD36]], 1
714 // IR-NEXT:    [[MUL38:%.*]] = mul i32 1, [[DIV37]]
715 // IR-NEXT:    [[MUL39:%.*]] = mul i32 [[MUL38]], 4
716 // IR-NEXT:    [[CONV40:%.*]] = zext i32 [[MUL39]] to i64
717 // IR-NEXT:    [[DIV41:%.*]] = sdiv i64 [[TMP22]], [[CONV40]]
718 // IR-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
719 // IR-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
720 // IR-NEXT:    [[SUB42:%.*]] = sub i32 [[TMP25]], [[TMP26]]
721 // IR-NEXT:    [[SUB43:%.*]] = sub i32 [[SUB42]], 1
722 // IR-NEXT:    [[ADD44:%.*]] = add i32 [[SUB43]], 1
723 // IR-NEXT:    [[DIV45:%.*]] = udiv i32 [[ADD44]], 1
724 // IR-NEXT:    [[MUL46:%.*]] = mul i32 1, [[DIV45]]
725 // IR-NEXT:    [[MUL47:%.*]] = mul i32 [[MUL46]], 4
726 // IR-NEXT:    [[CONV48:%.*]] = zext i32 [[MUL47]] to i64
727 // IR-NEXT:    [[MUL49:%.*]] = mul nsw i64 [[DIV41]], [[CONV48]]
728 // IR-NEXT:    [[SUB50:%.*]] = sub nsw i64 [[TMP21]], [[MUL49]]
729 // IR-NEXT:    [[DIV51:%.*]] = sdiv i64 [[SUB50]], 4
730 // IR-NEXT:    [[MUL52:%.*]] = mul nsw i64 [[DIV51]], 1
731 // IR-NEXT:    [[ADD53:%.*]] = add nsw i64 [[CONV33]], [[MUL52]]
732 // IR-NEXT:    [[CONV54:%.*]] = trunc i64 [[ADD53]] to i32
733 // IR-NEXT:    store i32 [[CONV54]], i32* [[DOTTILE_0_IV_I11]], align 4
734 // IR-NEXT:    [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
735 // IR-NEXT:    [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
736 // IR-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
737 // IR-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
738 // IR-NEXT:    [[SUB55:%.*]] = sub i32 [[TMP29]], [[TMP30]]
739 // IR-NEXT:    [[SUB56:%.*]] = sub i32 [[SUB55]], 1
740 // IR-NEXT:    [[ADD57:%.*]] = add i32 [[SUB56]], 1
741 // IR-NEXT:    [[DIV58:%.*]] = udiv i32 [[ADD57]], 1
742 // IR-NEXT:    [[MUL59:%.*]] = mul i32 1, [[DIV58]]
743 // IR-NEXT:    [[MUL60:%.*]] = mul i32 [[MUL59]], 4
744 // IR-NEXT:    [[CONV61:%.*]] = zext i32 [[MUL60]] to i64
745 // IR-NEXT:    [[DIV62:%.*]] = sdiv i64 [[TMP28]], [[CONV61]]
746 // IR-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
747 // IR-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
748 // IR-NEXT:    [[SUB63:%.*]] = sub i32 [[TMP31]], [[TMP32]]
749 // IR-NEXT:    [[SUB64:%.*]] = sub i32 [[SUB63]], 1
750 // IR-NEXT:    [[ADD65:%.*]] = add i32 [[SUB64]], 1
751 // IR-NEXT:    [[DIV66:%.*]] = udiv i32 [[ADD65]], 1
752 // IR-NEXT:    [[MUL67:%.*]] = mul i32 1, [[DIV66]]
753 // IR-NEXT:    [[MUL68:%.*]] = mul i32 [[MUL67]], 4
754 // IR-NEXT:    [[CONV69:%.*]] = zext i32 [[MUL68]] to i64
755 // IR-NEXT:    [[MUL70:%.*]] = mul nsw i64 [[DIV62]], [[CONV69]]
756 // IR-NEXT:    [[SUB71:%.*]] = sub nsw i64 [[TMP27]], [[MUL70]]
757 // IR-NEXT:    [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
758 // IR-NEXT:    [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
759 // IR-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
760 // IR-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
761 // IR-NEXT:    [[SUB72:%.*]] = sub i32 [[TMP35]], [[TMP36]]
762 // IR-NEXT:    [[SUB73:%.*]] = sub i32 [[SUB72]], 1
763 // IR-NEXT:    [[ADD74:%.*]] = add i32 [[SUB73]], 1
764 // IR-NEXT:    [[DIV75:%.*]] = udiv i32 [[ADD74]], 1
765 // IR-NEXT:    [[MUL76:%.*]] = mul i32 1, [[DIV75]]
766 // IR-NEXT:    [[MUL77:%.*]] = mul i32 [[MUL76]], 4
767 // IR-NEXT:    [[CONV78:%.*]] = zext i32 [[MUL77]] to i64
768 // IR-NEXT:    [[DIV79:%.*]] = sdiv i64 [[TMP34]], [[CONV78]]
769 // IR-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
770 // IR-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
771 // IR-NEXT:    [[SUB80:%.*]] = sub i32 [[TMP37]], [[TMP38]]
772 // IR-NEXT:    [[SUB81:%.*]] = sub i32 [[SUB80]], 1
773 // IR-NEXT:    [[ADD82:%.*]] = add i32 [[SUB81]], 1
774 // IR-NEXT:    [[DIV83:%.*]] = udiv i32 [[ADD82]], 1
775 // IR-NEXT:    [[MUL84:%.*]] = mul i32 1, [[DIV83]]
776 // IR-NEXT:    [[MUL85:%.*]] = mul i32 [[MUL84]], 4
777 // IR-NEXT:    [[CONV86:%.*]] = zext i32 [[MUL85]] to i64
778 // IR-NEXT:    [[MUL87:%.*]] = mul nsw i64 [[DIV79]], [[CONV86]]
779 // IR-NEXT:    [[SUB88:%.*]] = sub nsw i64 [[TMP33]], [[MUL87]]
780 // IR-NEXT:    [[DIV89:%.*]] = sdiv i64 [[SUB88]], 4
781 // IR-NEXT:    [[MUL90:%.*]] = mul nsw i64 [[DIV89]], 4
782 // IR-NEXT:    [[SUB91:%.*]] = sub nsw i64 [[SUB71]], [[MUL90]]
783 // IR-NEXT:    [[MUL92:%.*]] = mul nsw i64 [[SUB91]], 3
784 // IR-NEXT:    [[ADD93:%.*]] = add nsw i64 7, [[MUL92]]
785 // IR-NEXT:    [[CONV94:%.*]] = trunc i64 [[ADD93]] to i32
786 // IR-NEXT:    store i32 [[CONV94]], i32* [[J15]], align 4
787 // IR-NEXT:    store i32 7, i32* [[I]], align 4
788 // IR-NEXT:    [[TMP39:%.*]] = load i32, i32* [[DOTTILE_0_IV_I11]], align 4
789 // IR-NEXT:    [[MUL95:%.*]] = mul nsw i32 [[TMP39]], 3
790 // IR-NEXT:    [[ADD96:%.*]] = add nsw i32 7, [[MUL95]]
791 // IR-NEXT:    store i32 [[ADD96]], i32* [[I]], align 4
792 // IR-NEXT:    [[TMP40:%.*]] = load i32, i32* [[I]], align 4
793 // IR-NEXT:    [[TMP41:%.*]] = load i32, i32* [[J15]], align 4
794 // IR-NEXT:    call void (...) @body(i32 [[TMP40]], i32 [[TMP41]])
795 // IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.*]]
796 // IR:         [[OMP_BODY_CONTINUE]]:
797 // IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.*]]
798 // IR:         [[OMP_INNER_FOR_INC]]:
799 // IR-NEXT:    [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
800 // IR-NEXT:    [[ADD97:%.*]] = add nsw i64 [[TMP42]], 1
801 // IR-NEXT:    store i64 [[ADD97]], i64* [[DOTOMP_IV]], align 8
802 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
803 // IR:         [[OMP_INNER_FOR_END]]:
804 // IR-NEXT:    br label %[[OMP_LOOP_EXIT:.*]]
805 // IR:         [[OMP_LOOP_EXIT]]:
806 // IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]])
807 // IR-NEXT:    br label %[[OMP_PRECOND_END]]
808 // IR:         [[OMP_PRECOND_END]]:
809 // IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB3]], i32 [[TMP0]])
810 // IR-NEXT:    ret void
811 //
// foo5: `omp for collapse(3)` stacked directly on `omp tile sizes(5)` over a
// two-deep i/j nest. The tile construct has a single size, so it applies to
// the i loop only; j stays as written.
// The expectations above recompute three values from one 64-bit logical
// iteration variable inside the worksharing loop body: the floor and tile
// induction variables of i, and j.
812 extern "C" void foo5() {
813 #pragma omp for collapse(3)
814 #pragma omp tile sizes(5)
815   for (int i = 7; i < 17; i += 3)
816     for (int j = 7; j < 17; j += 3)
817       body(i, j);
818 }
819 
820 
821 // IR-LABEL: @foo6(
822 // IR-NEXT:  entry:
823 // IR-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
824 // IR-NEXT:    ret void
825 //
826 // IR-LABEL: @.omp_outlined.(
827 // IR-NEXT:  entry:
828 // IR-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
829 // IR-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
830 // IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
831 // IR-NEXT:    [[TMP:%.*]] = alloca i32, align 4
832 // IR-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
833 // IR-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
834 // IR-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
835 // IR-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
836 // IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
837 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
838 // IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
839 // IR-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
840 // IR-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
841 // IR-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
842 // IR-NEXT:    store i32 0, i32* [[DOTOMP_UB]], align 4
843 // IR-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
844 // IR-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
845 // IR-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
846 // IR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
847 // IR-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
848 // IR-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
849 // IR-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0
850 // IR-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
851 // IR:         [[COND_TRUE]]:
852 // IR-NEXT:    br label %[[COND_END:.*]]
853 // IR:         [[COND_FALSE]]:
854 // IR-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
855 // IR-NEXT:    br label %[[COND_END]]
856 // IR:         [[COND_END]]:
857 // IR-NEXT:    [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
858 // IR-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
859 // IR-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
860 // IR-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
861 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.*]]
862 // IR:         [[OMP_INNER_FOR_COND]]:
863 // IR-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
864 // IR-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
865 // IR-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
866 // IR-NEXT:    br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
867 // IR:         [[OMP_INNER_FOR_BODY]]:
868 // IR-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
869 // IR-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5
870 // IR-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
871 // IR-NEXT:    store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I]], align 4
872 // IR-NEXT:    store i32 7, i32* [[I]], align 4
873 // IR-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
874 // IR-NEXT:    store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4
875 // IR-NEXT:    br label %[[FOR_COND:.*]]
876 // IR:         [[FOR_COND]]:
877 // IR-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
878 // IR-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
879 // IR-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP10]], 5
880 // IR-NEXT:    [[CMP4:%.*]] = icmp slt i32 4, [[ADD3]]
881 // IR-NEXT:    br i1 [[CMP4]], label %[[COND_TRUE5:.*]], label %[[COND_FALSE6:.*]]
882 // IR:         [[COND_TRUE5]]:
883 // IR-NEXT:    br label %[[COND_END8:.*]]
884 // IR:         [[COND_FALSE6]]:
885 // IR-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
886 // IR-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP11]], 5
887 // IR-NEXT:    br label %[[COND_END8]]
888 // IR:         [[COND_END8]]:
889 // IR-NEXT:    [[COND9:%.*]] = phi i32 [ 4, %[[COND_TRUE5]] ], [ [[ADD7]], %[[COND_FALSE6]] ]
890 // IR-NEXT:    [[CMP10:%.*]] = icmp slt i32 [[TMP9]], [[COND9]]
891 // IR-NEXT:    br i1 [[CMP10]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
892 // IR:         [[FOR_BODY]]:
893 // IR-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
894 // IR-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 3
895 // IR-NEXT:    [[ADD12:%.*]] = add nsw i32 7, [[MUL11]]
896 // IR-NEXT:    store i32 [[ADD12]], i32* [[I]], align 4
897 // IR-NEXT:    [[TMP13:%.*]] = load i32, i32* [[I]], align 4
898 // IR-NEXT:    call void (...) @body(i32 [[TMP13]])
899 // IR-NEXT:    br label %[[FOR_INC:.*]]
900 // IR:         [[FOR_INC]]:
901 // IR-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
902 // IR-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP14]], 1
903 // IR-NEXT:    store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4
904 // IR-NEXT:    br label %[[FOR_COND]]
905 // IR:         [[FOR_END]]:
906 // IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.*]]
907 // IR:         [[OMP_BODY_CONTINUE]]:
908 // IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.*]]
909 // IR:         [[OMP_INNER_FOR_INC]]:
910 // IR-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
911 // IR-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP15]], 1
912 // IR-NEXT:    store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4
913 // IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
914 // IR:         [[OMP_INNER_FOR_END]]:
915 // IR-NEXT:    br label %[[OMP_LOOP_EXIT:.*]]
916 // IR:         [[OMP_LOOP_EXIT]]:
917 // IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP1]])
918 // IR-NEXT:    ret void
919 //
// foo6: combined `omp parallel for` applied to a single loop that is tiled
// with `omp tile sizes(5)`.
// The expectations above check that the region is outlined and launched via
// __kmpc_fork_call, that the worksharing loop drives the floor induction
// variable (logical IV * 5), and that the tile loop is an ordinary for loop
// nested inside it.
920 extern "C" void foo6() {
921 #pragma omp parallel for
922 #pragma omp tile sizes(5)
923   for (int i = 7; i < 17; i += 3)
924     body(i);
925 }
926 
927 
// foo7: template variant in which the induction type (T), the loop increment
// (Step) and the tile size (Tile) are template parameters, while the loop
// bounds (start, end) are runtime arguments.
928 template<typename T, T Step, T Tile>
929 void foo7(T start, T end) {
// The tile size is the non-type template argument Tile.
930 #pragma omp tile sizes(Tile)
931   for (T i = start; i < end; i += Step)
932     body(i);
933 }
934 
935 // IR-LABEL: define {{.*}}void @tfoo7(
936 // IR-NEXT:  entry:
937 // IR-NEXT:    call void @_Z4foo7IiLi3ELi5EEvT_S0_(i32 0, i32 42)
938 // IR-NEXT:    ret void
939 //
940 // IR-LABEL: define linkonce_odr void @_Z4foo7IiLi3ELi5EEvT_S0_(
941 // IR-NEXT:  entry:
942 // IR-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
943 // IR-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
944 // IR-NEXT:    [[CAPTURE_EXPR:%.+]] = alloca i32, align 4
945 // IR-NEXT:    [[CAPTURE_EXPR1:%.+]] = alloca i32, align 4
946 // IR-NEXT:    [[CAPTURE_EXPR2:%.+]] = alloca i32, align 4
947 // IR-NEXT:    [[I:%.*]] = alloca i32, align 4
948 // IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
949 // IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
950 // IR-NEXT:    store i32 [[START:%.*]], i32* [[START_ADDR]], align 4
951 // IR-NEXT:    store i32 [[END:%.*]], i32* [[END_ADDR]], align 4
952 // IR-NEXT:    [[TMP0:%.+]] = load i32, i32* [[START_ADDR]], align 4
953 // IR-NEXT:    store i32 [[TMP0]], i32* [[CAPTURE_EXPR]], align 4
954 // IR-NEXT:    [[TMP1:%.+]] = load i32, i32* [[END_ADDR]], align 4
955 // IR-NEXT:    store i32 [[TMP1]], i32* [[CAPTURE_EXPR1]], align 4
956 // IR-NEXT:    [[TMP2:%.+]] = load i32, i32* [[CAPTURE_EXPR1]], align 4
957 // IR-NEXT:    [[TMP3:%.+]] = load i32, i32* [[CAPTURE_EXPR]], align 4
958 // IR-NEXT:    [[SUB:%.+]] = sub i32 [[TMP2]], [[TMP3]]
959 // IR-NEXT:    [[SUB3:%.+]] = sub i32 [[SUB]], 1
960 // IR-NEXT:    [[ADD:%.+]] = add i32 [[SUB3]], 3
961 // IR-NEXT:    [[DIV:%.+]] = udiv i32 [[ADD]], 3
962 // IR-NEXT:    [[SUB4:%.+]] = sub i32 [[DIV]], 1
963 // IR-NEXT:    store i32 [[SUB4]], i32* [[CAPTURE_EXPR2]], align 4
964 // IR-NEXT:    [[TMP4:%.+]] = load i32, i32* [[START_ADDR]], align 4
965 // IR-NEXT:    store i32 [[TMP4]], i32* [[I]], align 4
966 // IR-NEXT:    store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4
967 // IR-NEXT:    br label %[[FOR_COND:.*]]
968 // IR:         [[FOR_COND]]:
969 // IR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
970 // IR-NEXT:    [[TMP6:%.+]] = load i32, i32* [[CAPTURE_EXPR2]], align 4
971 // IR-NEXT:    [[ADD3:%.*]] = add i32 [[TMP6]], 1
972 // IR-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]]
973 // IR-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END25:.*]]
974 // IR:         [[FOR_BODY]]:
975 // IR-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
976 // IR-NEXT:    store i32 [[TMP3]], i32* [[DOTTILE_0_IV_I]], align 4
977 // IR-NEXT:    br label %[[FOR_COND4:.*]]
978 // IR:         [[FOR_COND4]]:
979 // IR-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
980 // IR-NEXT:    [[TMP5:%.*]] = load i32, i32* [[CAPTURE_EXPR2]], align 4
981 // IR-NEXT:    [[ADD10:%.*]] = add i32 [[TMP5]], 1
982 // IR-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
983 // IR-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP7]], 5
984 // IR-NEXT:    [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]]
985 // IR-NEXT:    br i1 [[CMP12]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
986 // IR:         [[COND_TRUE]]:
987 // IR-NEXT:    [[TMP8:%.*]] = load i32, i32* [[CAPTURE_EXPR2]], align 4
988 // IR-NEXT:    [[ADD18:%.*]] = add i32 [[TMP8]], 1
989 // IR-NEXT:    br label %[[COND_END:.*]]
990 // IR:         [[COND_FALSE]]:
991 // IR-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
992 // IR-NEXT:    [[ADD19:%.*]] = add nsw i32 [[TMP10]], 5
993 // IR-NEXT:    br label %[[COND_END]]
994 // IR:         [[COND_END]]:
995 // IR-NEXT:    [[COND:%.*]] = phi i32 [ [[ADD18]], %[[COND_TRUE]] ], [ [[ADD19]], %[[COND_FALSE]] ]
996 // IR-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[TMP4]], [[COND]]
997 // IR-NEXT:    br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END:.*]]
998 // IR:         [[FOR_BODY21]]:
999 // IR-NEXT:    [[TMP11:%.*]] = load i32, i32* [[CAPTURE_EXPR]], align 4
1000 // IR-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
1001 // IR-NEXT:    [[MUL:%.*]] = mul i32 [[TMP13]], 3
1002 // IR-NEXT:    [[ADD22:%.*]] = add i32 [[TMP11]], [[MUL]]
1003 // IR-NEXT:    store i32 [[ADD22]], i32* [[I]], align 4
1004 // IR-NEXT:    [[TMP14:%.*]] = load i32, i32* [[I]], align 4
1005 // IR-NEXT:    call void (...) @body(i32 [[TMP14]])
1006 // IR-NEXT:    br label %[[FOR_INC:.*]]
1007 // IR:         [[FOR_INC]]:
1008 // IR-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
1009 // IR-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP15]], 1
1010 // IR-NEXT:    store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4
1011 // IR-NEXT:    br label %[[FOR_COND4]]
1012 // IR:         [[FOR_END]]:
1013 // IR-NEXT:    br label %[[FOR_INC23:.*]]
1014 // IR:         [[FOR_INC23]]:
1015 // IR-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
1016 // IR-NEXT:    [[ADD24:%.*]] = add nsw i32 [[TMP16]], 5
1017 // IR-NEXT:    store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4
1018 // IR-NEXT:    br label %[[FOR_COND]]
1019 // IR:         [[FOR_END25]]:
1020 // IR-NEXT:    ret void
1021 //
// tfoo7: forces the instantiation foo7<int, 3, 5> with bounds 0 and 42. The
// expectations above check both the call to, and the body of, the mangled
// instantiation _Z4foo7IiLi3ELi5EEvT_S0_.
1022 extern "C" void tfoo7() {
1023   foo7<int,3,5>(0, 42);
1024 }
1025 
1026 #endif /* HEADER */
1027