// Test host codegen.
2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
8 
9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
16 
17 // RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
18 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
19 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
20 // RUN: %clang_cc1  -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
21 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
22 // RUN: %clang_cc1  -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
23 
24 // RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
25 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
26 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
27 // RUN: %clang_cc1  -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
28 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
29 // RUN: %clang_cc1  -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
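// Note: with -fopenmp-simd only the simd portions of the directives are honored,
// so the SIMD-ONLY runs must not emit any OpenMP runtime (__kmpc) or offloading
// (__tgt) calls.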
31 // expected-no-diagnostics
32 #ifndef HEADER
33 #define HEADER
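// The schedule-kind constants matched in the __kmpc_for_static_init_4 and
// __kmpc_dispatch_init_4 check lines below correspond to the OpenMP runtime's
// sched_type values: 92 = distribute static, 91 = distribute static chunked,
// 34 = 'for' static, 33 = 'for' static chunked, 35 = 'for' dynamic chunked.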
34 
35 
36 template <typename T>
37 T tmain() {
38   T *a, *b, *c;
39   int n = 10000;
40   int ch = 100;
41 
42   // no schedule clauses
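  // (the 'cancel for' directive in the loop body additionally exercises
  // cancellation codegen inside the combined construct)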
43   #pragma omp target
44   #pragma omp teams
45   #pragma omp distribute parallel for
46   for (int i = 0; i < n; ++i) {
47     #pragma omp cancel for
48     a[i] = b[i] + c[i];
49   }
50 
51   // dist_schedule: static no chunk
52   #pragma omp target
53   #pragma omp teams
54   #pragma omp distribute parallel for dist_schedule(static)
55   for (int i = 0; i < n; ++i) {
56     a[i] = b[i] + c[i];
57   }
58 
59   // dist_schedule: static chunk
60   #pragma omp target
61   #pragma omp teams
62   #pragma omp distribute parallel for dist_schedule(static, ch)
63   for (int i = 0; i < n; ++i) {
64     a[i] = b[i] + c[i];
65   }
66 
67   // schedule: static no chunk
68   #pragma omp target
69   #pragma omp teams
70   #pragma omp distribute parallel for schedule(static)
71   for (int i = 0; i < n; ++i) {
72     a[i] = b[i] + c[i];
73   }
74 
75   // schedule: static chunk
76   #pragma omp target
77   #pragma omp teams
78   #pragma omp distribute parallel for schedule(static, ch)
79   for (int i = 0; i < n; ++i) {
80     a[i] = b[i] + c[i];
81   }
82 
83   // schedule: dynamic no chunk
84   #pragma omp target
85   #pragma omp teams
86   #pragma omp distribute parallel for schedule(dynamic)
87   for (int i = 0; i < n; ++i) {
88     a[i] = b[i] + c[i];
89   }
90 
91   // schedule: dynamic chunk
92   #pragma omp target
93   #pragma omp teams
94   #pragma omp distribute parallel for schedule(dynamic, ch)
95   for (int i = 0; i < n; ++i) {
96     a[i] = b[i] + c[i];
97   }
98 
99   return T();
100 }
101 
102 int main() {
103   double *a, *b, *c;
104   int n = 10000;
105   int ch = 100;
106 
107 #ifdef LAMBDA
108   // LAMBDA-LABEL: @main
109   // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
110   [&]() {
111     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
112 
113     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
114     // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
115 
116     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
117     // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
118 
119     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
120     // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
121 
122     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
123     // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
124 
125     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
126     // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
127 
128     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
129     // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
130 
131     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
132     // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
133 
134     // no schedule clauses
135     #pragma omp target
136     #pragma omp teams
137     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
138     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
139 
140     #pragma omp distribute parallel for
141     for (int i = 0; i < n; ++i) {
142       a[i] = b[i] + c[i];
143       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
144       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
145       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
146       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
147       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
148 
149       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
150 
151       // check EUB for distribute
152       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
153       // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
154       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
155       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
156       // LAMBDA-DAG: [[EUB_TRUE]]:
157       // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
158       // LAMBDA: br label %[[EUB_END:.+]]
159       // LAMBDA-DAG: [[EUB_FALSE]]:
160       // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
161       // LAMBDA: br label %[[EUB_END]]
162       // LAMBDA-DAG: [[EUB_END]]:
163       // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
164       // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
165 
166       // initialize omp.iv
167       // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
168       // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
169       // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
170 
171       // check exit condition
172       // LAMBDA: [[OMP_JUMP_BACK]]:
173       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
174       // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
175       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
176       // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
177 
178       // check that PrevLB and PrevUB are passed to the 'for'
179       // LAMBDA: [[DIST_BODY]]:
180       // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
181       // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
182       // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
183       // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
184       // check that distlb and distub are properly passed to fork_call
185       // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
186       // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
187       // LAMBDA: br label %[[DIST_INC:.+]]
188 
189       // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
190       // LAMBDA: [[DIST_INC]]:
191       // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
192       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
193       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
194       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
195       // LAMBDA: br label %[[OMP_JUMP_BACK]]
196 
197       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
198       // LAMBDA: ret
199 
200       // implementation of 'parallel for'
201       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
202 
203       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
204       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
205       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
206 
207       // initialize lb and ub to PrevLB and PrevUB
208       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
209       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
210       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
211       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
212       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
213       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
214       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
215       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
216       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
217       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
218       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
219 
220       // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
221       // In this case we use EUB
222       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
223       // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
224       // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
225       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
226       // LAMBDA: [[PF_EUB_TRUE]]:
227       // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
228       // LAMBDA: br label %[[PF_EUB_END:.+]]
229       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
230       // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
231       // LAMBDA: br label %[[PF_EUB_END]]
232       // LAMBDA-DAG: [[PF_EUB_END]]:
233       // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
234       // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
235 
236       // initialize omp.iv
237       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
238       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
239       // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
240 
241       // check exit condition
242       // LAMBDA: [[OMP_PF_JUMP_BACK]]:
243       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
244       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
245       // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
246       // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
247 
      // check the 'for' loop body: the IV is loaded
249       // LAMBDA: [[PF_BODY]]:
250       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
251       // LAMBDA: br label {{.+}}
252 
253       // check stride 1 for 'for' in 'distribute parallel for'
254       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
255       // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
256       // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
257       // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
258 
259       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
260       // LAMBDA: ret
261 
262       [&]() {
        a[i] = b[i] + c[i];
264       }();
265     }
266 
    // dist_schedule: static no chunk (same as the default - no dist_schedule)
268     #pragma omp target
269     #pragma omp teams
270     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
271     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
272 
273     #pragma omp distribute parallel for dist_schedule(static)
274     for (int i = 0; i < n; ++i) {
275       a[i] = b[i] + c[i];
276       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
277       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
278       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
279       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
280       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
281 
282       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
283 
284       // check EUB for distribute
285       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
286       // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
287       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
288       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
289       // LAMBDA-DAG: [[EUB_TRUE]]:
290       // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
291       // LAMBDA: br label %[[EUB_END:.+]]
292       // LAMBDA-DAG: [[EUB_FALSE]]:
293       // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
294       // LAMBDA: br label %[[EUB_END]]
295       // LAMBDA-DAG: [[EUB_END]]:
296       // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
297       // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
298 
299       // initialize omp.iv
300       // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
301       // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
302       // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
303 
304       // check exit condition
305       // LAMBDA: [[OMP_JUMP_BACK]]:
306       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
307       // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
308       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
309       // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
310 
311       // check that PrevLB and PrevUB are passed to the 'for'
312       // LAMBDA: [[DIST_BODY]]:
313       // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
314       // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
315       // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
316       // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
317       // check that distlb and distub are properly passed to fork_call
318       // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
319       // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
320       // LAMBDA: br label %[[DIST_INC:.+]]
321 
322       // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
323       // LAMBDA: [[DIST_INC]]:
324       // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
325       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
326       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
327       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
328       // LAMBDA: br label %[[OMP_JUMP_BACK]]
329 
330       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
331       // LAMBDA: ret
332 
333       // implementation of 'parallel for'
334       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
335 
336       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
337       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
338       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
339 
340       // initialize lb and ub to PrevLB and PrevUB
341       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
342       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
343       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
344       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
345       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
346       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
347       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
348       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
349       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
350       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
351       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
352 
353       // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
354       // In this case we use EUB
355       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
356       // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
357       // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
358       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
359       // LAMBDA: [[PF_EUB_TRUE]]:
360       // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
361       // LAMBDA: br label %[[PF_EUB_END:.+]]
362       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
363       // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
364       // LAMBDA: br label %[[PF_EUB_END]]
365       // LAMBDA-DAG: [[PF_EUB_END]]:
366       // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
367       // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
368 
369       // initialize omp.iv
370       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
371       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
372       // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
373 
374       // check exit condition
375       // LAMBDA: [[OMP_PF_JUMP_BACK]]:
376       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
377       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
378       // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
379       // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
380 
      // check the 'for' loop body: the IV is loaded
382       // LAMBDA: [[PF_BODY]]:
383       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
384       // LAMBDA: br label {{.+}}
385 
386       // check stride 1 for 'for' in 'distribute parallel for'
387       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
388       // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
389       // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
390       // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
391 
392       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
393       // LAMBDA: ret
394       [&]() {
        a[i] = b[i] + c[i];
396       }();
397     }
398 
399     // dist_schedule: static chunk
400     #pragma omp target
401     #pragma omp teams
402     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]](
403     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
404 
405     #pragma omp distribute parallel for dist_schedule(static, ch)
406     for (int i = 0; i < n; ++i) {
407       a[i] = b[i] + c[i];
408       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
409       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
410       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
411       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
412       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
413 
      // unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
415       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
416       // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
417 
418       // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
419       // check EUB for distribute
420       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
421       // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
422       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
423       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
424       // LAMBDA-DAG: [[EUB_TRUE]]:
425       // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
426       // LAMBDA: br label %[[EUB_END:.+]]
427       // LAMBDA-DAG: [[EUB_FALSE]]:
428       // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
429       // LAMBDA: br label %[[EUB_END]]
430       // LAMBDA-DAG: [[EUB_END]]:
431       // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
432       // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
433 
434       // initialize omp.iv
435       // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
436       // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
437 
438       // check exit condition
439       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
440       // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
441       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
442       // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
443 
444       // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
445       // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
446 
447       // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
448       // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
449       // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
450       // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
451       // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
452 
453       // check that PrevLB and PrevUB are passed to the 'for'
454       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
455       // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
456       // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
457       // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
458       // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
459       // check that distlb and distub are properly passed to fork_call
460       // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
461       // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
462       // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]]
463 
464       // check DistInc
465       // LAMBDA: [[DIST_INNER_LOOP_INC]]:
466       // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
467       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
468       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
469       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
470       // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
471 
472       // LAMBDA: [[DIST_INNER_LOOP_END]]:
473       // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
474 
475       // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
476       // check NextLB and NextUB
477       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
478       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
479       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
480       // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
481       // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
482       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
483       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
484       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
485       // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
486 
487       // outer loop exit
488       // LAMBDA: [[DIST_OUTER_LOOP_END]]:
489       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
490       // LAMBDA: ret
491 
      // skip the implementation of 'parallel for': it uses the default scheduling, which is tested above
493       [&]() {
        a[i] = b[i] + c[i];
495       }();
496     }
497 
498     // schedule: static no chunk
499     #pragma omp target
500     #pragma omp teams
501     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]](
502     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
503 
504     #pragma omp distribute parallel for schedule(static)
505     for (int i = 0; i < n; ++i) {
506       a[i] = b[i] + c[i];
507       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]](
508       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
509       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
510       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
511       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
512 
513       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
514       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
      // skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
516       // LAMBDA: ret
517 
      // the 'parallel for' implementation is the same as in the case without a schedule clause (static, no chunk, is the default)
519       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
520 
521       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
522       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
523       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
524 
525       // initialize lb and ub to PrevLB and PrevUB
526       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
527       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
528       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
529       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
530       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
531       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
532       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
533       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
534       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
535       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
536       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
537 
538       // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
539       // In this case we use EUB
540       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
541       // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
542       // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
543       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
544       // LAMBDA: [[PF_EUB_TRUE]]:
545       // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
546       // LAMBDA: br label %[[PF_EUB_END:.+]]
547       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
548       // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
549       // LAMBDA: br label %[[PF_EUB_END]]
550       // LAMBDA-DAG: [[PF_EUB_END]]:
551       // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
552       // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
553 
554       // initialize omp.iv
555       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
556       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
557       // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
558 
559       // check exit condition
560       // LAMBDA: [[OMP_PF_JUMP_BACK]]:
561       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
562       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
563       // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
564       // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
565 
      // check the 'for' loop body: the IV is loaded
567       // LAMBDA: [[PF_BODY]]:
568       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
569       // LAMBDA: br label {{.+}}
570 
571       // check stride 1 for 'for' in 'distribute parallel for'
572       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
573       // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
574       // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
575       // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
576 
577       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
578       // LAMBDA: ret
579 
580       [&]() {
        a[i] = b[i] + c[i];
582       }();
583     }
584 
585     // schedule: static chunk
586     #pragma omp target
587     #pragma omp teams
588     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]](
589     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
590 
591     #pragma omp distribute parallel for schedule(static, ch)
592     for (int i = 0; i < n; ++i) {
593       a[i] = b[i] + c[i];
594       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]](
595       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
596       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
      // skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
598       // LAMBDA: ret
599 
600       // 'parallel for' implementation using outer and inner loops and PrevEUB
601       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
602       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
603       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
604       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
605       // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
606 
607       // initialize lb and ub to PrevLB and PrevUB
608       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
609       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
610       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
611       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
612       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
613       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
614       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
615       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
616       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
617       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
618       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
619       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
620 
621       // check PrevEUB (using PrevUB instead of NumIt as upper bound)
622       // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
623       // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
624       // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
625       // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
626       // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
627       // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
628       // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
629       // LAMBDA: [[PF_EUB_TRUE]]:
630       // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
631       // LAMBDA: br label %[[PF_EUB_END:.+]]
632       // LAMBDA-DAG: [[PF_EUB_FALSE]]:
633       // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
634       // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
635       // LAMBDA: br label %[[PF_EUB_END]]
636       // LAMBDA-DAG: [[PF_EUB_END]]:
637       // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
638       // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
639       // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
640       // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
641       // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
642 
643       // initialize omp.iv (IV = LB)
644       // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
645       // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
646 
647       // outer loop: while (IV < UB) {
648       // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
649       // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
650       // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
651       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
652 
653       // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
654       // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
655 
656       // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]:
657       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
658       // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
659       // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
660       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
661 
662       // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
663       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
664       // skip body branch
665       // LAMBDA: br{{.+}}
666       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
667 
668       // IV = IV + 1 and inner loop latch
669       // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
670       // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
671       // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
672       // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
673       // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
674 
675       // check NextLB and NextUB
676       // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
677       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
678 
679       // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
680       // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
681       // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
682       // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
683       // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
684       // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
685       // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
686       // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
687       // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
688       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
689 
690       // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
691       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
692       // LAMBDA: ret
693       [&]() {
        a[i] = b[i] + c[i];
695       }();
696     }
697 
698     // schedule: dynamic no chunk
699     #pragma omp target
700     #pragma omp teams
701     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]](
702     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
703 
704     #pragma omp distribute parallel for schedule(dynamic)
705     for (int i = 0; i < n; ++i) {
706       a[i] = b[i] + c[i];
707       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]](
708       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
709       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
      // skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
711       // LAMBDA: ret
712 
      // 'parallel for' implementation using an outer dispatch loop (driven by __kmpc_dispatch_next) and an inner loop
714       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
715       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
716       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
717       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
718       // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
719 
720       // initialize lb and ub to PrevLB and PrevUB
721       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
722       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
723       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
724       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
725       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
726       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
727       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
728       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
729       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
730       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
731       // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
732       // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
733       // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
734       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
735 
736       // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
737       // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
738       // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
739       // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
740 
741       // initialize omp.iv (IV = LB)
742       // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
743       // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
744       // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
745       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
746 
747       // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
748       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
749       // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
750       // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
751       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
752 
753       // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
754       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
755       // skip body branch
756       // LAMBDA: br{{.+}}
757       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
758 
759       // IV = IV + 1 and inner loop latch
760       // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
761       // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
762       // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
763       // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
764       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
765 
      // no NextLB/NextUB update here; the next chunk is obtained from __kmpc_dispatch_next
767       // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
768       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
769 
770       // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
771       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
772 
773       // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
774       // LAMBDA: ret
775       [&]() {
        a[i] = b[i] + c[i];
777       }();
778     }
779 
780     // schedule: dynamic chunk
781     #pragma omp target
782     #pragma omp teams
783     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]](
784     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
785 
786     #pragma omp distribute parallel for schedule(dynamic, ch)
787     for (int i = 0; i < n; ++i) {
788       a[i] = b[i] + c[i];
789       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]](
790       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
791       // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
      // skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
793       // LAMBDA: ret
794 
      // 'parallel for' implementation using an outer dispatch loop (driven by __kmpc_dispatch_next) and an inner loop
796       // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
797       // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
798       // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
799       // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
800       // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
801 
802       // initialize lb and ub to PrevLB and PrevUB
803       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
804       // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
805       // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
806       // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
807       // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
808       // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
809       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
810       // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
811       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
812       // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
813       // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
814       // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
815       // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
816       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
817 
818       // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
819       // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
820       // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
821       // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
822 
823       // initialize omp.iv (IV = LB)
824       // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
825       // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
826       // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
827       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
828 
829       // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
830       // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
831       // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
832       // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
833       // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
834 
835       // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
836       // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
837       // skip body branch
838       // LAMBDA: br{{.+}}
839       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
840 
841       // IV = IV + 1 and inner loop latch
842       // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
843       // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
844       // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
845       // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
846       // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
847 
      // no NextLB/NextUB update here; the next chunk is obtained from __kmpc_dispatch_next
849       // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
850       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
851 
852       // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
853       // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
854 
855       // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
856       // LAMBDA: ret
857       [&]() {
        a[i] = b[i] + c[i];
859       }();
860     }
861   }();
862   return 0;
863 #else
864   // CHECK-LABEL: @main
865 
866   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
867   // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
868 
869   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
870   // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
871 
872   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
873   // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
874 
875   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
876   // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
877 
878   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
879   // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
880 
881   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
882   // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
883 
884   // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
885   // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
886 
887   // CHECK: call{{.+}} [[TMAIN:@.+]]()
888 
889   // no schedule clauses
890   #pragma omp target
891   #pragma omp teams
892   // CHECK: define internal void [[OFFLOADING_FUN_1]](
893   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
894 
895   #pragma omp distribute parallel for
896   for (int i = 0; i < n; ++i) {
897     a[i] = b[i] + c[i];
898     // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
899     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
900     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
901     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
902     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
903 
904     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
905 
906     // check EUB for distribute
907     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
908     // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
909     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
910     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
911     // CHECK-DAG: [[EUB_TRUE]]:
912     // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
913     // CHECK: br label %[[EUB_END:.+]]
914     // CHECK-DAG: [[EUB_FALSE]]:
915     // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
916     // CHECK: br label %[[EUB_END]]
917     // CHECK-DAG: [[EUB_END]]:
918     // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
919     // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
920 
921     // initialize omp.iv
922     // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
923     // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
924     // CHECK: br label %[[OMP_JUMP_BACK:.+]]
925 
926     // check exit condition
927     // CHECK: [[OMP_JUMP_BACK]]:
928     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
929     // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
930     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
931     // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
932 
933     // check that PrevLB and PrevUB are passed to the 'for'
934     // CHECK: [[DIST_BODY]]:
935     // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
936     // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
937     // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
938     // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
939     // check that distlb and distub are properly passed to fork_call
940     // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
941     // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
942     // CHECK: br label %[[DIST_INC:.+]]
943 
944     // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
945     // CHECK: [[DIST_INC]]:
946     // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
947     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
948     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
949     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
950     // CHECK: br label %[[OMP_JUMP_BACK]]
951 
952     // CHECK-DAG: call void @__kmpc_for_static_fini(
953     // CHECK: ret
954 
955     // implementation of 'parallel for'
956     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
957 
958     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
959     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
960     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
961 
962     // initialize lb and ub to PrevLB and PrevUB
963     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
964     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
965     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
966     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
967     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
968     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
969     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
970     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
971     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
972     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
973     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
974 
975     // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
976     // In this case we use EUB
977     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
978     // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
979     // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
980     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
981     // CHECK: [[PF_EUB_TRUE]]:
982     // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
983     // CHECK: br label %[[PF_EUB_END:.+]]
984     // CHECK-DAG: [[PF_EUB_FALSE]]:
985     // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
986     // CHECK: br label %[[PF_EUB_END]]
987     // CHECK-DAG: [[PF_EUB_END]]:
988     // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
989     // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
990 
991     // initialize omp.iv
992     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
993     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
994     // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
995 
996     // check exit condition
997     // CHECK: [[OMP_PF_JUMP_BACK]]:
998     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
999     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1000     // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1001     // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1002 
1003     // loop body: load the IV and branch into the body
1004     // CHECK: [[PF_BODY]]:
1005     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1006     // CHECK: br label {{.+}}
1007 
1008     // check stride 1 for 'for' in 'distribute parallel for'
1009     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1010     // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1011     // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1012     // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1013 
1014     // CHECK-DAG: call void @__kmpc_for_static_fini(
1015     // CHECK: ret
1016   }
1017 
1018   // dist_schedule: static no chunk
1019   #pragma omp target
1020   #pragma omp teams
1021   // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1022   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1023 
1024   #pragma omp distribute parallel for dist_schedule(static)
1025   for (int i = 0; i < n; ++i) {
1026     a[i] = b[i] + c[i];
1027     // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1028     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1029     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1030     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1031     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1032 
1033     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
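    // For reference, the schedule-kind constants in these checks are assumed to come
    // from the OpenMP runtime's sched_type enum (kmp.h): 92 = distribute static
    // (no chunk), 91 = distribute static chunked, 34 = static (no chunk),
    // 33 = static chunked, 35 = dynamic chunked.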
1034 
1035     // check EUB for distribute
1036     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1037     // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1038     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1039     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1040     // CHECK-DAG: [[EUB_TRUE]]:
1041     // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1042     // CHECK: br label %[[EUB_END:.+]]
1043     // CHECK-DAG: [[EUB_FALSE]]:
1044     // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1045     // CHECK: br label %[[EUB_END]]
1046     // CHECK-DAG: [[EUB_END]]:
1047     // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1048     // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1049 
1050     // initialize omp.iv
1051     // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1052     // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1053     // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1054 
1055     // check exit condition
1056     // CHECK: [[OMP_JUMP_BACK]]:
1057     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1058     // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1059     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1060     // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1061 
1062     // check that PrevLB and PrevUB are passed to the 'for'
1063     // CHECK: [[DIST_BODY]]:
1064     // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1065     // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1066     // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1067     // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1068     // check that distlb and distub are properly passed to fork_call
1069     // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1070     // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1071     // CHECK: br label %[[DIST_INC:.+]]
1072 
1073     // increment by stride and latch (DistInc: the 'parallel for' executes the whole chunk, so the distribute IV advances by the stride)
1074     // CHECK: [[DIST_INC]]:
1075     // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1076     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1077     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1078     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1079     // CHECK: br label %[[OMP_JUMP_BACK]]
1080 
1081     // CHECK-DAG: call void @__kmpc_for_static_fini(
1082     // CHECK: ret
1083 
1084     // implementation of 'parallel for'
1085     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1086 
1087     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1088     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1089     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1090 
1091     // initialize lb and ub to PrevLB and PrevUB
1092     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1093     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1094     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1095     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1096     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1097     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1098     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1099     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1100     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1101     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1102     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1103 
1104     // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1105     // In this case we use EUB
1106     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1107     // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1108     // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1109     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1110     // CHECK: [[PF_EUB_TRUE]]:
1111     // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1112     // CHECK: br label %[[PF_EUB_END:.+]]
1113     // CHECK-DAG: [[PF_EUB_FALSE]]:
1114     // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1115     // CHECK: br label %[[PF_EUB_END]]
1116     // CHECK-DAG: [[PF_EUB_END]]:
1117     // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1118     // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1119 
1120     // initialize omp.iv
1121     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1122     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1123     // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1124 
1125     // check exit condition
1126     // CHECK: [[OMP_PF_JUMP_BACK]]:
1127     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1128     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1129     // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1130     // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1131 
1132     // loop body: load the IV and branch into the body
1133     // CHECK: [[PF_BODY]]:
1134     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1135     // CHECK: br label {{.+}}
1136 
1137     // check stride 1 for 'for' in 'distribute parallel for'
1138     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1139     // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1140     // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1141     // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1142 
1143     // CHECK-DAG: call void @__kmpc_for_static_fini(
1144     // CHECK: ret
1145   }
1146 
1147   // dist_schedule: static chunk
1148   #pragma omp target
1149   #pragma omp teams
1150   // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1151   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1152 
1153   #pragma omp distribute parallel for dist_schedule(static, ch)
1154   for (int i = 0; i < n; ++i) {
1155     a[i] = b[i] + c[i];
1156     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1157     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1158     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1159     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1160     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1161 
1162     // unlike the previous cases, this one has an outer and an inner loop for 'distribute' (see the sketch below)
1163     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1164     // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
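    // Illustrative sketch of the chunked 'distribute' that the checks below walk
    // through (pseudocode only, not matched by FileCheck):
    //
    //   __kmpc_for_static_init_4(.., 91 /* distribute static chunked */, .., &LB, &UB, &ST, ..);
    //   for (;;) {                              // outer loop over chunks
    //     UB = min(UB, NumIterations);          // EUB
    //     IV = LB;
    //     if (IV > UB) break;
    //     while (IV <= UB) {                    // inner loop within one chunk
    //       __kmpc_fork_call(.., parfor_outlined, LB, UB, ..);
    //       IV += ST;                           // DistInc
    //     }
    //     LB += ST; UB += ST;                   // NextLB / NextUB
    //   }
    //   __kmpc_for_static_fini(..);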
1165 
1166     // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
1167     // check EUB for distribute
1168     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1169     // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1170     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1171     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1172     // CHECK-DAG: [[EUB_TRUE]]:
1173     // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1174     // CHECK: br label %[[EUB_END:.+]]
1175     // CHECK-DAG: [[EUB_FALSE]]:
1176     // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1177     // CHECK: br label %[[EUB_END]]
1178     // CHECK-DAG: [[EUB_END]]:
1179     // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1180     // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1181 
1182     // initialize omp.iv
1183     // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1184     // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1185 
1186     // check exit condition
1187     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1188     // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1189     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1190     // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
1191 
1192     // CHECK: [[DIST_OUTER_LOOP_BODY]]:
1193     // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
1194 
1195     // CHECK: [[DIST_INNER_LOOP_HEADER]]:
1196     // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
1197     // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
1198     // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
1199     // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1200 
1201     // check that PrevLB and PrevUB are passed to the 'for'
1202     // CHECK: [[DIST_INNER_LOOP_BODY]]:
1203     // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1204     // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1205     // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1206     // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1207     // check that distlb and distub are properly passed to fork_call
1208     // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1209     // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1210     // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1211 
1212     // check DistInc
1213     // CHECK: [[DIST_INNER_LOOP_INC]]:
1214     // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1215     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1216     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1217     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1218     // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
1219 
1220     // CHECK: [[DIST_INNER_LOOP_END]]:
1221     // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
1222 
1223     // CHECK: [[DIST_OUTER_LOOP_INC]]:
1224     // check NextLB and NextUB
1225     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1226     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1227     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1228     // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1229     // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1230     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1231     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1232     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1233     // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
1234 
1235     // outer loop exit
1236     // CHECK: [[DIST_OUTER_LOOP_END]]:
1237     // CHECK-DAG: call void @__kmpc_for_static_fini(
1238     // CHECK: ret
1239 
1240     // skip the implementation of 'parallel for': it uses the default scheduling and was tested above
1241   }
1242 
1243   // schedule: static no chunk
1244   #pragma omp target
1245   #pragma omp teams
1246   // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1247   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1248 
1249   #pragma omp distribute parallel for schedule(static)
1250   for (int i = 0; i < n; ++i) {
1251     a[i] = b[i] + c[i];
1252     // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1253     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1254     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1255     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1256     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1257 
1258     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1259     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
1260     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1261     // CHECK: ret
1262 
1263     // 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
1264     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1265 
1266     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1267     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1268     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1269 
1270     // initialize lb and ub to PrevLB and PrevUB
1271     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1272     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1273     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1274     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1275     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1276     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1277     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1278     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1279     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1280     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1281     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1282 
1283     // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1284     // In this case we use EUB
1285     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1286     // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1287     // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1288     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1289     // CHECK: [[PF_EUB_TRUE]]:
1290     // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1291     // CHECK: br label %[[PF_EUB_END:.+]]
1292     // CHECK-DAG: [[PF_EUB_FALSE]]:
1293     // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1294     // CHECK: br label %[[PF_EUB_END]]
1295     // CHECK-DAG: [[PF_EUB_END]]:
1296     // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1297     // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1298 
1299     // initialize omp.iv
1300     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1301     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1302     // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1303 
1304     // check exit condition
1305     // CHECK: [[OMP_PF_JUMP_BACK]]:
1306     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1307     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1308     // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1309     // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1310 
1311     // loop body: load the IV and branch into the body
1312     // CHECK: [[PF_BODY]]:
1313     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1314     // CHECK: br label {{.+}}
1315 
1316     // check stride 1 for 'for' in 'distribute parallel for'
1317     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1318     // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1319     // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1320     // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1321 
1322     // CHECK-DAG: call void @__kmpc_for_static_fini(
1323     // CHECK: ret
1324   }
1325 
1326   // schedule: static chunk
1327   #pragma omp target
1328   #pragma omp teams
1329   // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
1330   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
1331 
1332   #pragma omp distribute parallel for schedule(static, ch)
1333   for (int i = 0; i < n; ++i) {
1334     a[i] = b[i] + c[i];
1335     // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
1336     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1337     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
1338     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1339     // CHECK: ret
1340 
1341     // 'parallel for' implementation using outer and inner loops and PrevEUB
1342     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1343     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1344     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1345     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1346     // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1347 
1348     // initialize lb and ub to PrevLB and PrevUB
1349     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1350     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1351     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1352     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1353     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1354     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1355     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1356     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1357     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1358     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1359     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1360     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
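    // Illustrative sketch of this outlined 'parallel for' (pseudocode only, not matched
    // by FileCheck): with schedule(static, ch) the upper bound is clamped against PrevUB,
    // the enclosing 'distribute' chunk's bound, instead of the global iteration count;
    // on 64-bit targets PrevUB is a wider integer, hence the sext/trunc pair below.
    //
    //   __kmpc_for_static_init_4(.., 33 /* static chunked */, .., &LB, &UB, &ST, ..);
    //   for (;;) {                              // outer loop over 'for' chunks
    //     UB = min(UB, PrevUB);                 // PrevEUB
    //     IV = LB;
    //     if (IV > UB) break;
    //     for (; IV <= UB; ++IV)                // inner loop: run the loop body
    //       ..body..;
    //     LB += ST; UB += ST;                   // NextLB / NextUB
    //   }
    //   __kmpc_for_static_fini(..);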
1361 
1362     // check PrevEUB (using PrevUB instead of NumIt as upper bound)
1363     // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1364     // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1365     // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
1366     // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1367     // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
1368     // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
1369     // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1370     // CHECK: [[PF_EUB_TRUE]]:
1371     // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1372     // CHECK: br label %[[PF_EUB_END:.+]]
1373     // CHECK-DAG: [[PF_EUB_FALSE]]:
1374     // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1375     // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
1376     // CHECK: br label %[[PF_EUB_END]]
1377     // CHECK-DAG: [[PF_EUB_END]]:
1378     // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
1379     // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
1380     // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
1381     // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
1382     // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
1383 
1384     // initialize omp.iv (IV = LB)
1385     // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1386     // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1387 
1388     // outer chunk loop: while (IV <= UB)
1389     // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1390     // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1391     // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1392     // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1393 
1394     // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1395     // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
1396 
1397     // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
1398     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1399     // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1400     // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1401     // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1402 
1403     // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1404     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1405     // skip body branch
1406     // CHECK: br{{.+}}
1407     // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1408 
1409     // IV = IV + 1 and inner loop latch
1410     // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1411     // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1412     // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1413     // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1414     // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
1415 
1416     // check NextLB and NextUB
1417     // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1418     // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1419 
1420     // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1421     // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1422     // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1423     // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
1424     // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
1425     // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1426     // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1427     // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
1428     // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
1429     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1430 
1431     // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1432     // CHECK-DAG: call void @__kmpc_for_static_fini(
1433     // CHECK: ret
1434   }
1435 
1436   // schedule: dynamic no chunk
1437   #pragma omp target
1438   #pragma omp teams
1439   // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
1440   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
1441 
1442   #pragma omp distribute parallel for schedule(dynamic)
1443   for (int i = 0; i < n; ++i) {
1444     a[i] = b[i] + c[i];
1445     // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
1446     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1447     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
1448     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1449     // CHECK: ret
1450 
1451     // 'parallel for' implementation using outer and inner loops and PrevEUB
1452     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1453     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1454     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1455     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1456     // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1457 
1458     // initialize lb and ub to PrevLB and PrevUB
1459     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1460     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1461     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1462     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1463     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1464     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1465     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1466     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1467     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1468     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1469     // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1470     // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1471     // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1472     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
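    // Illustrative sketch of the dynamically scheduled 'parallel for' (pseudocode only,
    // not matched by FileCheck): the runtime hands out chunks, so the outer latch has no
    // NextLB/NextUB arithmetic.
    //
    //   __kmpc_dispatch_init_4(.., 35 /* dynamic chunked */, LB, UB, ..);
    //   while (__kmpc_dispatch_next_4(.., &LB, &UB, &ST) != 0) {
    //     for (IV = LB; IV <= UB; ++IV)
    //       ..body..;
    //   }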
1473 
1474     // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1475     // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1476     // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1477     // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1478 
1479     // initialize omp.iv (IV = LB)
1480     // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1481     // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1482     // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1483     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1484 
1485     // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1486     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1487     // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1488     // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1489     // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1490 
1491     // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1492     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1493     // skip body branch
1494     // CHECK: br{{.+}}
1495     // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1496 
1497     // IV = IV + 1 and inner loop latch
1498     // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1499     // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1500     // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1501     // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1502     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1503 
1504     // inner loop exit: branch to the outer loop latch (the next chunk comes from __kmpc_dispatch_next_4, so there is no NextLB/NextUB computation here)
1505     // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1506     // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1507 
1508     // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1509     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1510 
1511     // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1512     // CHECK: ret
1513   }
1514 
1515   // schedule: dynamic chunk
1516   #pragma omp target
1517   #pragma omp teams
1518   // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
1519   // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
1520 
1521   #pragma omp distribute parallel for schedule(dynamic, ch)
1522   for (int i = 0; i < n; ++i) {
1523     a[i] = b[i] + c[i];
1524     // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
1525     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1526     // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
1527     // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1528     // CHECK: ret
1529 
1530     // 'parallel for' implementation using outer and inner loops and PrevEUB
1531     // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1532     // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1533     // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1534     // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1535     // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1536 
1537     // initialize lb and ub to PrevLB and PrevUB
1538     // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1539     // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1540     // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1541     // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1542     // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1543     // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1544     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1545     // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1546     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1547     // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1548     // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1549     // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1550     // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1551     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1552 
1553     // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1554     // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1555     // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1556     // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1557 
1558     // initialize omp.iv (IV = LB)
1559     // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1560     // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1561     // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1562     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1563 
1564     // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1565     // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1566     // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1567     // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1568     // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1569 
1570     // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1571     // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1572     // skip body branch
1573     // CHECK: br{{.+}}
1574     // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1575 
1576     // IV = IV + 1 and inner loop latch
1577     // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1578     // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1579     // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1580     // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1581     // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1582 
1583     // inner loop exit: branch to the outer loop latch (the next chunk comes from __kmpc_dispatch_next_4, so there is no NextLB/NextUB computation here)
1584     // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1585     // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1586 
1587     // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1588     // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1589 
1590     // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1591     // CHECK: ret
1592   }
1593 
1594   return tmain<int>();
1595 #endif
1596 }
1597 
1598 // check code generated for the tmain<int> instantiation
1599 // CHECK: define{{.+}} [[TMAIN]]()
1600 
1601 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1602 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
1603 
1604 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1605 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
1606 
1607 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1608 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
1609 
1610 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1611 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
1612 
1613 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1614 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
1615 
1616 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1617 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
1618 
1619 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
1620 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
1621 
1622 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
1623 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
1624 
1625 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
1626 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1627 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1628 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1629 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1630 
1631 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1632 
1633 // check EUB for distribute
1634 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1635 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1636 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1637 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1638 // CHECK-DAG: [[EUB_TRUE]]:
1639 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1640 // CHECK: br label %[[EUB_END:.+]]
1641 // CHECK-DAG: [[EUB_FALSE]]:
1642 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1643 // CHECK: br label %[[EUB_END]]
1644 // CHECK-DAG: [[EUB_END]]:
1645 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1646 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1647 
1648 // initialize omp.iv
1649 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1650 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1651 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1652 
1653 // check exit condition
1654 // CHECK: [[OMP_JUMP_BACK]]:
1655 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1656 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1657 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1658 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1659 
1660 // check that PrevLB and PrevUB are passed to the 'for'
1661 // CHECK: [[DIST_BODY]]:
1662 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1663 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1664 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1665 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1666 // check that distlb and distub are properly passed to fork_call
1667 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1668 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1669 // CHECK: br label %[[DIST_INC:.+]]
1670 
1671 // increment by stride and latch (DistInc: the 'parallel for' executes the whole chunk, so the distribute IV advances by the stride)
1672 // CHECK: [[DIST_INC]]:
1673 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1674 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1675 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1676 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1677 // CHECK: br label %[[OMP_JUMP_BACK]]
1678 
1679 // CHECK-DAG: call void @__kmpc_for_static_fini(
1680 // CHECK: ret
1681 
1682 // implementation of 'parallel for'
1683 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1684 
1685 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1686 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1687 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1688 
1689 // initialize lb and ub to PrevLB and PrevUB
1690 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1691 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1692 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1693 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1694 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1695 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1696 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1697 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1698 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1699 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1700 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1701 
1702 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1703 // In this case we use EUB
1704 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1705 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1706 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1707 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1708 // CHECK: [[PF_EUB_TRUE]]:
1709 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1710 // CHECK: br label %[[PF_EUB_END:.+]]
1711 // CHECK-DAG: [[PF_EUB_FALSE]]:
1712 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1713 // CHECK: br label %[[PF_EUB_END]]
1714 // CHECK-DAG: [[PF_EUB_END]]:
1715 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1716 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1717 
1718 // initialize omp.iv
1719 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1720 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1721 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1722 
1723 // check exit condition
1724 // CHECK: [[OMP_PF_JUMP_BACK]]:
1725 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1726 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1727 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1728 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1729 
1730 // loop body: load the IV and branch into the body
1731 // CHECK: [[PF_BODY]]:
1732 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1733 // CHECK: br label {{.+}}
1734 
1735 // check stride 1 for 'for' in 'distribute parallel for'
1736 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1737 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1738 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1739 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1740 
1741 // CHECK-DAG: call void @__kmpc_for_static_fini(
1742 // CHECK: ret
1743 
1744 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1745 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1746 
1747 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1748 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1749 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1750 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1751 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1752 
1753 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1754 
1755 // check EUB for distribute
1756 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1757 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1758 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1759 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1760 // CHECK-DAG: [[EUB_TRUE]]:
1761 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1762 // CHECK: br label %[[EUB_END:.+]]
1763 // CHECK-DAG: [[EUB_FALSE]]:
1764 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1765 // CHECK: br label %[[EUB_END]]
1766 // CHECK-DAG: [[EUB_END]]:
1767 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1768 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1769 
1770 // initialize omp.iv
1771 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1772 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1773 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1774 
1775 // check exit condition
1776 // CHECK: [[OMP_JUMP_BACK]]:
1777 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1778 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1779 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1780 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1781 
1782 // check that PrevLB and PrevUB are passed to the 'for'
1783 // CHECK: [[DIST_BODY]]:
1784 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1785 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1786 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1787 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1788 // check that distlb and distub are properly passed to fork_call
1789 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1790 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1791 // CHECK: br label %[[DIST_INC:.+]]
1792 
1793 // increment by stride and latch (DistInc: the 'parallel for' executes the whole chunk, so the distribute IV advances by the stride)
1794 // CHECK: [[DIST_INC]]:
1795 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1796 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1797 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1798 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1799 // CHECK: br label %[[OMP_JUMP_BACK]]
1800 
1801 // CHECK-DAG: call void @__kmpc_for_static_fini(
1802 // CHECK: ret
1803 
1804 // implementation of 'parallel for'
1805 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1806 
1807 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1808 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1809 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1810 
1811 // initialize lb and ub to PrevLB and PrevUB
1812 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1813 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1814 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1815 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1816 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1817 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1818 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1819 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1820 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1821 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1822 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1823 
1824 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1825 // In this case we use EUB
1826 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1827 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1828 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1829 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1830 // CHECK: [[PF_EUB_TRUE]]:
1831 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1832 // CHECK: br label %[[PF_EUB_END:.+]]
1833 // CHECK-DAG: [[PF_EUB_FALSE]]:
1834 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1835 // CHECK: br label %[[PF_EUB_END]]
1836 // CHECK-DAG: [[PF_EUB_END]]:
1837 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1838 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
1839 
1840 // initialize omp.iv
1841 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1842 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1843 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1844 
1845 // check exit condition
1846 // CHECK: [[OMP_PF_JUMP_BACK]]:
1847 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1848 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1849 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1850 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1851 
1852 // loop body: load the IV and branch into the body
1853 // CHECK: [[PF_BODY]]:
1854 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1855 // CHECK: br label {{.+}}
1856 
1857 // check stride 1 for 'for' in 'distribute parallel for'
1858 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1859 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1860 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1861 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1862 
1863 // CHECK-DAG: call void @__kmpc_for_static_fini(
1864 // CHECK: ret
1865 
1866 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1867 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1868 
1869 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1870 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1871 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1872 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1873 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1874 
1875 // unlike the previous cases, this one has an outer and an inner loop for 'distribute'
1876 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1877 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
1878 
1879 // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
1880 // check EUB for distribute
1881 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1882 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1883 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1884 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1885 // CHECK-DAG: [[EUB_TRUE]]:
1886 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1887 // CHECK: br label %[[EUB_END:.+]]
1888 // CHECK-DAG: [[EUB_FALSE]]:
1889 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1890 // CHECK: br label %[[EUB_END]]
1891 // CHECK-DAG: [[EUB_END]]:
1892 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1893 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1894 
1895 // initialize omp.iv
1896 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1897 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1898 
1899 // check exit condition
1900 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1901 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1902 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1903 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
1904 
1905 // CHECK: [[DIST_OUTER_LOOP_BODY]]:
1906 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
1907 
1908 // CHECK: [[DIST_INNER_LOOP_HEADER]]:
1909 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
1910 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
1911 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
1912 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1913 
1914 // check that PrevLB and PrevUB are passed to the 'for'
1915 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1916 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1917 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1918 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1919 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1920 // check that distlb and distub are properly passed to fork_call
1921 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1922 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1923 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1924 
1925 // check DistInc
1926 // CHECK: [[DIST_INNER_LOOP_INC]]:
1927 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1928 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1929 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1930 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1931 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
1932 
1933 // CHECK: [[DIST_INNER_LOOP_END]]:
1934 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
1935 
1936 // CHECK: [[DIST_OUTER_LOOP_INC]]:
1937 // check NextLB and NextUB
1938 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1939 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1940 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1941 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1942 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1943 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1944 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1945 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1946 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
1947 
1948 // outer loop exit
1949 // CHECK: [[DIST_OUTER_LOOP_END]]:
1950 // CHECK-DAG: call void @__kmpc_for_static_fini(
1951 // CHECK: ret
1952 
1953 // skip the implementation of 'parallel for': it uses the default scheduling and was tested above
1954 
1955 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1956 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1957 
1958 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1959 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1960 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1961 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1962 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1963 
1964 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1965 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
1966 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1967 // CHECK: ret
1968 
1969 // 'parallel for' implementation is the same as the case without a schedule clause (static, no chunk, is the default)
1970 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
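
// A comment-only sketch of the shape the following checks expect for the
// default (static, no chunk) schedule; local names are illustrative:
//
//   lb = prev_lb; ub = prev_ub;               // bounds handed down by 'distribute'
//   __kmpc_for_static_init_4(..., 34, ..., &lb, &ub, ...);
//   for (iv = lb; iv <= ub; ++iv)             // ub is first clamped (EUB, see below)
//     body(iv);
//   __kmpc_for_static_fini(...);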
1971 
1972 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1973 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1974 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1975 
1976 // initialize lb and ub to PrevLB and PrevUB
1977 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1978 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1979 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1980 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1981 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1982 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1983 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1984 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1985 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1986 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1987 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1988 
1989 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1990 // In this case we use EUB
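// Roughly (illustrative names): ub = min(ub, n_iters), emitted as a compare,
// two conditional branches and a phi over omp.ub and the trip count temporary.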
1991 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1992 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1993 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1994 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1995 // CHECK: [[PF_EUB_TRUE]]:
1996 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1997 // CHECK: br label %[[PF_EUB_END:.+]]
1998 // CHECK-DAG: [[PF_EUB_FALSE]]:
1999 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2000 // CHECK: br label %[[PF_EUB_END]]
2001 // CHECK-DAG: [[PF_EUB_END]]:
2002 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
2003 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}}  [[OMP_PF_UB]],
2004 
2005 // initialize omp.iv
2006 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2007 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2008 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
2009 
2010 // check exit condition
2011 // CHECK: [[OMP_PF_JUMP_BACK]]:
2012 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
2013 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
2014 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2015 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
2016 
2017 // loop body
2018 // CHECK: [[PF_BODY]]:
2019 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2020 // CHECK: br label {{.+}}
2021 
2022 // check stride 1 for 'for' in 'distribute parallel for'
2023 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
2024 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
2025 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
2026 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
2027 
2028 // CHECK-DAG: call void @__kmpc_for_static_fini(
2029 // CHECK: ret
2030 
2031 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
2032 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
2033 
2034 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
2035 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2036 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
2037 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2038 // CHECK: ret
2039 
2040 // 'parallel for' implementation using outer and inner loops and PrevEUB
2041 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
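
// A comment-only sketch of the expected shape for the chunked static schedule
// (__kmpc_for_static_init_4 is called with 33); local names are illustrative:
//
//   lb = prev_lb; ub = prev_ub;
//   __kmpc_for_static_init_4(..., 33, ..., &lb, &ub, &st, ...);
//   for (;;) {                                // outer loop: one static chunk per trip
//     ub = min(ub, prev_ub);                  // PrevEUB, see the note further below
//     iv = lb;
//     if (iv > ub) break;
//     while (iv <= ub) { body(iv); ++iv; }    // inner loop over the chunk
//     lb += st; ub += st;                     // NextLB / NextUB
//   }
//   __kmpc_for_static_fini(...);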
2042 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2043 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2044 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2045 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2046 
2047 // initialize lb and ub to PrevLB and PrevUB
2048 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2049 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2050 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2051 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2052 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2053 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2054 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2055 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2056 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2057 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2058 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
2059 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2060 
2061 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
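// Roughly: ub = min(ub, prev_ub), i.e. the parent 'distribute' chunk's upper
// bound, not the trip count, is the effective upper bound here.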
2062 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2063 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
2064 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
2065 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2066 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
2067 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
2068 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
2069 // CHECK: [[PF_EUB_TRUE]]:
2070 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2071 // CHECK: br label %[[PF_EUB_END:.+]]
2072 // CHECK-DAG: [[PF_EUB_FALSE]]:
2073 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2074 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
2075 // CHECK: br label %[[PF_EUB_END]]
2076 // CHECK-DAG: [[PF_EUB_END]]:
2077 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
2078 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
2079 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
2080 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}}  [[OMP_PF_UB]],
2081 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
2082 
2083 // initialize omp.iv (IV = LB)
2084 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2085 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2086 
2087 // outer loop: while (IV < UB) {
2088 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2089 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2090 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2091 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2092 
2093 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2094 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
2095 
2096 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
2097 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2098 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2099 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2100 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2101 
2102 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2103 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2104 // skip body branch
2105 // CHECK: br{{.+}}
2106 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2107 
2108 // IV = IV + 1 and inner loop latch
2109 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2110 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2111 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2112 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2113 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
2114 
2115 // check NextLB and NextUB
2116 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2117 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2118 
2119 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2120 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2121 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2122 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
2123 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
2124 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2125 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2126 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
2127 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
2128 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2129 
2130 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2131 // CHECK-DAG: call void @__kmpc_for_static_fini(
2132 // CHECK: ret
2133 
2134 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
2135 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
2136 
2137 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
2138 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2139 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
2140 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2141 // CHECK: ret
2142 
2143 // 'parallel for' implementation for a dispatch-based schedule: the outer loop is driven by __kmpc_dispatch_next_4 and the inner loop walks each returned chunk
2144 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
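
// A comment-only sketch of the dispatch-based lowering the following checks
// expect; local names are illustrative:
//
//   __kmpc_dispatch_init_4(..., 35, lb, ub, ...);
//   while (__kmpc_dispatch_next_4(..., &lb, &ub, &st) != 0) {  // outer loop
//     for (iv = lb; iv <= ub; ++iv)                            // inner loop over the chunk
//       body(iv);
//   }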
2145 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2146 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2147 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2148 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2149 
2150 // initialize lb and ub to PrevLB and PrevUB
2151 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2152 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2153 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2154 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2155 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2156 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2157 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2158 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2159 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2160 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2161 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2162 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2163 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2164 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2165 
2166 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2167 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2168 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2169 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2170 
2171 // initialize omp.iv (IV = LB)
2172 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2173 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2174 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2175 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2176 
2177 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2178 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2179 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2180 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2181 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2182 
2183 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2184 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2185 // skip body branch
2186 // CHECK: br{{.+}}
2187 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2188 
2189 // IV = IV + 1 and inner loop latch
2190 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2191 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2192 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2193 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2194 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2195 
2196 // inner loop exit; lb and ub for the next chunk come from __kmpc_dispatch_next_4 at the outer loop header
2197 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2198 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2199 
2200 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2201 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2202 
2203 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2204 // CHECK: ret
2205 
2206 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
2207 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
2208 
2209 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
2210 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2211 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
2212 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2213 // CHECK: ret
2214 
2215 // 'parallel for' implementation for a dispatch-based schedule, same shape as the previous outlined function: __kmpc_dispatch_next_4 drives the outer loop and the inner loop walks each chunk
2216 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2217 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2218 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2219 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2220 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2221 
2222 // initialize lb and ub to PrevLB and PrevUB
2223 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2224 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2225 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2226 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2227 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2228 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2229 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2230 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2231 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2232 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2233 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2234 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2235 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2236 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2237 
2238 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2239 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2240 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2241 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2242 
2243 // initialize omp.iv (IV = LB)
2244 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2245 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2246 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2247 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2248 
2249 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2250 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2251 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2252 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2253 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2254 
2255 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2256 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2257 // skip body branch
2258 // CHECK: br{{.+}}
2259 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2260 
2261 // IV = IV + 1 and inner loop latch
2262 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2263 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2264 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2265 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2266 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2267 
2268 // inner loop exit; lb and ub for the next chunk come from __kmpc_dispatch_next_4 at the outer loop header
2269 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2270 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2271 
2272 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2273 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2274 
2275 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2276 // CHECK: ret
2277 #endif
2278