1 // RxUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
2 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
4 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
5 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
7 
8 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
9 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
10 // RUN: %clang_cc1  -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
11 // RUN: %clang_cc1  -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
12 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
13 // RUN: %clang_cc1  -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
14 // expected-no-diagnostics
15 #ifndef HEADER
16 #define HEADER
17 
18 template <class T>
19 struct S {
20   T f;
21   S(T a) : f(a) {}
22   S() : f() {}
23   operator T() { return T(); }
24   ~S() {}
25 };
26 
27 // CHECK: [[S_FLOAT_TY:%.+]] = type { float }
28 // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
29 template <typename T>
30 T tmain() {
31   S<T> test;
32   T t_var = T();
33   T vec[] = {1, 2};
34   S<T> s_arr[] = {1, 2};
35   S<T> &var = test;
36   #pragma omp target
37   #pragma omp teams
38   #pragma omp distribute parallel for simd firstprivate(t_var, vec, s_arr, s_arr, var, var)
39   for (int i = 0; i < 2; ++i) {
40     vec[i] = t_var;
41     s_arr[i] = var;
42   }
43   return T();
44 }
45 
46 int main() {
47   static int svar;
48   volatile double g;
49   volatile double &g1 = g;
50 
51   #ifdef LAMBDA
52   // LAMBDA-LABEL: @main
53   // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
54   [&]() {
55     static float sfvar;
56     // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
57     // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
58     // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
59 
60     // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
61     // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
62     #pragma omp target
63     #pragma omp teams
64     #pragma omp distribute parallel for simd firstprivate(g, g1, svar, sfvar)
65     for (int i = 0; i < 2; ++i) {
66       // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
67       // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
68 
69       // addr alloca's
70       // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
71       // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*,
72       // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
73       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
74       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
75       // LAMBDA: [[G1_REF:%.+]] = alloca double*,
76       // LAMBDA: [[TMP:%.+]] = alloca double*,
77 
78       // private alloca's
79       // LAMBDA: [[G_PRIV:%.+]] = alloca double,
80       // LAMBDA: [[G1_PRIV:%.+]] = alloca double,
81       // LAMBDA: [[TMP_PRIV:%.+]] = alloca double*,
82       // LAMBDA: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
83       // LAMBDA: [[SFVAR_PRIV:%.+]] = alloca float,
84 
85       // transfer input parameters into addr alloca's
86       // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
87       // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
88       // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
89       // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
90 
91       // init private alloca's with addr alloca's
92       // g
93       // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to
94       // LAMBDA-32-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
95       // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_CONV]],
96       // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
97 
98       // g1
99       // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to
100       // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}** [[G1_REF]],
101       // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
102       // LAMBDA-DAG: store {{.+}}* [[G1_REF_VAL]], {{.+}}** [[TMP]],
103       // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP]],
104       // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]],
105       // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]]
106       // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]],
107 
108       // svar
109       // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
110       // LAMBDA-64-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]],
111       // LAMBDA-32-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]],
112       // LAMBDA-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_PRIV]],
113 
114       // sfvar
115       // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to
116       // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CONV]],
117       // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_PRIV]],
118 
119       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
120       // pass firstprivate parameters to parallel outlined function
121       // g
122       // LAMBDA-64-DAG: [[G_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_PRIV]],
123       // LAMBDA-64: [[G_CAST_CONV:%.+]] = bitcast {{.+}}* [[G_CAST:%.+]] to
124       // LAMBDA-64-DAG: store {{.+}} [[G_PRIV_VAL]], {{.+}}* [[G_CAST_CONV]],
125       // LAMBDA-64-DAG: [[G_PAR:%.+]] = load {{.+}}, {{.+}}* [[G_CAST]],
126 
127       // g1
128       // LAMBDA-DAG: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV]],
129       // LAMBDA-DAG: [[G1_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV_VAL]],
130       // LAMBDA: [[G1_CAST_CONV:%.+]] = bitcast {{.+}}* [[G1_CAST:%.+]] to
131       // LAMBDA-DAG: store {{.+}} [[G1_PRIV_VAL]], {{.+}}* [[G1_CAST_CONV]],
132       // LAMBDA-DAG: [[G1_PAR:%.+]] = load {{.+}}, {{.+}}* [[G1_CAST]],
133 
134       // svar
135       // LAMBDA: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]],
136       // LAMBDA-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to
137       // LAMBDA-64-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST_CONV]],
138       // LAMBDA-32-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST:%.+]],
139       // LAMBDA-DAG: [[SVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]],
140 
141       // sfvar
142       // LAMBDA: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_PRIV]],
143       // LAMBDA-DAG: [[SFVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_CAST:%.+]] to
144       // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_CAST_CONV]],
145       // LAMBDA-DAG: [[SFVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CAST]],
146 
147       // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PAR]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]])
148       // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIV]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]])
149       // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
150       // LAMBDA: ret void
151 
152 
153       // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
154       // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
155       // skip initial params
156       // LAMBDA: {{.+}} = alloca{{.+}},
157       // LAMBDA: {{.+}} = alloca{{.+}},
158       // LAMBDA: {{.+}} = alloca{{.+}},
159       // LAMBDA: {{.+}} = alloca{{.+}},
160 
161       // addr alloca's
162       // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
163       // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*,
164       // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
165       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
166       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
167       // LAMBDA: [[G1_REF:%.+]] = alloca double*,
168 
169       // private alloca's (only for 32-bit)
170       // LAMBDA-32: [[G_PRIV:%.+]] = alloca double,
171 
172       // transfer input parameters into addr alloca's
173       // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
174       // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
175       // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
176       // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
177 
178       // prepare parameters for lambda
179       // g
180       // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to
181       // LAMBDA-32-DAG: [[G_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
182       // LAMBDA-32-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_ADDR_REF]],
183       // LAMBDA-32-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
184 
185       // g1
186       // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to
187       // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}* [[G1_REF]],
188 
189       // svar
190       // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
191 
192       // sfvar
193       // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to
194 
195       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
196       g = 1;
197       g1 = 1;
198       svar = 3;
199       sfvar = 4.0;
200       // LAMBDA-64: store double 1.0{{.+}}, double* [[G_CONV]],
201       // LAMBDA-32: store double 1.0{{.+}}, double* [[G_PRIV]],
202       // LAMBDA: [[G1_REF_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
203       // LAMBDA: store {{.+}} 1.0{{.+}}, {{.+}}* [[G1_REF_REF]],
204       // LAMBDA-64: store {{.+}} 3, {{.+}}* [[SVAR_CONV]],
205       // LAMBDA-32: store {{.+}} 3, {{.+}}* [[SVAR_ADDR]],
206       // LAMBDA: store {{.+}} 4.0{{.+}}, {{.+}}* [[SFVAR_CONV]],
207 
208       // pass params to inner lambda
209       // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
210       // LAMBDA-64: store double* [[G_CONV]], double** [[G_PRIVATE_ADDR_REF]],
211       // LAMBDA-32: store double* [[G_PRIV]], double** [[G_PRIVATE_ADDR_REF]],
212       // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
213       // LAMBDA: [[G1_REF_REF:%.+]] = load double*, double** [[G1_REF]],
214       // LAMBDA: store double* [[G1_REF_REF]], double** [[G1_PRIVATE_ADDR_REF]],
215       // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
216       // LAMBDA-64: store i{{[0-9]+}}* [[SVAR_CONV]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
217       // LAMBDA-32: store i{{[0-9]+}}* [[SVAR_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
218       // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
219       // LAMBDA: store float* [[SFVAR_CONV]], float** [[SFVAR_PRIVATE_ADDR_REF]]
220       // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
221       // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
222       // LAMBDA: ret void
223       [&]() {
224 	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
225 	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
226 	g = 2;
227 	g1 = 2;
228 	svar = 4;
229 	sfvar = 8.0;
230 	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
231 	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
232 	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
233 	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
234 
235 	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
236 	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
237 	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
238 	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
239 	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
240 	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
241 	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
242 	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
243 	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
244       }();
245     }
246   }();
247   return 0;
248   #else
249   S<float> test;
250   int t_var = 0;
251   int vec[] = {1, 2};
252   S<float> s_arr[] = {1, 2};
253   S<float> &var = test;
254 
255   #pragma omp target
256   #pragma omp teams
257   #pragma omp distribute parallel for simd firstprivate(t_var, vec, s_arr, s_arr, var, var, svar)
258   for (int i = 0; i < 2; ++i) {
259     vec[i] = t_var;
260     s_arr[i] = var;
261   }
262   return tmain<int>();
263   #endif
264 }
265 
266 // CHECK-LABEL: define{{.*}} i{{[0-9]+}} @main()
267 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
268 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
269 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
270 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
271 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
272 
273 // CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
274 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[OMP_OUTLINED_0:@.+]] to void
275 // CHECK: ret
276 
277 // CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
278 
279 // addr alloca's
280 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
281 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
282 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
283 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
284 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
285 // CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
286 
287 // skip loop alloca's
288 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
289 // CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}},
290 // CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}},
291 // CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
292 // CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
293 
294 // private alloca's
295 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
296 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
297 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
298 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
299 // CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
300 // CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
301 
302 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
303 
304 // init addr alloca's with input values
305 // CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
306 // CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
307 // CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
308 // CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
309 // CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
310 
311 // init private alloca's with addr alloca's
312 // t-var
313 // CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
314 // CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]],
315 // CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]],
316 // CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]],
317 
318 // vec
319 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
320 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
321 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
322 // CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
323 
324 // s_arr
325 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
326 // CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
327 // CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
328 // CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
329 // CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
330 // CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
331 // CHECK-DAG: [[CPY_BODY]]:
332 // CHECK-DAG: call void @llvm.memcpy{{.+}}(
333 // CHECK-DAG: [[CPY_DONE]]:
334 
335 // var
336 // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
337 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
338 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
339 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]],
340 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
341 
342 // svar
343 // CHECK-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
344 // CHECK-64-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]],
345 // CHECK-32-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]],
346 // CHECK-DAG: store {{.+}} [[SVAR_CONV_VAL]], {{.+}}* [[SVAR_PRIV]],
347 
348 // CHECK: call void @__kmpc_for_static_init_4(
349 // pass private alloca's to fork
350 // CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]],
351 // not dag to distinguish with S_VAR_CAST
352 // CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to
353 // CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]],
354 // CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]],
355 // CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]],
356 // CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
357 // CHECK-DAG: [[SVAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]],
358 // CHECK-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to
359 // CHECK-64-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST_CONV]],
360 // CHECK-32-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST:%.+]],
361 // CHECK-DAG: [[SVAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]],
362 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], [[S_FLOAT_TY]]* [[TMP_PRIV_VAL]], i{{[0-9]+}} [[SVAR_CAST_VAL]])
363 // CHECK: call void @__kmpc_for_static_fini(
364 
365 // call destructors: var..
366 // CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
367 
368 // ..and s_arr
369 // CHECK: {{.+}}:
370 // CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
371 // CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
372 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
373 
374 // CHECK: ret void
375 
376 // By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for.
377 // However, the support for 'firstprivate' of 'parallel' is only used when 'parallel'
378 // is found alone. Therefore we only have one 'firstprivate' support for 'parallel for'
379 // in combination
380 // CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
381 
382 // addr alloca's
383 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
384 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
385 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
386 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
387 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
388 
389 // skip loop alloca's
390 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
391 // CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}},
392 // CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}},
393 // CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
394 // CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
395 
396 // private alloca's
397 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
398 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
399 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
400 // CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
401 
402 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
403 
404 // init addr alloca's with input values
405 // CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
406 // CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
407 // CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
408 // CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
409 // CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
410 
411 // init private alloca's with addr alloca's
412 // vec
413 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
414 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
415 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
416 // CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
417 
418 // s_arr
419 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
420 // CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
421 // CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
422 // CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
423 // CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
424 // CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
425 // CHECK-DAG: [[CPY_BODY]]:
426 // CHECK-DAG: call void @llvm.memcpy{{.+}}(
427 // CHECK-DAG: [[CPY_DONE]]:
428 
429 // var
430 // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
431 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
432 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
433 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]],
434 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
435 
436 // CHECK: call void @__kmpc_for_static_init_4(
437 // CHECK: call void @__kmpc_for_static_fini(
438 
439 // call destructors: var..
440 // CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
441 
442 // ..and s_arr
443 // CHECK: {{.+}}:
444 // CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
445 // CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
446 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
447 
448 // CHECK: ret void
449 
450 // template tmain with S_INT_TY
451 // CHECK-LABEL: define{{.*}} i{{[0-9]+}} @{{.+}}tmain{{.+}}()
452 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
453 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
454 // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
455 // CHECK: call void [[OFFLOAD_FUN_0:@.+]](
456 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
457 
458 // CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
459 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void
460 // CHECK: ret
461 
462 // CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
463 
464 // addr alloca's
465 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
466 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
467 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
468 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
469 // CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
470 
471 // skip loop alloca's
472 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
473 // CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}},
474 // CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}},
475 // CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
476 // CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
477 
478 // private alloca's
479 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
480 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
481 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
482 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
483 // CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
484 
485 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
486 
487 // init addr alloca's with input values
488 // CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
489 // CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
490 // CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
491 // CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
492 
493 // init private alloca's with addr alloca's
494 // t-var
495 // CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
496 // CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]],
497 // CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]],
498 // CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]],
499 
500 // vec
501 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
502 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
503 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
504 // CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
505 
506 // s_arr
507 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
508 // CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
509 // CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
510 // CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
511 // CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
512 // CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
513 // CHECK-DAG: [[CPY_BODY]]:
514 // CHECK-DAG: call void @llvm.memcpy{{.+}}(
515 // CHECK-DAG: [[CPY_DONE]]:
516 
517 // var
518 // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
519 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
520 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
521 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]],
522 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
523 
524 // CHECK: call void @__kmpc_for_static_init_4(
525 // pass private alloca's to fork
526 // CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]],
527 // not dag to distinguish with S_VAR_CAST
528 // CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to
529 // CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]],
530 // CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]],
531 // CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]],
532 // CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
533 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], [[S_INT_TY]]* [[TMP_PRIV_VAL]])
534 // CHECK: call void @__kmpc_for_static_fini(
535 
536 // call destructors: var..
537 // CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
538 
539 // ..and s_arr
540 // CHECK: {{.+}}:
541 // CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
542 // CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
543 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
544 
545 // CHECK: ret void
546 
547 // By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for.
548 // However, the support for 'firstprivate' of 'parallel' is only used when 'parallel'
549 // is found alone. Therefore we only have one 'firstprivate' support for 'parallel for'
550 // in combination
551 // CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
552 
553 // addr alloca's
554 // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
555 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
556 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
557 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
558 
559 // skip loop alloca's
560 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
561 // CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}},
562 // CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}},
563 // CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
564 // CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
565 
566 // private alloca's
567 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
568 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
569 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
570 // CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
571 
572 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
573 
574 // init addr alloca's with input values
575 // CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
576 // CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
577 // CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
578 // CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
579 
580 // init private alloca's with addr alloca's
581 // vec
582 // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
583 // CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
584 // CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
585 // CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
586 
587 // s_arr
588 // CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
589 // CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
590 // CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
591 // CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
592 // CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
593 // CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
594 // CHECK-DAG: [[CPY_BODY]]:
595 // CHECK-DAG: call void @llvm.memcpy{{.+}}(
596 // CHECK-DAG: [[CPY_DONE]]:
597 
598 // var
599 // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
600 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
601 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
602 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]],
603 // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
604 
605 // CHECK: call void @__kmpc_for_static_init_4(
606 // CHECK: call void @__kmpc_for_static_fini(
607 
608 // call destructors: var..
609 // CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
610 
611 // ..and s_arr
612 // CHECK: {{.+}}:
613 // CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
614 // CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
615 // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
616 
617 // CHECK: ret void
618 
619 #endif
620