1 // Test host codegen.
2 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
3 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
4 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
5 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
7 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
8 
9 // Test target codegen - host bc file has to be created first.
10 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
11 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
12 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
13 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
14 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
15 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
16 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
17 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
18 
19 // expected-no-diagnostics
20 #ifndef HEADER
21 #define HEADER
22 
23 // CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
24 // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
25 // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
26 
27 // CHECK-DAG: [[S1:%.+]] = type { double }
28 // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
29 // CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
30 // CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
31 
32 // TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
33 
34 // We have 6 target regions
35 
36 // CHECK-DAG: @{{.*}} = private constant i8 0
37 // CHECK-DAG: @{{.*}} = private constant i8 0
38 // CHECK-DAG: @{{.*}} = private constant i8 0
39 // CHECK-DAG: @{{.*}} = private constant i8 0
40 // CHECK-DAG: @{{.*}} = private constant i8 0
41 // CHECK-DAG: @{{.*}} = private constant i8 0
42 
43 // TCHECK: @{{.+}} = constant [[ENTTY]]
44 // TCHECK: @{{.+}} = constant [[ENTTY]]
45 // TCHECK: @{{.+}} = constant [[ENTTY]]
46 // TCHECK: @{{.+}} = constant [[ENTTY]]
47 // TCHECK: @{{.+}} = constant [[ENTTY]]
48 // TCHECK: @{{.+}} = constant [[ENTTY]]
49 
50 // Check if offloading descriptor is created.
51 // CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
52 // CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
53 // CHECK: [[DEVBEGIN:@.+]] = external constant i8
54 // CHECK: [[DEVEND:@.+]] = external constant i8
55 // CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
56 // CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
57 
58 // Check that target registration is registered as a Ctor.
59 // CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
60 
61 
// Test input: function template containing two `target parallel` regions that
// use different forms of the num_threads clause.
// The parameter n is not referenced in the body; the function returns a.
62 template<typename tx>
63 tx ftemplate(int n) {
64   tx a = 0;
65 
  // Region with an empty body; the num_threads argument is the expression tx(20).
66   #pragma omp target parallel num_threads(tx(20))
67   {
68   }
69 
70   short b = 1;
  // Region whose num_threads argument is the short variable b; the body reads
  // b and accumulates it into a.
71   #pragma omp target parallel num_threads(b)
72   {
73     a += b;
74   }
75 
76   return a;
77 }
78 
// Test input: static function containing two `target parallel` regions with
// empty bodies whose num_threads arguments are computed from the parameter n.
// Returns n+1.
79 static
80 int fstatic(int n) {
81 
  // num_threads argument is the parameter itself.
82   #pragma omp target parallel num_threads(n)
83   {
84   }
85 
  // num_threads argument is an arithmetic expression over the parameter.
86   #pragma omp target parallel num_threads(32+n)
87   {
88   }
89 
90   return n+1;
91 }
92 
// Test input: struct with a single double member and a member function that
// contains two `target parallel` regions, each of which writes this->a.
93 struct S1 {
94   double a;
95 
  // Runs the two regions below and returns the member a converted to int.
96   int r1(int n){
97     int b = 1;
98 
    // num_threads argument is an expression over the parameter n and the
    // local b; the body reads b and writes this->a.
99     #pragma omp target parallel num_threads(n-b)
100     {
101       this->a = (double)b + 1.5;
102     }
103 
    // num_threads argument is the integer literal 1024; the body only
    // writes this->a.
104     #pragma omp target parallel num_threads(1024)
105     {
106       this->a = 2.5;
107     }
108 
109     return (int)a;
110   }
111 };
112 
113 // CHECK: define {{.*}}@{{.*}}bar{{.*}}
// Driver for the test: calls S1::r1, fstatic and ftemplate<int> with n, adds
// each result to a, and returns a. The FileCheck lines inside the body match
// the three calls in this order.
114 int bar(int n){
115   int a = 0;
116 
117   S1 S;
118   // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
119   a += S.r1(n);
120 
121   // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
122   a += fstatic(n);
123 
124   // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
125   a += ftemplate<int>(n);
126 
127   return a;
128 }
129 
130 
131 
132 //
133 // CHECK: define {{.*}}[[FS1]]([[S1]]* {{%.+}}, i32 {{[^%]*}}[[PARM:%.+]])
134 //
135 // CHECK-DAG:   store i32 [[PARM]], i32* [[N_ADDR:%.+]], align
136 // CHECK:       store i32 1, i32* [[B:%.+]], align
137 // CHECK:       [[NV:%.+]] = load i32, i32* [[N_ADDR]], align
138 // CHECK:       [[BV:%.+]] = load i32, i32* [[B]], align
139 // CHECK:       [[SUB:%.+]] = sub nsw i32 [[NV]], [[BV]]
140 // CHECK:       store i32 [[SUB]], i32* [[CAPE_ADDR:%.+]], align
141 // CHECK:       [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align
142 // CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32*
143 // CHECK-64:    store i32 [[CEV]], i32* [[CONV]], align
144 // CHECK-32:    store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align
145 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
146 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
147 //
148 // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
149 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
150 // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
151 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
152 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
153 //
154 // CHECK:       [[FAIL]]
155 // CHECK:       call void [[HVT1:@.+]]([[S1]]* {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]])
156 // CHECK:       br label {{%?}}[[END]]
157 // CHECK:       [[END]]
158 //
159 //
160 //
161 // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024)
162 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
163 // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
164 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
165 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
166 //
167 // CHECK:       [[FAIL]]
168 // CHECK:       call void [[HVT2:@.+]]([[S1]]* {{[^,]+}})
169 // CHECK:       br label {{%?}}[[END]]
170 // CHECK:       [[END]]
171 //
172 
173 
174 
175 
176 
177 
178 //
179 // CHECK: define {{.*}}[[FSTATIC]](i32 {{[^%]*}}[[PARM:%.+]])
180 //
181 // CHECK-DAG:   store i32 [[PARM]], i32* [[N_ADDR:%.+]], align
182 // CHECK:       [[NV:%.+]] = load i32, i32* [[N_ADDR]], align
183 // CHECK:       store i32 [[NV]], i32* [[CAPE_ADDR:%.+]], align
184 // CHECK:       [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align
185 // CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32*
186 // CHECK-64:    store i32 [[CEV]], i32* [[CONV]], align
187 // CHECK-32:    store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align
188 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
189 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
190 //
191 // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
192 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
193 // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
194 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
195 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
196 //
197 // CHECK:       [[FAIL]]
198 // CHECK:       call void [[HVT3:@.+]](i[[SZ]] [[ARG]])
199 // CHECK:       br label {{%?}}[[END]]
200 // CHECK:       [[END]]
201 //
202 //
203 //
204 // CHECK:       [[NV:%.+]] = load i32, i32* [[N_ADDR]], align
205 // CHECK:       [[ADD:%.+]] = add nsw i32 32, [[NV]]
206 // CHECK:       store i32 [[ADD]], i32* [[CAPE_ADDR:%.+]], align
207 // CHECK:       [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align
208 // CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32*
209 // CHECK-64:    store i32 [[CEV]], i32* [[CONV]], align
210 // CHECK-32:    store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align
211 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
212 // CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
213 //
214 // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
215 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
216 // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
217 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
218 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
219 //
220 // CHECK:       [[FAIL]]
221 // CHECK:       call void [[HVT4:@.+]](i[[SZ]] [[ARG]])
222 // CHECK:       br label {{%?}}[[END]]
223 // CHECK:       [[END]]
224 //
225 
226 
227 
228 
229 
230 
231 //
232 // CHECK: define {{.*}}[[FTEMPLATE]]
233 //
234 // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20)
235 // CHECK-NEXT:  store i32 [[RET]], i32* [[RHV:%.+]], align
236 // CHECK-NEXT:  [[RET2:%.+]] = load i32, i32* [[RHV]], align
237 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
238 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
239 //
240 // CHECK:       [[FAIL]]
241 // CHECK:       call void [[HVT5:@.+]]()
242 // CHECK:       br label {{%?}}[[END]]
243 //
244 // CHECK:       [[END]]
245 //
246 //
247 //
248 // CHECK:       store i16 1, i16* [[B:%.+]], align
249 // CHECK:       [[BV:%.+]] = load i16, i16* [[B]], align
250 // CHECK:       store i16 [[BV]], i16* [[CAPE_ADDR:%.+]], align
251 // CHECK:       [[CEV:%.+]] = load i16, i16* [[CAPE_ADDR]], align
252 // CHECK:       [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i16*
253 // CHECK:       store i16 [[CEV]], i16* [[CONV]], align
254 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
255 // CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
256 // CHECK:       [[THREADS:%.+]] = sext i16 [[T]] to i32
257 //
258 // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
259 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
260 // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
261 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
262 // CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
263 //
264 // CHECK:       [[FAIL]]
265 // CHECK:       call void [[HVT6:@.+]](i[[SZ]] {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]])
266 // CHECK:       br label {{%?}}[[END]]
267 // CHECK:       [[END]]
268 //
269 
270 
271 
272 
273 
274 
275 // Check that the offloading functions are emitted and that the parallel function
276 // is appropriately guarded.
277 
278 // CHECK:       define internal void [[HVT1]]([[S1]]* {{%.+}}, i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]])
279 // CHECK-DAG:   store i[[SZ]] [[PARM2]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
280 // CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32*
281 // CHECK-64:    [[NT:%.+]] = load i32, i32* [[CONV]], align
282 // CHECK-32:    [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align
283 // CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
284 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2,
285 //
286 //
287 
288 
289 // CHECK:       define internal void [[HVT2]]([[S1]]* {{%.+}})
290 // CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1024)
291 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 1,
292 //
293 //
294 
295 
296 
297 
298 
299 
300 
301 
302 // CHECK:       define internal void [[HVT3]](i[[SZ]] [[PARM:%.+]])
303 // CHECK-DAG:   store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
304 // CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32*
305 // CHECK-64:    [[NT:%.+]] = load i32, i32* [[CONV]], align
306 // CHECK-32:    [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align
307 // CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
308 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0,
309 //
310 //
311 // CHECK:       define internal void [[HVT4]](i[[SZ]] [[PARM:%.+]])
312 // CHECK-DAG:   store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
313 // CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32*
314 // CHECK-64:    [[NT:%.+]] = load i32, i32* [[CONV]], align
315 // CHECK-32:    [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align
316 // CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
317 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0,
318 //
319 //
320 
321 
322 
323 
324 
325 // CHECK:       define internal void [[HVT5]](
326 // CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20)
327 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0,
328 //
329 //
330 
331 
332 // CHECK:       define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]])
333 // CHECK-DAG:   store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
334 // CHECK:       [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16*
335 // CHECK:       [[T:%.+]] = load i16, i16* [[CONV]], align
336 // CHECK:       [[NT:%.+]] = sext i16 [[T]] to i32
337 // CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
338 // CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2,
339 //
340 //
341 
342 
343 
344 #endif
345