// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

#ifdef CK1

template <typename T>  // template target region; instantiated for T = char ** (kernel ..._l15)
int tmain(T argc) {
#pragma omp target
#pragma omp teams
  argc = 0;
  return 0;
}
20 
21 
main(int argc,char ** argv)22 int main (int argc, char **argv) {
23 #pragma omp target
24 #pragma omp teams
25   {
26   argc = 0;
27   }
28   return tmain(argv);
29 }


// only nvptx side: do not outline teams region and do not call fork_teams


// target region in template



#endif // CK1

// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4
// expected-no-diagnostics
#ifdef CK2

template <typename T>  // template target region with clauses; instantiated for T = char ** (kernel ..._l53)
int tmain(T argc) {
  int a = 10;  // operand of the num_teams clause, captured by the target region
  int b = 5;   // operand of the thread_limit clause, captured by the target region
#pragma omp target
#pragma omp teams num_teams(a) thread_limit(b)
  {
  argc = 0;
  }
  return 0;
}
60 
main(int argc,char ** argv)61 int main (int argc, char **argv) {
62   int a = 20;
63   int b = 5;
64 #pragma omp target
65 #pragma omp teams num_teams(a) thread_limit(b)
66   {
67   argc = 0;
68   }
69   return tmain(argv);
70 }






#endif // CK2
#endif
79 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
80 // CHECK1-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
81 // CHECK1-NEXT:  entry:
82 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i64, align 8
83 // CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
84 // CHECK1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
85 // CHECK1-NEXT:    store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
86 // CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
87 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
88 // CHECK1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
89 // CHECK1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
90 // CHECK1:       user_code.entry:
91 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4
92 // CHECK1-NEXT:    [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4)
93 // CHECK1-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32*
94 // CHECK1-NEXT:    store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
95 // CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
96 // CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
97 // CHECK1-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
98 // CHECK1-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]]
99 // CHECK1-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4)
100 // CHECK1-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
101 // CHECK1-NEXT:    ret void
102 // CHECK1:       worker.exit:
103 // CHECK1-NEXT:    ret void
104 //
105 //
106 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__
107 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
108 // CHECK1-NEXT:  entry:
109 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
110 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
111 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
112 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
113 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
114 // CHECK1-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
115 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
116 // CHECK1-NEXT:    store i32 0, i32* [[TMP0]], align 4
117 // CHECK1-NEXT:    ret void
118 //
119 //
120 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15
121 // CHECK1-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] {
122 // CHECK1-NEXT:  entry:
123 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 8
124 // CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
125 // CHECK1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
126 // CHECK1-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
127 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
128 // CHECK1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
129 // CHECK1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
130 // CHECK1:       user_code.entry:
131 // CHECK1-NEXT:    [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8
132 // CHECK1-NEXT:    [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8)
133 // CHECK1-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8***
134 // CHECK1-NEXT:    store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8
135 // CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
136 // CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
137 // CHECK1-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
138 // CHECK1-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]]
139 // CHECK1-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8)
140 // CHECK1-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
141 // CHECK1-NEXT:    ret void
142 // CHECK1:       worker.exit:
143 // CHECK1-NEXT:    ret void
144 //
145 //
146 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
147 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] {
148 // CHECK1-NEXT:  entry:
149 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
150 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
151 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8***, align 8
152 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
153 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
154 // CHECK1-NEXT:    store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8
155 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8
156 // CHECK1-NEXT:    store i8** null, i8*** [[TMP0]], align 8
157 // CHECK1-NEXT:    ret void
158 //
159 //
160 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
161 // CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
162 // CHECK2-NEXT:  entry:
163 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
164 // CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
165 // CHECK2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
166 // CHECK2-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
167 // CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
168 // CHECK2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
169 // CHECK2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
170 // CHECK2:       user_code.entry:
171 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
172 // CHECK2-NEXT:    [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4)
173 // CHECK2-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32*
174 // CHECK2-NEXT:    store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
175 // CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
176 // CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
177 // CHECK2-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
178 // CHECK2-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]]
179 // CHECK2-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
180 // CHECK2-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
181 // CHECK2-NEXT:    ret void
182 // CHECK2:       worker.exit:
183 // CHECK2-NEXT:    ret void
184 //
185 //
186 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__
187 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
188 // CHECK2-NEXT:  entry:
189 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
190 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
191 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 4
192 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
193 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
194 // CHECK2-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4
195 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4
196 // CHECK2-NEXT:    store i32 0, i32* [[TMP0]], align 4
197 // CHECK2-NEXT:    ret void
198 //
199 //
200 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15
201 // CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] {
202 // CHECK2-NEXT:  entry:
203 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 4
204 // CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
205 // CHECK2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
206 // CHECK2-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4
207 // CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
208 // CHECK2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
209 // CHECK2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
210 // CHECK2:       user_code.entry:
211 // CHECK2-NEXT:    [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4
212 // CHECK2-NEXT:    [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4)
213 // CHECK2-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8***
214 // CHECK2-NEXT:    store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4
215 // CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
216 // CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
217 // CHECK2-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
218 // CHECK2-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]]
219 // CHECK2-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
220 // CHECK2-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
221 // CHECK2-NEXT:    ret void
222 // CHECK2:       worker.exit:
223 // CHECK2-NEXT:    ret void
224 //
225 //
226 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
227 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] {
228 // CHECK2-NEXT:  entry:
229 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
230 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
231 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8***, align 4
232 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
233 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
234 // CHECK2-NEXT:    store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4
235 // CHECK2-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4
236 // CHECK2-NEXT:    store i8** null, i8*** [[TMP0]], align 4
237 // CHECK2-NEXT:    ret void
238 //
239 //
240 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
241 // CHECK3-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
242 // CHECK3-NEXT:  entry:
243 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
244 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
245 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i64, align 8
246 // CHECK3-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
247 // CHECK3-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
248 // CHECK3-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
249 // CHECK3-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
250 // CHECK3-NEXT:    store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
251 // CHECK3-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
252 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
253 // CHECK3-NEXT:    [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
254 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
255 // CHECK3-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
256 // CHECK3-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
257 // CHECK3:       user_code.entry:
258 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4
259 // CHECK3-NEXT:    [[ARGC3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4)
260 // CHECK3-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32*
261 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
262 // CHECK3-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
263 // CHECK3-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
264 // CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
265 // CHECK3-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]]
266 // CHECK3-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4)
267 // CHECK3-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
268 // CHECK3-NEXT:    ret void
269 // CHECK3:       worker.exit:
270 // CHECK3-NEXT:    ret void
271 //
272 //
273 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
274 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
275 // CHECK3-NEXT:  entry:
276 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
277 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
278 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
279 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
280 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
281 // CHECK3-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
282 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
283 // CHECK3-NEXT:    store i32 0, i32* [[TMP0]], align 4
284 // CHECK3-NEXT:    ret void
285 //
286 //
287 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53
288 // CHECK3-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i8** noundef [[ARGC:%.*]]) #[[ATTR0]] {
289 // CHECK3-NEXT:  entry:
290 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
291 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
292 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 8
293 // CHECK3-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
294 // CHECK3-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
295 // CHECK3-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
296 // CHECK3-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
297 // CHECK3-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
298 // CHECK3-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
299 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*
300 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
301 // CHECK3-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
302 // CHECK3-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
303 // CHECK3:       user_code.entry:
304 // CHECK3-NEXT:    [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8
305 // CHECK3-NEXT:    [[ARGC2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8)
306 // CHECK3-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8***
307 // CHECK3-NEXT:    store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8
308 // CHECK3-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
309 // CHECK3-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
310 // CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
311 // CHECK3-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]]
312 // CHECK3-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8)
313 // CHECK3-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
314 // CHECK3-NEXT:    ret void
315 // CHECK3:       worker.exit:
316 // CHECK3-NEXT:    ret void
317 //
318 //
319 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
320 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] {
321 // CHECK3-NEXT:  entry:
322 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
323 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
324 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8***, align 8
325 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
326 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
327 // CHECK3-NEXT:    store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8
328 // CHECK3-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8
329 // CHECK3-NEXT:    store i8** null, i8*** [[TMP0]], align 8
330 // CHECK3-NEXT:    ret void
331 //
332 //
333 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
334 // CHECK4-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
335 // CHECK4-NEXT:  entry:
336 // CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
337 // CHECK4-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
338 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
339 // CHECK4-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
340 // CHECK4-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
341 // CHECK4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
342 // CHECK4-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
343 // CHECK4-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
344 // CHECK4-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
345 // CHECK4-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
346 // CHECK4-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
347 // CHECK4:       user_code.entry:
348 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
349 // CHECK4-NEXT:    [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4)
350 // CHECK4-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32*
351 // CHECK4-NEXT:    store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4
352 // CHECK4-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
353 // CHECK4-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
354 // CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
355 // CHECK4-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]]
356 // CHECK4-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
357 // CHECK4-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
358 // CHECK4-NEXT:    ret void
359 // CHECK4:       worker.exit:
360 // CHECK4-NEXT:    ret void
361 //
362 //
363 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__
364 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
365 // CHECK4-NEXT:  entry:
366 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
367 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
368 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 4
369 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
370 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
371 // CHECK4-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4
372 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4
373 // CHECK4-NEXT:    store i32 0, i32* [[TMP0]], align 4
374 // CHECK4-NEXT:    ret void
375 //
376 //
377 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53
378 // CHECK4-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i8** noundef [[ARGC:%.*]]) #[[ATTR0]] {
379 // CHECK4-NEXT:  entry:
380 // CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
381 // CHECK4-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
382 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 4
383 // CHECK4-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
384 // CHECK4-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
385 // CHECK4-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
386 // CHECK4-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
387 // CHECK4-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4
388 // CHECK4-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
389 // CHECK4-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
390 // CHECK4-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
391 // CHECK4:       user_code.entry:
392 // CHECK4-NEXT:    [[TMP1:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4
393 // CHECK4-NEXT:    [[ARGC1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4)
394 // CHECK4-NEXT:    [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8***
395 // CHECK4-NEXT:    store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4
396 // CHECK4-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
397 // CHECK4-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
398 // CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4
399 // CHECK4-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]]
400 // CHECK4-NEXT:    call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4)
401 // CHECK4-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
402 // CHECK4-NEXT:    ret void
403 // CHECK4:       worker.exit:
404 // CHECK4-NEXT:    ret void
405 //
406 //
407 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1
408 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] {
409 // CHECK4-NEXT:  entry:
410 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
411 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
412 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8***, align 4
413 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
414 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
415 // CHECK4-NEXT:    store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4
416 // CHECK4-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4
417 // CHECK4-NEXT:    store i8** null, i8*** [[TMP0]], align 4
418 // CHECK4-NEXT:    ret void
419 //
420