; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

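; Workgroup ID intrinsics. The x component is always available to kernels, so
; using tgid.x adds nothing beyond nounwind. Uses of tgid.y/tgid.z add the
; corresponding "amdgpu-work-group-id-*" attributes under AKF and clear the
; matching "amdgpu-no-workgroup-id-*" attributes under the Attributor.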
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

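; Workitem ID intrinsics follow the same pattern: tidig.x is free for kernels,
; while tidig.y/tidig.z imply "amdgpu-work-item-id-y"/"amdgpu-work-item-id-z"
; (AKF) or clear the matching "amdgpu-no-workitem-id-*" attributes (Attributor).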
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_all_workitems
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

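; llvm.amdgcn.dispatch.ptr implies "amdgpu-dispatch-ptr" under AKF and clears
; "amdgpu-no-dispatch-ptr" under the Attributor.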
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

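; llvm.amdgcn.queue.ptr likewise implies "amdgpu-queue-ptr" / clears
; "amdgpu-no-queue-ptr". llvm.amdgcn.kernarg.segment.ptr needs no extra
; annotation, since the kernarg segment pointer is always available to kernels.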
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

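; Casts from group or private to flat need the aperture bases, which are read
; from the queue pointer, so they also imply "amdgpu-queue-ptr". The reverse
; casts and casts involving only global/constant do not.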
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(3)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(3)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(5)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(1)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(4)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(4)* [[PTR]] to i32*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(1)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32 addrspace(4)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

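; llvm.amdgcn.is.shared and llvm.amdgcn.is.private compare pointers against the
; apertures as well, so they get the same queue-ptr annotation.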
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_is_shared
; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]])
; HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
; HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_is_private
; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]])
; HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
; HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

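; Allocas create stack objects; AKF marks these functions with
; "amdgpu-stack-objects", while the Attributor adds no stack-related attribute,
; so the two prefixes diverge here.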
define amdgpu_kernel void @use_alloca() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca
; AKF_HSA-SAME: () #[[ATTR12:[0-9]+]] {
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; AKF_HSA-SAME: () #[[ATTR12]] {
; AKF_HSA-NEXT:  entry:
; AKF_HSA-NEXT:    br label [[BB:%.*]]
; AKF_HSA:       bb:
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT:  entry:
; ATTRIBUTOR_HSA-NEXT:    br label [[BB:%.*]]
; ATTRIBUTOR_HSA:       bb:
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
entry:
  br label %bb

bb:
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define void @use_alloca_func() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_func
; AKF_HSA-SAME: () #[[ATTR12]] {
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }

;.
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
; AKF_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
; AKF_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.