; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Intrinsics whose use should cause the corresponding implicit-argument
; attribute to be added to the calling kernel/function.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

; workgroup.id.x alone needs no extra attribute: expects base set #[[ATTR1]].
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; workgroup.id.y should add "amdgpu-work-group-id-y": expects set #[[ATTR2]].
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; Multiple uses of the same intrinsic add the attribute only once (same set #[[ATTR2]]).
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; x is implicit, so x+y still yields only the y attribute (set #[[ATTR2]]).
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; workgroup.id.z should add "amdgpu-work-group-id-z": expects set #[[ATTR3]].
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; x+z yields only the z attribute (set #[[ATTR3]]).
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; y+z yields both workgroup-id attributes (set #[[ATTR4]]).
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; x+y+z yields the same y+z attribute set as above (set #[[ATTR4]]).
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; workitem.id.x alone needs no extra attribute: expects base set #[[ATTR1]].
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; workitem.id.y should add "amdgpu-work-item-id-y": expects set #[[ATTR5]].
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; workitem.id.z should add "amdgpu-work-item-id-z": expects set #[[ATTR6]].
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; Both x intrinsics are implicit: still base set #[[ATTR1]].
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; Mixing workitem.y and workgroup.y yields both y attributes (set #[[ATTR7]]).
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; workitem x+y+z yields the y and z workitem attributes (set #[[ATTR8]]).
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; All six id intrinsics together yield the union of y/z attributes (set #[[ATTR9]]).
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_all_workitems
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

; dispatch.ptr use should add "amdgpu-dispatch-ptr" (set #[[ATTR10]]).
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; queue.ptr use should add "amdgpu-queue-ptr" (set #[[ATTR11]]).
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; kernarg.segment.ptr use should add "amdgpu-kernarg-segment-ptr" (set #[[ATTR12]]).
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; group->flat addrspacecast requires the queue ptr (aperture): same set #[[ATTR11]].
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; private->flat addrspacecast also requires the queue ptr: same set #[[ATTR11]].
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; flat->group cast does not need the queue ptr: base set #[[ATTR1]].
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(3)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(3)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; flat->private cast does not need the queue ptr: base set #[[ATTR1]].
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(5)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(1)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; constant->flat is also a no-op cast: base set #[[ATTR1]].
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(4)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(4)* [[PTR]] to i32*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; flat->global cast needs nothing extra: base set #[[ATTR1]].
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(1)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; flat->constant cast needs nothing extra: base set #[[ATTR1]].
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32 addrspace(4)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

; is.shared needs the aperture from the queue ptr: same set #[[ATTR11]].
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_is_shared
; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]])
; HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
; HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; is.private needs the aperture from the queue ptr: same set #[[ATTR11]].
define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_is_private
; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]])
; HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
; HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; An alloca should add "amdgpu-stack-objects" (set #[[ATTR13]]).
define amdgpu_kernel void @use_alloca() #1 {
; HSA-LABEL: define {{[^@]+}}@use_alloca
; HSA-SAME: () #[[ATTR13:[0-9]+]] {
; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

; Alloca outside the entry block still adds "amdgpu-stack-objects" (set #[[ATTR13]]).
define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
; HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; HSA-SAME: () #[[ATTR13]] {
; HSA-NEXT:  entry:
; HSA-NEXT:    br label [[BB:%.*]]
; HSA:       bb:
; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; HSA-NEXT:    ret void
;
entry:
  br label %bb

bb:
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

; Alloca in a non-kernel function also gets "amdgpu-stack-objects" (set #[[ATTR13]]).
define void @use_alloca_func() #1 {
; HSA-LABEL: define {{[^@]+}}@use_alloca_func
; HSA-SAME: () #[[ATTR13]] {
; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
; FIX(review): group #2 is referenced by the is.shared/is.private declarations
; but was previously undefined, which is an IR parse error. Defined to match #0;
; if the autogenerated global checks below drift, regenerate with
; utils/update_test_checks.py.
attributes #2 = { nounwind readnone speculatable }

;.
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
; AKF_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
; AKF_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" }
; AKF_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" }
;.