; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2
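
; Workgroup ID X is always enabled, so using llvm.amdgcn.workgroup.id.x adds
; no extra attribute and the kernel keeps only the base attribute set.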

define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}
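
; Workgroup ID Y and Z uses are annotated with "amdgpu-work-group-id-y" and
; "amdgpu-work-group-id-z" respectively; mixed uses accumulate both attributes.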

define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}
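
; Workitem ID X likewise needs no attribute; Y and Z uses are annotated with
; "amdgpu-work-item-id-y" and "amdgpu-work-item-id-z".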

define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_all_workitems
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}
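
; llvm.amdgcn.dispatch.ptr is annotated with "amdgpu-dispatch-ptr".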

define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}
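
; llvm.amdgcn.queue.ptr is annotated with "amdgpu-queue-ptr".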

define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}
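
; llvm.amdgcn.kernarg.segment.ptr is annotated with "amdgpu-kernarg-segment-ptr".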

define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}
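
; Casting group or private pointers to flat requires the queue pointer (the
; aperture bases are read from it), so these kernels get "amdgpu-queue-ptr".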

define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}
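
; Casts in the other direction, from flat to group or private, do not need the
; queue pointer.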

define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(3)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(3)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(5)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(1)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(4)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(4)* [[PTR]] to i32*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(1)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32 addrspace(4)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}
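
; llvm.amdgcn.is.shared and llvm.amdgcn.is.private need the queue pointer under
; the annotate-kernel-features pass; the attributor instead deletes the dead
; code and keeps the base attribute set, so the check prefixes diverge here.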

define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_is_shared
; AKF_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; AKF_HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]])
; AKF_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
; AKF_HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_shared
; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_is_private
; AKF_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; AKF_HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]])
; AKF_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
; AKF_HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_private
; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}
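
; Allocas (stack objects) are annotated with "amdgpu-stack-objects", including
; allocas outside the entry block and in non-kernel functions.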

define amdgpu_kernel void @use_alloca() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca
; AKF_HSA-SAME: () #[[ATTR13:[0-9]+]] {
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca
; ATTRIBUTOR_HSA-SAME: () #[[ATTR13:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; AKF_HSA-SAME: () #[[ATTR13]] {
; AKF_HSA-NEXT:  entry:
; AKF_HSA-NEXT:    br label [[BB:%.*]]
; AKF_HSA:       bb:
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; ATTRIBUTOR_HSA-SAME: () #[[ATTR13]] {
; ATTRIBUTOR_HSA-NEXT:  entry:
; ATTRIBUTOR_HSA-NEXT:    br label [[BB:%.*]]
; ATTRIBUTOR_HSA:       bb:
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    ret void
;
entry:
  br label %bb

bb:
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define void @use_alloca_func() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_func
; AKF_HSA-SAME: () #[[ATTR13]] {
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func
; ATTRIBUTOR_HSA-SAME: () #[[ATTR13]] {
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }

;.
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
; AKF_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
; AKF_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" }
; AKF_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" }
;.