; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
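; Check that the amdgpu-annotate-kernel-features pass infers the implicit input
; attributes (work-group/work-item IDs, dispatch, queue, and kernarg segment
; pointers) from the intrinsics and address space casts each kernel uses.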

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

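; Casts from group (local) or private pointers to flat are expected to need the
; queue pointer (#11); casts in the opposite direction should not.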
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

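; llvm.amdgcn.is.shared and llvm.amdgcn.is.private are likewise expected to
; require the queue pointer (#11).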
; HSA: define amdgpu_kernel void @use_is_shared(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; HSA: define amdgpu_kernel void @use_is_private(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }

; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }