1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
3
4declare void @function1()
5
6declare void @function2() #0
7
8; Function Attrs: noinline
; Helper that writes the incoming addrspace(4) pointer %argptr to the
; addrspace(1) location %sink, i.e. the pointer value itself reaches memory.
define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #2 {
entry:
  store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
  ret void
}
13
14; Function Attrs: noinline
; Helper that stores the 64-bit value %arg to the location %a. It only sees an
; already-loaded value, never a pointer into the implicit arguments.
define void @function4(i64 %arg, i64* %a) #2 {
entry:
  store i64 %arg, i64* %a
  ret void
}
19
20; Function Attrs: noinline
; Helper that reads an i64 located 168 bytes past %ptr and stores it to %sink.
define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #2 {
entry:
  ; Step 168 bytes forward from whatever address the caller passed in.
  %field.addr = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 168
  %field.addr.i64 = bitcast i8 addrspace(4)* %field.addr to i64 addrspace(4)*
  %field = load i64, i64 addrspace(4)* %field.addr.i64
  store i64 %field, i64* %sink
  ret void
}
28
29; Function Attrs: nounwind readnone speculatable willreturn
30declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
31
32; CHECK: amdhsa.kernels:
33; CHECK:  - .args:
34; CHECK-NOT: hidden_queue_ptr
35; CHECK-LABEL:    .name:           test_kernel10
; Baseline kernel: a single byte store, no calls and no implicit-argument access.
define amdgpu_kernel void @test_kernel10(i8* %a) {
entry:
  store i8 3, i8* %a, align 1
  ret void
}
40
41; Call to an extern function
42
43; CHECK:  - .args:
44; CHECK: hidden_queue_ptr
45; CHECK-LABEL:    .name:           test_kernel20
; @function1 is declared without a body and without any attribute group, so
; nothing visible here rules out a queue pointer use inside it.
define amdgpu_kernel void @test_kernel20(i8* %a) {
entry:
  call void @function1()
  store i8 3, i8* %a, align 1
  ret void
}
51
52; Explicit attribute on kernel
53
54; CHECK:  - .args:
55; CHECK-NOT: hidden_queue_ptr
56; CHECK-LABEL:    .name:           test_kernel21
; Same body as the extern-call case, but the kernel itself carries attribute
; group #0 ("amdgpu-no-queue-ptr").
define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
entry:
  call void @function1()
  store i8 3, i8* %a, align 1
  ret void
}
62
63; Explicit attribute on extern callee
64
65; CHECK:  - .args:
66; CHECK-NOT: hidden_queue_ptr
67; CHECK-LABEL:    .name:           test_kernel22
; The kernel has no attribute of its own; the extern callee @function2 is the
; one declared with attribute group #0 ("amdgpu-no-queue-ptr").
define amdgpu_kernel void @test_kernel22(i8* %a) {
entry:
  call void @function2()
  store i8 3, i8* %a, align 1
  ret void
}
73
; Access wider than the pointer size, overlapping the queue pointer
75
76; CHECK:  - .args:
77; CHECK: hidden_queue_ptr
78; CHECK-LABEL:    .name:           test_kernel30
; Copies an i128 read from the implicit arguments into %a.
define amdgpu_kernel void @test_kernel30(i128* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; A 16-byte load starting at byte 192 spans bytes 192-207.
  %field.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 192
  %field.addr.i128 = bitcast i8 addrspace(4)* %field.addr to i128 addrspace(4)*
  %wide = load i128, i128 addrspace(4)* %field.addr.i128
  store i128 %wide, i128* %a
  ret void
}
87
88; Typical load of queue pointer
89
90; CHECK:  - .args:
91; CHECK: hidden_queue_ptr
92; CHECK-LABEL:    .name:           test_kernel40
; Copies the i64 at implicit-argument byte offset 200 into %a.
define amdgpu_kernel void @test_kernel40(i64* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; An 8-byte load at offset 200 reads bytes 200-207.
  %field.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 200
  %field.addr.i64 = bitcast i8 addrspace(4)* %field.addr to i64 addrspace(4)*
  %field = load i64, i64 addrspace(4)* %field.addr.i64
  store i64 %field, i64* %a
  ret void
}
101
; Typical usage, overridden by explicit attribute on kernel
103
104; CHECK:  - .args:
105; CHECK-NOT: hidden_queue_ptr
106; CHECK-LABEL:    .name:           test_kernel41
; Performs the same 8-byte load at offset 200 as the typical case, but the
; kernel is tagged with attribute group #0 ("amdgpu-no-queue-ptr").
define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %field.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 200
  %field.addr.i64 = bitcast i8 addrspace(4)* %field.addr to i64 addrspace(4)*
  %field = load i64, i64 addrspace(4)* %field.addr.i64
  store i64 %field, i64* %a
  ret void
}
115
116; Access to implicit arg before the queue pointer
117
118; CHECK:  - .args:
119; CHECK-NOT: hidden_queue_ptr
120; CHECK-LABEL:    .name:           test_kernel42
; Copies the i64 at implicit-argument byte offset 192 into %a.
define amdgpu_kernel void @test_kernel42(i64* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; An 8-byte load at offset 192 reads bytes 192-199, ending right before 200.
  %field.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 192
  %field.addr.i64 = bitcast i8 addrspace(4)* %field.addr to i64 addrspace(4)*
  %field = load i64, i64 addrspace(4)* %field.addr.i64
  store i64 %field, i64* %a
  ret void
}
129
130; Access to implicit arg after the queue pointer
131
132; CHECK:  - .args:
133; CHECK-NOT: hidden_queue_ptr
134; CHECK-LABEL:    .name:           test_kernel43
; Copies the i64 at implicit-argument byte offset 208 into %a.
define amdgpu_kernel void @test_kernel43(i64* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; An 8-byte load at offset 208 reads bytes 208-215, starting right after 207.
  %field.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 208
  %field.addr.i64 = bitcast i8 addrspace(4)* %field.addr to i64 addrspace(4)*
  %field = load i64, i64 addrspace(4)* %field.addr.i64
  store i64 %field, i64* %a
  ret void
}
143
144; Accessing a byte just before the queue pointer
145
146; CHECK:  - .args:
147; CHECK-NOT: hidden_queue_ptr
148; CHECK-LABEL:    .name:           test_kernel44
; Copies the single byte at implicit-argument offset 199 into %a.
define amdgpu_kernel void @test_kernel44(i8* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; Offset 199 is one byte below offset 200.
  %byte.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 199
  %byte = load i8, i8 addrspace(4)* %byte.addr, align 1
  store i8 %byte, i8* %a, align 1
  ret void
}
156
; Accessing the first byte of the queue pointer
158
159; CHECK:  - .args:
160; CHECK: hidden_queue_ptr
161; CHECK-LABEL:    .name:           test_kernel45
; Copies the single byte at implicit-argument offset 200 into %a.
define amdgpu_kernel void @test_kernel45(i8* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %byte.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 200
  %byte = load i8, i8 addrspace(4)* %byte.addr, align 1
  store i8 %byte, i8* %a, align 1
  ret void
}
169
; Accessing the last byte of the queue pointer
171
172; CHECK:  - .args:
173; CHECK: hidden_queue_ptr
174; CHECK-LABEL:    .name:           test_kernel46
; Copies the single byte at implicit-argument offset 207 into %a.
define amdgpu_kernel void @test_kernel46(i8* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %byte.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 207
  %byte = load i8, i8 addrspace(4)* %byte.addr, align 1
  store i8 %byte, i8* %a, align 1
  ret void
}
182
183; Accessing a byte just after the queue pointer
184
185; CHECK:  - .args:
186; CHECK-NOT: hidden_queue_ptr
187; CHECK-LABEL:    .name:           test_kernel47
; Copies the single byte at implicit-argument offset 208 into %a.
define amdgpu_kernel void @test_kernel47(i8* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; Offset 208 is one byte above offset 207.
  %byte.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 208
  %byte = load i8, i8 addrspace(4)* %byte.addr, align 1
  store i8 %byte, i8* %a, align 1
  ret void
}
195
196; Access with an unknown offset
197
198; CHECK:  - .args:
199; CHECK: hidden_queue_ptr
200; CHECK-LABEL:    .name:           test_kernel50
; Copies one implicit-argument byte into %a; which byte is decided by %b.
define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; The index is the kernel argument %b, so the offset is not a constant.
  %byte.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i32 %b
  %byte = load i8, i8 addrspace(4)* %byte.addr, align 1
  store i8 %byte, i8* %a, align 1
  ret void
}
208
209; Multiple geps reaching the queue pointer argument.
210
211; CHECK:  - .args:
212; CHECK: hidden_queue_ptr
213; CHECK-LABEL:    .name:           test_kernel51
; Copies one implicit-argument byte into %a, addressed through two chained
; constant offsets.
define amdgpu_kernel void @test_kernel51(i8* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; 16 + 184 = 200 bytes past the implicit-argument base.
  %step1 = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 16
  %step2 = getelementptr inbounds i8, i8 addrspace(4)* %step1, i64 184
  %byte = load i8, i8 addrspace(4)* %step2, align 1
  store i8 %byte, i8* %a, align 1
  ret void
}
222
223; Multiple geps not reaching the queue pointer argument.
224
225; CHECK:  - .args:
226; CHECK-NOT: hidden_queue_ptr
227; CHECK-LABEL:    .name:           test_kernel52
; Copies one implicit-argument byte into %a, addressed through two chained
; constant offsets.
define amdgpu_kernel void @test_kernel52(i8* %a) {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; 16 + 16 = 32 bytes past the implicit-argument base.
  %step1 = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 16
  %step2 = getelementptr inbounds i8, i8 addrspace(4)* %step1, i64 16
  %byte = load i8, i8 addrspace(4)* %step2, align 1
  store i8 %byte, i8* %a, align 1
  ret void
}
236
; Queue pointer loaded in the kernel and passed to a function call
238
239; CHECK:  - .args:
240; CHECK: hidden_queue_ptr
241; CHECK-LABEL:    .name:           test_kernel60
; Loads the i64 at implicit-argument offset 200 in the kernel and hands the
; loaded value, together with %a, to @function4.
define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %field.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 200
  %field.addr.i64 = bitcast i8 addrspace(4)* %field.addr to i64 addrspace(4)*
  %field = load i64, i64 addrspace(4)* %field.addr.i64
  call void @function4(i64 %field, i64* %a)
  ret void
}
250
251; Queue pointer retrieved inside a function call; chain of geps
252
253; CHECK:  - .args:
254; CHECK: hidden_queue_ptr
255; CHECK-LABEL:    .name:           test_kernel61
; Passes a pointer 32 bytes into the implicit arguments to @function5, which
; performs the actual load.
define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; @function5 adds another 168 bytes before loading: 32 + 168 = 200.
  %partial.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i64 32
  call void @function5(i8 addrspace(4)* %partial.addr, i64* %a)
  ret void
}
262
263; Pointer captured
264
265; CHECK:  - .args:
266; CHECK: hidden_queue_ptr
267; CHECK-LABEL:    .name:           test_kernel70
; Stores a pointer derived from the implicit-argument base to %sink, so the
; pointer itself (not a loaded value) is written to memory.
define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %escaping.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i32 42
  store i8 addrspace(4)* %escaping.addr, i8 addrspace(4)* addrspace(1)* %sink, align 8
  ret void
}
274
275; Pointer captured inside function call
276
277; CHECK:  - .args:
278; CHECK: hidden_queue_ptr
279; CHECK-LABEL:    .name:           test_kernel71
; Hands a pointer derived from the implicit-argument base to @function3, which
; stores that pointer to %sink.
define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %escaping.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i32 42
  call void @function3(i8 addrspace(4)* %escaping.addr, i8 addrspace(4)* addrspace(1)* %sink)
  ret void
}
286
287; Ineffective pointer capture
288
289; CHECK:  - .args:
290; CHECK-NOT: hidden_queue_ptr
291; CHECK-LABEL:    .name:           test_kernel72
; Stores a pointer derived from the implicit-argument base, but the destination
; address is undef rather than a real location.
define amdgpu_kernel void @test_kernel72() #2 {
entry:
  %implicit.args = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %derived.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicit.args, i32 42
  store i8 addrspace(4)* %derived.addr, i8 addrspace(4)* addrspace(1)* undef, align 8
  ret void
}
298
299attributes #0 = { "amdgpu-no-queue-ptr" }
300attributes #1 = { nounwind readnone speculatable willreturn }
301attributes #2 = { noinline }
302