1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
3; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s
4
5; Test with gfx803 so that
6; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require
7; the queue ptr.  Test with code object v3 to cover
8; llvm.trap/llvm.debugtrap, which require the queue ptr.
9
10
; Callee for the parent_* tests below. Only a declaration is present in this
; file, so nothing here shows which inputs it actually reads.
11declare hidden void @requires_all_inputs()
12
13; This function is incorrectly marked with the hints that the callee
14; does not require the implicit arguments to the function. Make sure
15; we do not crash.
16define void @parent_func_missing_inputs() #0 {
17; FIXEDABI-LABEL: parent_func_missing_inputs:
18; FIXEDABI:       ; %bb.0:
19; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; FIXEDABI-NEXT:    s_or_saveexec_b64 s[16:17], -1
21; FIXEDABI-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
22; FIXEDABI-NEXT:    s_mov_b64 exec, s[16:17]
23; FIXEDABI-NEXT:    v_writelane_b32 v40, s33, 2
24; FIXEDABI-NEXT:    s_mov_b32 s33, s32
25; FIXEDABI-NEXT:    s_addk_i32 s32, 0x400
26; FIXEDABI-NEXT:    v_writelane_b32 v40, s30, 0
27; FIXEDABI-NEXT:    v_writelane_b32 v40, s31, 1
28; FIXEDABI-NEXT:    s_getpc_b64 s[16:17]
29; FIXEDABI-NEXT:    s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
30; FIXEDABI-NEXT:    s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
31; FIXEDABI-NEXT:    s_swappc_b64 s[30:31], s[16:17]
32; FIXEDABI-NEXT:    v_readlane_b32 s31, v40, 1
33; FIXEDABI-NEXT:    v_readlane_b32 s30, v40, 0
34; FIXEDABI-NEXT:    s_addk_i32 s32, 0xfc00
35; FIXEDABI-NEXT:    v_readlane_b32 s33, v40, 2
36; FIXEDABI-NEXT:    s_or_saveexec_b64 s[4:5], -1
37; FIXEDABI-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
38; FIXEDABI-NEXT:    s_mov_b64 exec, s[4:5]
39; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
40; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
; The call below is the only IR operation in this non-kernel function. The
; expected output above is shared by both RUN lines and pins the v40 spill,
; the s32/s33 adjustment, the pc-relative call to requires_all_inputs and the
; matching restore sequence.
41  call void @requires_all_inputs()
42  ret void
43}
44
; Kernel counterpart of parent_func_missing_inputs: an amdgpu_kernel carrying
; the #0 attributes calls @requires_all_inputs. The expected output packs
; v0, v1 << 10 and v2 << 20 into v31, copies s6/s7/s8 into s12/s13/s14 and
; writes 0 to s[8:9] before the call. The two RUN lines have separate check
; blocks; they differ only in the order of the shift/or pair and in which
; VGPR holds the shifted v2 (v2 in the first block, v1 in the second).
45define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
46; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
47; FIXEDABI-SDAG:       ; %bb.0:
48; FIXEDABI-SDAG-NEXT:    s_add_i32 s4, s4, s9
49; FIXEDABI-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
50; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
51; FIXEDABI-SDAG-NEXT:    s_add_u32 s0, s0, s9
52; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
53; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
54; FIXEDABI-SDAG-NEXT:    s_addc_u32 s1, s1, 0
55; FIXEDABI-SDAG-NEXT:    s_mov_b32 s14, s8
56; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
57; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[8:9], 0
58; FIXEDABI-SDAG-NEXT:    s_mov_b32 s12, s6
59; FIXEDABI-SDAG-NEXT:    s_mov_b32 s13, s7
60; FIXEDABI-SDAG-NEXT:    s_mov_b32 s32, 0
61; FIXEDABI-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s5
62; FIXEDABI-SDAG-NEXT:    s_getpc_b64 s[4:5]
63; FIXEDABI-SDAG-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
64; FIXEDABI-SDAG-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
65; FIXEDABI-SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
66; FIXEDABI-SDAG-NEXT:    s_endpgm
67;
68; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
69; FIXEDABI-GISEL:       ; %bb.0:
70; FIXEDABI-GISEL-NEXT:    s_add_i32 s4, s4, s9
71; FIXEDABI-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
72; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
73; FIXEDABI-GISEL-NEXT:    s_add_u32 s0, s0, s9
74; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
75; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
76; FIXEDABI-GISEL-NEXT:    s_addc_u32 s1, s1, 0
77; FIXEDABI-GISEL-NEXT:    s_mov_b32 s14, s8
78; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
79; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[8:9], 0
80; FIXEDABI-GISEL-NEXT:    s_mov_b32 s12, s6
81; FIXEDABI-GISEL-NEXT:    s_mov_b32 s13, s7
82; FIXEDABI-GISEL-NEXT:    s_mov_b32 s32, 0
83; FIXEDABI-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s5
84; FIXEDABI-GISEL-NEXT:    s_getpc_b64 s[4:5]
85; FIXEDABI-GISEL-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
86; FIXEDABI-GISEL-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
87; FIXEDABI-GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
88; FIXEDABI-GISEL-NEXT:    s_endpgm
89  call void @requires_all_inputs()
90  ret void
91}
92
93; Function is marked with amdgpu-no-work-item-id-* but uses them anyway
94define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
95; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
96; FIXEDABI-SDAG:       ; %bb.0:
97; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; FIXEDABI-SDAG-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
99; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
100; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
101; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 10, 10
102; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
103; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
104; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 20, 10
105; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
106; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
107; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
108;
109; FIXEDABI-GISEL-LABEL: marked_func_use_workitem_id:
110; FIXEDABI-GISEL:       ; %bb.0:
111; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; FIXEDABI-GISEL-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
113; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v3, v31, 10, 10
114; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v4, v31, 20, 10
115; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
116; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
117; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v3
118; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
119; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v4
120; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
121; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Read the x, y and z workitem ids and write each one to %ptr with its own
; volatile store, in that order. Both expected sequences above derive all
; three values from v31: a 0x3ff mask, then v_bfe_u32 with operands 10, 10
; and 20, 10. The first block interleaves each extract with its store; the
; second does all three extracts before the first store.
122  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
123  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
124  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
125  store volatile i32 %id.x, i32 addrspace(1)* %ptr
126  store volatile i32 %id.y, i32 addrspace(1)* %ptr
127  store volatile i32 %id.z, i32 addrspace(1)* %ptr
128  ret void
129}
130
131; Kernel is marked with amdgpu-no-work-item-id-* but uses them anyway
132define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
133; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
134; FIXEDABI:       ; %bb.0:
135; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
136; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
137; FIXEDABI-NEXT:    v_mov_b32_e32 v4, s1
138; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s0
139; FIXEDABI-NEXT:    flat_store_dword v[3:4], v0
140; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
141; FIXEDABI-NEXT:    flat_store_dword v[3:4], v1
142; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
143; FIXEDABI-NEXT:    flat_store_dword v[3:4], v2
144; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
145; FIXEDABI-NEXT:    s_endpgm
; Read the x, y and z workitem ids and write each one to %ptr with its own
; volatile store. The expected output above (shared by both RUN lines) loads
; the pointer from s[4:5] at offset 0 and stores v0, v1 and v2 as-is, with no
; masking or field extraction.
146  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
147  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
148  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
149  store volatile i32 %id.x, i32 addrspace(1)* %ptr
150  store volatile i32 %id.y, i32 addrspace(1)* %ptr
151  store volatile i32 %id.z, i32 addrspace(1)* %ptr
152  ret void
153}
154
; Non-kernel function that carries the amdgpu-no-work-group-id-* hints (via
; #0) yet reads all three workgroup ids and writes each one to %ptr with a
; volatile store. The expected output, shared by both RUN lines, copies s12,
; s13 and s14 into v2 for the three stores.
155define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
156; FIXEDABI-LABEL: marked_func_use_workgroup_id:
157; FIXEDABI:       ; %bb.0:
158; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s12
160; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
161; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
162; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s13
163; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
164; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
165; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s14
166; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
167; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
168; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
169  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
170  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
171  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
172  store volatile i32 %id.x, i32 addrspace(1)* %ptr
173  store volatile i32 %id.y, i32 addrspace(1)* %ptr
174  store volatile i32 %id.z, i32 addrspace(1)* %ptr
175  ret void
176}
177
; Kernel that carries the amdgpu-no-work-group-id-* hints (via #0) yet reads
; all three workgroup ids and writes each one to %ptr with a volatile store.
; The expected output, shared by both RUN lines, loads the pointer from
; s[4:5] at offset 0 and copies s6, s7 and s8 into v2 for the three stores.
178define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
179; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
180; FIXEDABI:       ; %bb.0:
181; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
182; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
183; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
184; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
185; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
186; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
187; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
188; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s7
189; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
190; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
191; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
192; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
193; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
194; FIXEDABI-NEXT:    s_endpgm
195  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
196  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
197  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
198  store volatile i32 %id.x, i32 addrspace(1)* %ptr
199  store volatile i32 %id.y, i32 addrspace(1)* %ptr
200  store volatile i32 %id.z, i32 addrspace(1)* %ptr
201  ret void
202}
203
; Non-kernel function that carries the #0 hints yet uses the queue ptr,
; implicitarg ptr, dispatch ptr and dispatch id. It does a volatile i8 load
; through each pointer (queue, implicitarg, dispatch, in that order) and a
; volatile store of the dispatch id to %ptr. The expected output, shared by
; both RUN lines, issues the three byte loads from s[6:7], s[8:9] and s[4:5]
; in that order and stores s[10:11] as the 64-bit value.
204define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
205; FIXEDABI-LABEL: marked_func_use_other_sgpr:
206; FIXEDABI:       ; %bb.0:
207; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
209; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s7
210; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
211; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
212; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
213; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s9
214; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
215; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
216; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s4
217; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s5
218; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
219; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
220; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s10
221; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s11
222; FIXEDABI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
223; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
224; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
225  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
226  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
227  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
228  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
229  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
230  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
231  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
232  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
233  ret void
234}
235
; Kernel with the same IR body as marked_func_use_other_sgpr: volatile i8
; loads through the queue, implicitarg and dispatch pointers, then a volatile
; store of the dispatch id to %ptr. The expected output, shared by both RUN
; lines, has three byte loads and no store at all. The first load uses
; v[0:1] without any preceding address setup, the second uses s[4:5] + 8,
; and the third reuses v[0:1] after v0 was overwritten by the second load.
236define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
237; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
238; FIXEDABI:       ; %bb.0:
239; FIXEDABI-NEXT:    s_add_u32 s0, s4, 8
240; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
241; FIXEDABI-NEXT:    s_addc_u32 s1, s5, 0
242; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
243; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
244; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
245; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
246; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
247; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
248; FIXEDABI-NEXT:    s_endpgm
249  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
250  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
251  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
252  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
253  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
254  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
255  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
256  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
257  ret void
258}
259
; Kernel with no explicit arguments that carries the #0 hints yet does a
; volatile i8 load through the implicitarg pointer. The expected output,
; shared by both RUN lines, loads the byte from address 0 (v0 and v1 are
; both set to 0 before the load).
260define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
261; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
262; FIXEDABI:       ; %bb.0:
263; FIXEDABI-NEXT:    v_mov_b32_e32 v0, 0
264; FIXEDABI-NEXT:    v_mov_b32_e32 v1, 0
265; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
266; FIXEDABI-NEXT:    s_endpgm
267  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
268  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
269  ret void
270}
271
272; On gfx8, the queue ptr is required for this addrspacecast.
273define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) #0 {
274; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
275; FIXEDABI-SDAG:       ; %bb.0:
276; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277; FIXEDABI-SDAG-NEXT:    s_load_dword s4, s[6:7], 0x44
278; FIXEDABI-SDAG-NEXT:    s_load_dword s5, s[6:7], 0x40
279; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
280; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
281; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v2, s4
282; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
283; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
284; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, s5
285; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
286; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v0, vcc
287; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 1
288; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
289; FIXEDABI-SDAG-NEXT:    flat_store_dword v[2:3], v0
290; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
291; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 2
292; FIXEDABI-SDAG-NEXT:    flat_store_dword v[4:5], v0
293; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
294; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
295;
296; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
297; FIXEDABI-GISEL:       ; %bb.0:
298; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; FIXEDABI-GISEL-NEXT:    s_load_dword s4, s[6:7], 0x44
300; FIXEDABI-GISEL-NEXT:    s_load_dword s5, s[6:7], 0x40
301; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
302; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
303; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
304; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v3, s4
305; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
306; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, s5
307; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
308; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
309; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
310; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, 1
311; FIXEDABI-GISEL-NEXT:    flat_store_dword v[2:3], v4
312; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
313; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v2, 2
314; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
315; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
316; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Cast the private and the local pointer to flat pointers, then do one
; volatile store through each (1 via the private cast, 2 via the local cast).
; Both expected sequences above load two dwords from s[6:7] at offsets 0x44
; and 0x40, use them as the high halves of the flat addresses, and select 0
; for both halves when the incoming 32-bit pointer equals -1.
317  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
318  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
319  store volatile i32 1, i32* %flat.private
320  store volatile i32 2, i32* %flat.local
321  ret void
322}
323
; Non-kernel function carrying the #0 hints that calls llvm.amdgcn.is.shared
; on %ptr, zero-extends the i1 result and writes it with a volatile store
; whose destination is undef in the IR. The expected output, shared by both
; RUN lines, loads a dword from s[6:7] at offset 0x40 and compares it for
; equality with v1.
324define void @is_shared_requires_queue_ptr(i8* %ptr) #0 {
325; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
326; FIXEDABI:       ; %bb.0:
327; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; FIXEDABI-NEXT:    s_load_dword s4, s[6:7], 0x40
329; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
330; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
331; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
332; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
333; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
334; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
335  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
336  %zext = zext i1 %is.shared to i32
337  store volatile i32 %zext, i32 addrspace(1)* undef
338  ret void
339}
340
; Non-kernel function carrying the #0 hints that calls llvm.amdgcn.is.private
; on %ptr, zero-extends the i1 result and writes it with a volatile store
; whose destination is undef in the IR. The expected output, shared by both
; RUN lines, loads a dword from s[6:7] at offset 0x44 and compares it for
; equality with v1.
341define void @is_private_requires_queue_ptr(i8* %ptr) #0 {
342; FIXEDABI-LABEL: is_private_requires_queue_ptr:
343; FIXEDABI:       ; %bb.0:
344; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345; FIXEDABI-NEXT:    s_load_dword s4, s[6:7], 0x44
346; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
347; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
348; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
349; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
350; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
351; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
352  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
353  %zext = zext i1 %is.private to i32
354  store volatile i32 %zext, i32 addrspace(1)* undef
355  ret void
356}
357
; Non-kernel function carrying the #0 hints that calls llvm.trap and then
; ends in unreachable. The expected output, shared by both RUN lines, copies
; s[6:7] into s[0:1] immediately before s_trap 2 and has nothing after it.
358define void @trap_requires_queue() #0 {
359; FIXEDABI-LABEL: trap_requires_queue:
360; FIXEDABI:       ; %bb.0:
361; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362; FIXEDABI-NEXT:    s_mov_b64 s[0:1], s[6:7]
363; FIXEDABI-NEXT:    s_trap 2
364  call void @llvm.trap()
365  unreachable
366}
367
; Non-kernel function carrying the #0 hints that calls llvm.debugtrap and
; then ends in unreachable. The expected output, shared by both RUN lines, is
; a bare s_trap 3; unlike trap_requires_queue, no register copy precedes it.
368define void @debugtrap_requires_queue() #0 {
369; FIXEDABI-LABEL: debugtrap_requires_queue:
370; FIXEDABI:       ; %bb.0:
371; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; FIXEDABI-NEXT:    s_trap 3
373  call void @llvm.debugtrap()
374  unreachable
375}
376
; Declarations of the intrinsics called by the test functions above: the
; workitem and workgroup id readers, the queue/implicitarg/dispatch pointer
; and dispatch id readers, the is.shared/is.private address queries, and the
; two trap intrinsics.
377declare i32 @llvm.amdgcn.workitem.id.x()
378declare i32 @llvm.amdgcn.workitem.id.y()
379declare i32 @llvm.amdgcn.workitem.id.z()
380declare i32 @llvm.amdgcn.workgroup.id.x()
381declare i32 @llvm.amdgcn.workgroup.id.y()
382declare i32 @llvm.amdgcn.workgroup.id.z()
383declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
384declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
385declare i64 @llvm.amdgcn.dispatch.id()
386declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
387declare i1 @llvm.amdgcn.is.shared(i8*)
388declare i1 @llvm.amdgcn.is.private(i8*)
389declare void @llvm.trap()
390declare void @llvm.debugtrap()
391
; Attribute group attached to every function defined above. It lists the
; amdgpu-no-* hints for the dispatch id, dispatch ptr, implicitarg ptr, LDS
; kernel id, queue ptr, and the x/y/z workgroup and workitem ids, even though
; the functions go on to use the corresponding intrinsics or call a callee
; that may need them.
392attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
393