; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s
9
define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
; GFX8V3-LABEL: addrspacecast:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V3-NEXT:    s_load_dword s2, s[4:5], 0x44
; GFX8V3-NEXT:    s_load_dword s3, s[4:5], 0x40
; GFX8V3-NEXT:    v_mov_b32_e32 v4, 1
; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT:    s_cmp_lg_u32 s0, -1
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s2
; GFX8V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V3-NEXT:    s_cmp_lg_u32 s1, -1
; GFX8V3-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX8V3-NEXT:    v_mov_b32_e32 v2, s3
; GFX8V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX8V3-NEXT:    v_mov_b32_e32 v2, s1
; GFX8V3-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8V3-NEXT:    flat_store_dword v[0:1], v4
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, 2
; GFX8V3-NEXT:    flat_store_dword v[2:3], v0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: addrspacecast:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V4-NEXT:    s_load_dword s2, s[4:5], 0x44
; GFX8V4-NEXT:    s_load_dword s3, s[4:5], 0x40
; GFX8V4-NEXT:    v_mov_b32_e32 v4, 1
; GFX8V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT:    s_cmp_lg_u32 s0, -1
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s2
; GFX8V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V4-NEXT:    s_cmp_lg_u32 s1, -1
; GFX8V4-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX8V4-NEXT:    v_mov_b32_e32 v2, s3
; GFX8V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX8V4-NEXT:    v_mov_b32_e32 v2, s1
; GFX8V4-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8V4-NEXT:    flat_store_dword v[0:1], v4
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, 2
; GFX8V4-NEXT:    flat_store_dword v[2:3], v0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: addrspacecast:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8V5-NEXT:    s_load_dword s2, s[4:5], 0xc8
; GFX8V5-NEXT:    s_load_dword s3, s[4:5], 0xcc
; GFX8V5-NEXT:    v_mov_b32_e32 v4, 1
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_cmp_lg_u32 s0, -1
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s2
; GFX8V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V5-NEXT:    s_cmp_lg_u32 s1, -1
; GFX8V5-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX8V5-NEXT:    v_mov_b32_e32 v2, s3
; GFX8V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX8V5-NEXT:    v_mov_b32_e32 v2, s1
; GFX8V5-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8V5-NEXT:    flat_store_dword v[0:1], v4
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, 2
; GFX8V5-NEXT:    flat_store_dword v[2:3], v0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: addrspacecast:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V3-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT:    s_lshl_b32 s2, s2, 16
; GFX9V3-NEXT:    v_mov_b32_e32 v0, s2
; GFX9V3-NEXT:    v_mov_b32_e32 v4, 1
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    s_cmp_lg_u32 s0, -1
; GFX9V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX9V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX9V3-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9V3-NEXT:    s_cmp_lg_u32 s1, -1
; GFX9V3-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9V3-NEXT:    v_mov_b32_e32 v2, s0
; GFX9V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX9V3-NEXT:    v_mov_b32_e32 v2, s1
; GFX9V3-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9V3-NEXT:    flat_store_dword v[0:1], v4
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    v_mov_b32_e32 v0, 2
; GFX9V3-NEXT:    flat_store_dword v[2:3], v0
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V4-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT:    s_lshl_b32 s2, s2, 16
; GFX9V4-NEXT:    v_mov_b32_e32 v0, s2
; GFX9V4-NEXT:    v_mov_b32_e32 v4, 1
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX9V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX9V4-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9V4-NEXT:    s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9V4-NEXT:    v_mov_b32_e32 v2, s0
; GFX9V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX9V4-NEXT:    v_mov_b32_e32 v2, s1
; GFX9V4-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9V4-NEXT:    flat_store_dword v[0:1], v4
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    v_mov_b32_e32 v0, 2
; GFX9V4-NEXT:    flat_store_dword v[2:3], v0
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V5-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT:    s_lshl_b32 s2, s2, 16
; GFX9V5-NEXT:    v_mov_b32_e32 v0, s2
; GFX9V5-NEXT:    v_mov_b32_e32 v4, 1
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX9V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX9V5-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9V5-NEXT:    s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9V5-NEXT:    v_mov_b32_e32 v2, s0
; GFX9V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX9V5-NEXT:    v_mov_b32_e32 v2, s1
; GFX9V5-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9V5-NEXT:    flat_store_dword v[0:1], v4
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    v_mov_b32_e32 v0, 2
; GFX9V5-NEXT:    flat_store_dword v[2:3], v0
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
  store volatile i32 1, i32* %flat.private
  store volatile i32 2, i32* %flat.local
  ret void
}
178
define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_shared:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x40
; GFX8V3-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_shared:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_load_dword s0, s[4:5], 0x40
; GFX8V4-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT:    flat_store_dword v[0:1], v0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_shared:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dword s0, s[4:5], 0xcc
; GFX8V5-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT:    flat_store_dword v[0:1], v0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_shared:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V3-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V4-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V5-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %zext = zext i1 %is.shared to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}
259
define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_private:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x44
; GFX8V3-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_private:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_load_dword s0, s[4:5], 0x44
; GFX8V4-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT:    flat_store_dword v[0:1], v0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_private:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dword s0, s[4:5], 0xc8
; GFX8V5-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT:    flat_store_dword v[0:1], v0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_private:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V3-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V4-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V5-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %zext = zext i1 %is.private to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}
340
define amdgpu_kernel void @llvm_trap() {
; GFX8V3-LABEL: llvm_trap:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX8V3-NEXT:    s_trap 2
;
; GFX8V4-LABEL: llvm_trap:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX8V4-NEXT:    s_trap 2
;
; GFX8V5-LABEL: llvm_trap:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xc8
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_trap 2
;
; GFX9V3-LABEL: llvm_trap:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX9V3-NEXT:    s_trap 2
;
; GFX9V4-LABEL: llvm_trap:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_trap 2
;
; GFX9V5-LABEL: llvm_trap:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_trap 2
  call void @llvm.trap()
  unreachable
}
373
define amdgpu_kernel void @llvm_debugtrap() {
; GFX8V3-LABEL: llvm_debugtrap:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_trap 3
;
; GFX8V4-LABEL: llvm_debugtrap:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_trap 3
;
; GFX8V5-LABEL: llvm_debugtrap:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_trap 3
;
; GFX9V3-LABEL: llvm_debugtrap:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_trap 3
;
; GFX9V4-LABEL: llvm_debugtrap:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_trap 3
;
; GFX9V5-LABEL: llvm_debugtrap:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_trap 3
  call void @llvm.debugtrap()
  unreachable
}
401
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr)  {
; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s6
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s7
; GFX8V3-NEXT:    s_add_u32 s0, s8, 8
; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT:    s_addc_u32 s1, s9, 0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s4
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s5
; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V3-NEXT:    v_mov_b32_e32 v2, s10
; GFX8V3-NEXT:    v_mov_b32_e32 v3, s11
; GFX8V3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V3-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s6
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s7
; GFX8V4-NEXT:    s_add_u32 s0, s8, 8
; GFX8V4-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT:    s_addc_u32 s1, s9, 0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V4-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s4
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s5
; GFX8V4-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V4-NEXT:    v_mov_b32_e32 v2, s10
; GFX8V4-NEXT:    v_mov_b32_e32 v3, s11
; GFX8V4-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V4-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_add_u32 s0, s6, 8
; GFX8V5-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT:    s_addc_u32 s1, s7, 0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V5-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V5-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s4
; GFX8V5-NEXT:    v_mov_b32_e32 v1, s5
; GFX8V5-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V5-NEXT:    v_mov_b32_e32 v2, s8
; GFX8V5-NEXT:    v_mov_b32_e32 v3, s9
; GFX8V5-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V5-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V5-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    v_mov_b32_e32 v2, 0
; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[6:7] glc
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    v_mov_b32_e32 v0, s10
; GFX9V3-NEXT:    v_mov_b32_e32 v1, s11
; GFX9V3-NEXT:    ; kill: killed $sgpr6_sgpr7
; GFX9V3-NEXT:    ; kill: killed $sgpr4_sgpr5
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    v_mov_b32_e32 v2, 0
; GFX9V4-NEXT:    global_load_ubyte v0, v2, s[6:7] glc
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    v_mov_b32_e32 v0, s10
; GFX9V4-NEXT:    v_mov_b32_e32 v1, s11
; GFX9V4-NEXT:    ; kill: killed $sgpr6_sgpr7
; GFX9V4-NEXT:    ; kill: killed $sgpr4_sgpr5
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    v_mov_b32_e32 v2, 0
; GFX9V5-NEXT:    global_load_ubyte v0, v2, s[0:1] glc
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    global_load_ubyte v0, v2, s[6:7] offset:8 glc
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
; GFX9V5-NEXT:    ; kill: killed $sgpr0_sgpr1
; GFX9V5-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    v_mov_b32_e32 v0, s8
; GFX9V5-NEXT:    v_mov_b32_e32 v1, s9
; GFX9V5-NEXT:    ; kill: killed $sgpr4_sgpr5
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
  ret void
}
542
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(i8*)
declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()
551