; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; llvm.ptrmask on an addrspace(1) pointer with an i64 mask, default calling
; convention. The expected code is two 32-bit vector ANDs: v0 with v2 and
; v1 with v3.
define i8 addrspace(1)* @v_ptrmask_global_variable_i64(i8 addrspace(1)* %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, v1, v3
; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_global_variable_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
  ret i8 addrspace(1)* %masked
}

; llvm.ptrmask on an addrspace(1) pointer with an i32 mask, default calling
; convention. The expected code ANDs v0 with v2 and writes 0 to v1; for
; -mcpu=gfx1100 both operations are expected in a single v_dual instruction.
define i8 addrspace(1)* @v_ptrmask_global_variable_i32(i8 addrspace(1)* %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_global_variable_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ptrmask_global_variable_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
  ret i8 addrspace(1)* %masked
}

; llvm.ptrmask on an addrspace(1) pointer with an i16 mask, default calling
; convention. For -mcpu=gfx900 and gfx1010 the expected AND is an SDWA form
; that selects WORD_0 of v2; for gfx1100 the mask is first ANDed with 0xffff.
; In every configuration v1 is expected to end up as 0.
define i8 addrspace(1)* @v_ptrmask_global_variable_i16(i8 addrspace(1)* %ptr, i16 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_global_variable_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ptrmask_global_variable_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
  ret i8 addrspace(1)* %masked
}

; llvm.ptrmask on an addrspace(3) pointer with an i64 mask, default calling
; convention. The expected code is a single 32-bit vector AND of v0 with v1;
; no other mask register is read.
define i8 addrspace(3)* @v_ptrmask_local_variable_i64(i8 addrspace(3)* %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
  ret i8 addrspace(3)* %masked
}

; llvm.ptrmask on an addrspace(3) pointer with an i32 mask, default calling
; convention. The expected code is a single 32-bit vector AND of v0 with v1.
define i8 addrspace(3)* @v_ptrmask_local_variable_i32(i8 addrspace(3)* %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
  ret i8 addrspace(3)* %masked
}

; llvm.ptrmask on an addrspace(3) pointer with an i16 mask, default calling
; convention. For -mcpu=gfx900 and gfx1010 the expected AND is an SDWA form
; that selects WORD_0 of v1; for gfx1100 v1 is first ANDed with 0xffff and
; then ANDed into v0.
define i8 addrspace(3)* @v_ptrmask_local_variable_i16(i8 addrspace(3)* %ptr, i16 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_local_variable_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ptrmask_local_variable_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
  ret i8 addrspace(3)* %masked
}

; llvm.ptrmask on an addrspace(1) pointer with an i64 mask, amdgpu_ps calling
; convention with both arguments marked inreg. The expected code is a single
; 64-bit scalar AND of s[2:3] with s[4:5] into s[0:1].
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i64(i8 addrspace(1)* inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
  ret i8 addrspace(1)* %masked
}

; llvm.ptrmask on an addrspace(1) pointer with an i32 mask, amdgpu_ps calling
; convention with both arguments marked inreg. The expected code writes 0 to
; s5, performs a 64-bit scalar AND of s[2:3] with s[4:5], and then writes 0
; to s1.
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i32(i8 addrspace(1)* inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s5, 0
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s5, 0
; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT:    s_mov_b32 s1, 0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
  ret i8 addrspace(1)* %masked
}

; llvm.ptrmask on an addrspace(1) pointer with an i16 mask, amdgpu_ps calling
; convention with both arguments marked inreg. The expected code ANDs s4 with
; 0xffff into s0, writes 0 to s1, performs a 64-bit scalar AND of s[2:3] with
; s[0:1], and then writes 0 to s1 again. The order of the first two
; instructions differs between -mcpu=gfx900 and the gfx1010/gfx1100 runs.
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i16(i8 addrspace(1)* inreg %ptr, i16 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s4, 0xffff
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s1, 0
; GFX10PLUS-NEXT:    s_and_b32 s0, s4, 0xffff
; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX10PLUS-NEXT:    s_mov_b32 s1, 0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
  ret i8 addrspace(1)* %masked
}

; llvm.ptrmask on an addrspace(3) pointer with an i64 mask, amdgpu_ps calling
; convention with both arguments marked inreg. The expected code is a single
; 32-bit scalar AND of s2 with s3; no other mask register is read.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i64(i8 addrspace(3)* inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s2, s3
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_and_b32 s0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
  ret i8 addrspace(3)* %masked
}

; llvm.ptrmask on an addrspace(3) pointer with an i32 mask, amdgpu_ps calling
; convention with both arguments marked inreg. The expected code is a single
; 32-bit scalar AND of s2 with s3.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i32(i8 addrspace(3)* inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s2, s3
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_and_b32 s0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
  ret i8 addrspace(3)* %masked
}

; llvm.ptrmask on an addrspace(3) pointer with an i16 mask, amdgpu_ps calling
; convention with both arguments marked inreg. The expected code ANDs s3 with
; 0xffff into s0 and then ANDs s2 with s0.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i16(i8 addrspace(3)* inreg %ptr, i16 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, 0xffff, s3
; GCN-NEXT:    s_and_b32 s0, s2, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_and_b32 s0, 0xffff, s3
; GFX10PLUS-NEXT:    s_and_b32 s0, s2, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
  ret i8 addrspace(3)* %masked
}

; Declarations of the llvm.ptrmask overloads called by the functions above:
; addrspace(3) and addrspace(1) pointers, each with i64, i32 and i16 masks.
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) #0
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) #0
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)*, i64) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)*, i32) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)*, i16) #0

; Attribute group shared by all of the intrinsic declarations.
attributes #0 = { nounwind readnone speculatable willreturn }