1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
4
; llvm.ptrmask on an addrspace(1) pointer with an i64 mask, default calling
; convention (arguments arrive in v registers per the expected output).
; Both runs expect one 32-bit v_and per half of the pointer (v0 & v2, v1 & v3);
; the gfx1010 run emits the two ANDs in the opposite order and has an extra
; s_waitcnt_vscnt at entry.
5define i8 addrspace(1)* @v_ptrmask_global_variable_i64(i8 addrspace(1)* %ptr, i64 %mask) {
6; GCN-LABEL: v_ptrmask_global_variable_i64:
7; GCN:       ; %bb.0:
8; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; GCN-NEXT:    v_and_b32_e32 v1, v1, v3
10; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
11; GCN-NEXT:    s_setpc_b64 s[30:31]
12;
13; GFX10-LABEL: v_ptrmask_global_variable_i64:
14; GFX10:       ; %bb.0:
15; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
17; GFX10-NEXT:    v_and_b32_e32 v0, v0, v2
18; GFX10-NEXT:    v_and_b32_e32 v1, v1, v3
19; GFX10-NEXT:    s_setpc_b64 s[30:31]
20  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
21  ret i8 addrspace(1)* %masked
22}
23
; llvm.ptrmask on an addrspace(1) pointer with an i32 mask (narrower than the
; two-register pointer), default calling convention.
; Both runs expect the low half to be ANDed with the mask (v0 & v2) and the
; high half of the result (v1) to be set to 0, which is consistent with the
; mask being treated as zero-extended.
24define i8 addrspace(1)* @v_ptrmask_global_variable_i32(i8 addrspace(1)* %ptr, i32 %mask) {
25; GCN-LABEL: v_ptrmask_global_variable_i32:
26; GCN:       ; %bb.0:
27; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
29; GCN-NEXT:    v_mov_b32_e32 v1, 0
30; GCN-NEXT:    s_setpc_b64 s[30:31]
31;
32; GFX10-LABEL: v_ptrmask_global_variable_i32:
33; GFX10:       ; %bb.0:
34; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
36; GFX10-NEXT:    v_and_b32_e32 v0, v0, v2
37; GFX10-NEXT:    v_mov_b32_e32 v1, 0
38; GFX10-NEXT:    s_setpc_b64 s[30:31]
39  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
40  ret i8 addrspace(1)* %masked
41}
42
; llvm.ptrmask on an addrspace(1) pointer with an i16 mask, default calling
; convention.
; Both runs expect a single SDWA v_and that reads only WORD_0 of the mask
; register (v2) against the full low half of the pointer (v0), followed by
; the high half of the result (v1) being set to 0.
43define i8 addrspace(1)* @v_ptrmask_global_variable_i16(i8 addrspace(1)* %ptr, i16 %mask) {
44; GCN-LABEL: v_ptrmask_global_variable_i16:
45; GCN:       ; %bb.0:
46; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; GCN-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
48; GCN-NEXT:    v_mov_b32_e32 v1, 0
49; GCN-NEXT:    s_setpc_b64 s[30:31]
50;
51; GFX10-LABEL: v_ptrmask_global_variable_i16:
52; GFX10:       ; %bb.0:
53; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
55; GFX10-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
56; GFX10-NEXT:    v_mov_b32_e32 v1, 0
57; GFX10-NEXT:    s_setpc_b64 s[30:31]
58  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
59  ret i8 addrspace(1)* %masked
60}
61
; llvm.ptrmask on an addrspace(3) pointer with an i64 mask (wider than the
; single-register pointer), default calling convention.
; Both runs expect exactly one 32-bit v_and (v0 & v1); no second mask
; register appears in the expected output, i.e. only the low part of the
; mask is used.
62define i8 addrspace(3)* @v_ptrmask_local_variable_i64(i8 addrspace(3)* %ptr, i64 %mask) {
63; GCN-LABEL: v_ptrmask_local_variable_i64:
64; GCN:       ; %bb.0:
65; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
67; GCN-NEXT:    s_setpc_b64 s[30:31]
68;
69; GFX10-LABEL: v_ptrmask_local_variable_i64:
70; GFX10:       ; %bb.0:
71; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
73; GFX10-NEXT:    v_and_b32_e32 v0, v0, v1
74; GFX10-NEXT:    s_setpc_b64 s[30:31]
75  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
76  ret i8 addrspace(3)* %masked
77}
78
; llvm.ptrmask on an addrspace(3) pointer with an i32 mask, default calling
; convention.
; Both runs expect a single 32-bit v_and of the pointer register with the
; mask register (v0 & v1) and nothing else besides the entry waits and return.
79define i8 addrspace(3)* @v_ptrmask_local_variable_i32(i8 addrspace(3)* %ptr, i32 %mask) {
80; GCN-LABEL: v_ptrmask_local_variable_i32:
81; GCN:       ; %bb.0:
82; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
84; GCN-NEXT:    s_setpc_b64 s[30:31]
85;
86; GFX10-LABEL: v_ptrmask_local_variable_i32:
87; GFX10:       ; %bb.0:
88; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
90; GFX10-NEXT:    v_and_b32_e32 v0, v0, v1
91; GFX10-NEXT:    s_setpc_b64 s[30:31]
92  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
93  ret i8 addrspace(3)* %masked
94}
95
; llvm.ptrmask on an addrspace(3) pointer with an i16 mask, default calling
; convention.
; Both runs expect a single SDWA v_and that combines the full pointer
; register (v0) with only WORD_0 of the mask register (v1).
96define i8 addrspace(3)* @v_ptrmask_local_variable_i16(i8 addrspace(3)* %ptr, i16 %mask) {
97; GCN-LABEL: v_ptrmask_local_variable_i16:
98; GCN:       ; %bb.0:
99; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GCN-NEXT:    v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
101; GCN-NEXT:    s_setpc_b64 s[30:31]
102;
103; GFX10-LABEL: v_ptrmask_local_variable_i16:
104; GFX10:       ; %bb.0:
105; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
107; GFX10-NEXT:    v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
108; GFX10-NEXT:    s_setpc_b64 s[30:31]
109  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
110  ret i8 addrspace(3)* %masked
111}
112
; amdgpu_ps variant with inreg arguments (operands appear in s registers in
; the expected output): llvm.ptrmask on an addrspace(1) pointer with an i64
; mask. Both runs expect a single 64-bit scalar AND of the pointer pair
; s[2:3] with the mask pair s[4:5].
113define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i64(i8 addrspace(1)* inreg %ptr, i64 inreg %mask) {
114; GCN-LABEL: s_ptrmask_global_variable_i64:
115; GCN:       ; %bb.0:
116; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
117; GCN-NEXT:    ; return to shader part epilog
118;
119; GFX10-LABEL: s_ptrmask_global_variable_i64:
120; GFX10:       ; %bb.0:
121; GFX10-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
122; GFX10-NEXT:    ; return to shader part epilog
123  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
124  ret i8 addrspace(1)* %masked
125}
126
; amdgpu_ps variant with inreg arguments: llvm.ptrmask on an addrspace(1)
; pointer with an i32 mask. Both runs expect s5 to be zeroed so that s[4:5]
; forms a 64-bit mask with a zero high half, then a 64-bit scalar AND with
; the pointer pair s[2:3], then s1 written to 0.
; NOTE(review): the trailing s_mov_b32 s1, 0 looks redundant after ANDing
; with a zero high half; the checks record the output as generated - confirm
; whether this is a known missed optimization.
127define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i32(i8 addrspace(1)* inreg %ptr, i32 inreg %mask) {
128; GCN-LABEL: s_ptrmask_global_variable_i32:
129; GCN:       ; %bb.0:
130; GCN-NEXT:    s_mov_b32 s5, 0
131; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
132; GCN-NEXT:    s_mov_b32 s1, 0
133; GCN-NEXT:    ; return to shader part epilog
134;
135; GFX10-LABEL: s_ptrmask_global_variable_i32:
136; GFX10:       ; %bb.0:
137; GFX10-NEXT:    s_mov_b32 s5, 0
138; GFX10-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
139; GFX10-NEXT:    s_mov_b32 s1, 0
140; GFX10-NEXT:    ; return to shader part epilog
141  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
142  ret i8 addrspace(1)* %masked
143}
144
; amdgpu_ps variant with inreg arguments: llvm.ptrmask on an addrspace(1)
; pointer with an i16 mask. Both runs expect the mask register (s4) to be
; ANDed with 0xffff into s0 and s1 set to 0, forming a 64-bit mask in s[0:1],
; which is then ANDed with the pointer pair s[2:3]; s1 is written to 0 again
; afterwards. The two runs differ only in the order of the first two
; instructions.
145define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i16(i8 addrspace(1)* inreg %ptr, i16 inreg %mask) {
146; GCN-LABEL: s_ptrmask_global_variable_i16:
147; GCN:       ; %bb.0:
148; GCN-NEXT:    s_and_b32 s0, s4, 0xffff
149; GCN-NEXT:    s_mov_b32 s1, 0
150; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
151; GCN-NEXT:    s_mov_b32 s1, 0
152; GCN-NEXT:    ; return to shader part epilog
153;
154; GFX10-LABEL: s_ptrmask_global_variable_i16:
155; GFX10:       ; %bb.0:
156; GFX10-NEXT:    s_mov_b32 s1, 0
157; GFX10-NEXT:    s_and_b32 s0, s4, 0xffff
158; GFX10-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
159; GFX10-NEXT:    s_mov_b32 s1, 0
160; GFX10-NEXT:    ; return to shader part epilog
161  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
162  ret i8 addrspace(1)* %masked
163}
164
; amdgpu_ps variant with inreg arguments: llvm.ptrmask on an addrspace(3)
; pointer with an i64 mask. Both runs expect a single 32-bit scalar AND of
; the pointer (s2) with one mask register (s3); no second mask register
; appears in the expected output.
165define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i64(i8 addrspace(3)* inreg %ptr, i64 inreg %mask) {
166; GCN-LABEL: s_ptrmask_local_variable_i64:
167; GCN:       ; %bb.0:
168; GCN-NEXT:    s_and_b32 s0, s2, s3
169; GCN-NEXT:    ; return to shader part epilog
170;
171; GFX10-LABEL: s_ptrmask_local_variable_i64:
172; GFX10:       ; %bb.0:
173; GFX10-NEXT:    s_and_b32 s0, s2, s3
174; GFX10-NEXT:    ; return to shader part epilog
175  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
176  ret i8 addrspace(3)* %masked
177}
178
; amdgpu_ps variant with inreg arguments: llvm.ptrmask on an addrspace(3)
; pointer with an i32 mask. Both runs expect a single 32-bit scalar AND of
; the pointer (s2) with the mask (s3), with the result in s0.
179define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i32(i8 addrspace(3)* inreg %ptr, i32 inreg %mask) {
180; GCN-LABEL: s_ptrmask_local_variable_i32:
181; GCN:       ; %bb.0:
182; GCN-NEXT:    s_and_b32 s0, s2, s3
183; GCN-NEXT:    ; return to shader part epilog
184;
185; GFX10-LABEL: s_ptrmask_local_variable_i32:
186; GFX10:       ; %bb.0:
187; GFX10-NEXT:    s_and_b32 s0, s2, s3
188; GFX10-NEXT:    ; return to shader part epilog
189  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
190  ret i8 addrspace(3)* %masked
191}
192
; amdgpu_ps variant with inreg arguments: llvm.ptrmask on an addrspace(3)
; pointer with an i16 mask. Both runs expect two 32-bit scalar ANDs: the
; mask register (s3) is first ANDed with 0xffff, and that result is then
; ANDed with the pointer (s2).
193define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i16(i8 addrspace(3)* inreg %ptr, i16 inreg %mask) {
194; GCN-LABEL: s_ptrmask_local_variable_i16:
195; GCN:       ; %bb.0:
196; GCN-NEXT:    s_and_b32 s0, 0xffff, s3
197; GCN-NEXT:    s_and_b32 s0, s2, s0
198; GCN-NEXT:    ; return to shader part epilog
199;
200; GFX10-LABEL: s_ptrmask_local_variable_i16:
201; GFX10:       ; %bb.0:
202; GFX10-NEXT:    s_and_b32 s0, 0xffff, s3
203; GFX10-NEXT:    s_and_b32 s0, s2, s0
204; GFX10-NEXT:    ; return to shader part epilog
205  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
206  ret i8 addrspace(3)* %masked
207}
208
; Declarations of the llvm.ptrmask overloads called by the functions in this
; file: one per combination of pointer address space (3 or 1) and mask width
; (i64, i32, i16). All of them share attribute group #0, defined below.
209declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) #0
210declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) #0
211declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) #0
212declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)*, i64) #0
213declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)*, i32) #0
214declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)*, i16) #0
215
216attributes #0 = { nounwind readnone speculatable willreturn }
217