; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Codegen tests for the llvm.ptrmask intrinsic with a variable (non-constant)
; mask. The three llc invocations above compile this file for gfx900, gfx1010
; and gfx1100; the gfx900 output is checked under the GCN prefix, and output
; that is identical for gfx1010 and gfx1100 is checked under the shared
; GFX10PLUS prefix.
;
; Test naming:
;   v_ptrmask_*  - default calling convention with plain arguments; the
;                  expected code uses vector ALU instructions on v0.. and
;                  returns with s_setpc_b64.
;   s_ptrmask_*  - amdgpu_ps functions with inreg arguments; the expected
;                  code uses scalar ALU instructions on s0...
;   global/local - the pointer is in addrspace(1) / addrspace(3).
;   i64/i32/i16  - width of the mask operand.

; 64-bit mask on an addrspace(1) pointer: the checks expect two 32-bit ANDs,
; pairing v0 with v2 and v1 with v3.
define i8 addrspace(1)* @v_ptrmask_global_variable_i64(i8 addrspace(1)* %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, v1, v3
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_global_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
  ret i8 addrspace(1)* %masked
}

; 32-bit mask on an addrspace(1) pointer: the checks expect v0 to be ANDed
; with the mask in v2 and v1 (the other returned register) to be set to 0.
; On gfx1100 both operations are expected in one v_dual instruction.
define i8 addrspace(1)* @v_ptrmask_global_variable_i32(i8 addrspace(1)* %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_global_variable_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ptrmask_global_variable_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
  ret i8 addrspace(1)* %masked
}

; 16-bit mask on an addrspace(1) pointer: like the i32 case, except that only
; the low 16 bits of the mask register v2 are used. gfx900 and gfx1010 are
; expected to select them with src1_sel:WORD_0 on an SDWA AND; gfx1100 is
; expected to use a separate AND with 0xffff first.
define i8 addrspace(1)* @v_ptrmask_global_variable_i16(i8 addrspace(1)* %ptr, i16 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_global_variable_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ptrmask_global_variable_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
  ret i8 addrspace(1)* %masked
}

; 64-bit mask on an addrspace(3) pointer: the checks expect a single 32-bit
; AND of v0 with v1 (presumably the low half of the mask; v2 is never
; referenced in the expected output).
define i8 addrspace(3)* @v_ptrmask_local_variable_i64(i8 addrspace(3)* %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
  ret i8 addrspace(3)* %masked
}

; 32-bit mask on an addrspace(3) pointer: the checks expect a single 32-bit
; AND of v0 with v1.
define i8 addrspace(3)* @v_ptrmask_local_variable_i32(i8 addrspace(3)* %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
  ret i8 addrspace(3)* %masked
}

; 16-bit mask on an addrspace(3) pointer: only the low 16 bits of the mask
; register v1 are used. gfx900 and gfx1010 are expected to select them with
; src1_sel:WORD_0; gfx1100 is expected to AND v1 with 0xffff first.
define i8 addrspace(3)* @v_ptrmask_local_variable_i16(i8 addrspace(3)* %ptr, i16 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_local_variable_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ptrmask_local_variable_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT: v_and_b32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
  ret i8 addrspace(3)* %masked
}

; Scalar variant, 64-bit mask on an addrspace(1) pointer: the checks expect a
; single s_and_b64 of s[2:3] with s[4:5], written to s[0:1].
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i64(i8 addrspace(1)* inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT: ; return to shader part epilog
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
  ret i8 addrspace(1)* %masked
}

; Scalar variant, 32-bit mask on an addrspace(1) pointer: the checks expect
; s5 to be set to 0 before a 64-bit AND of s[2:3] with s[4:5], and s1 to be
; set to 0 again after the AND.
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i32(i8 addrspace(1)* inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s5, 0
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s5, 0
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
  ret i8 addrspace(1)* %masked
}

; Scalar variant, 16-bit mask on an addrspace(1) pointer: the checks expect
; the mask in s4 to be ANDed with 0xffff into s0 and s1 to be set to 0, then
; a 64-bit AND of s[2:3] with s[0:1], then s1 set to 0 again. The order of
; the first two instructions differs between gfx900 and the GFX10PLUS targets.
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i16(i8 addrspace(1)* inreg %ptr, i16 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b32 s0, s4, 0xffff
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
; GFX10PLUS-NEXT: s_and_b32 s0, s4, 0xffff
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
  ret i8 addrspace(1)* %masked
}

; Scalar variant, 64-bit mask on an addrspace(3) pointer: the checks expect a
; single 32-bit AND of s2 with s3 (presumably the low half of the mask; s4 is
; never referenced in the expected output).
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i64(i8 addrspace(3)* inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b32 s0, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
  ret i8 addrspace(3)* %masked
}

; Scalar variant, 32-bit mask on an addrspace(3) pointer: the checks expect a
; single 32-bit AND of s2 with s3.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i32(i8 addrspace(3)* inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b32 s0, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
  ret i8 addrspace(3)* %masked
}

; Scalar variant, 16-bit mask on an addrspace(3) pointer: the checks expect
; the mask in s3 to be ANDed with 0xffff first, and the result to be ANDed
; with the pointer in s2.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i16(i8 addrspace(3)* inreg %ptr, i16 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b32 s0, 0xffff, s3
; GCN-NEXT: s_and_b32 s0, s2, s0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b32 s0, 0xffff, s3
; GFX10PLUS-NEXT: s_and_b32 s0, s2, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
  ret i8 addrspace(3)* %masked
}

; Declarations of the llvm.ptrmask overloads called above: addrspace(3) and
; addrspace(1) i8 pointers, each with i64, i32 and i16 mask types.
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) #0
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) #0
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)*, i64) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)*, i32) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)*, i16) #0

; Attribute group shared by all six intrinsic declarations.
attributes #0 = { nounwind readnone speculatable willreturn }