; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s

; Code generation tests for the llvm.ptrmask intrinsic, compiled once for
; gfx900 and once for gfx1010 (see the two llc invocations above).
;
; Each test function is a single llvm.ptrmask call whose result is returned.
; The matrix covered is:
;   * pointer kind: addrspace(1) ("global" in the names) and addrspace(3)
;     ("local" in the names);
;   * mask type: i64, i32 and i16;
;   * "v_" functions use the default calling convention and are expected to
;     produce v_and/v_mov instructions; "s_" functions are amdgpu_ps with
;     inreg arguments and are expected to produce s_and/s_mov instructions.
;
; The assertion comments inside the functions are regenerated by the script
; named on the first line; do not edit them by hand.

; addrspace(1) pointer, i64 mask: both 32-bit halves are ANDed.
define i8 addrspace(1)* @v_ptrmask_global_variable_i64(i8 addrspace(1)* %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, v1, v3
; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_global_variable_i64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
  ret i8 addrspace(1)* %result
}

; addrspace(1) pointer, i32 mask: the low half is ANDed and the high half of
; the returned pointer is set to 0.
define i8 addrspace(1)* @v_ptrmask_global_variable_i32(i8 addrspace(1)* %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_global_variable_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
  ret i8 addrspace(1)* %result
}

; addrspace(1) pointer, i16 mask: the AND selects the low 16-bit word of the
; mask register (src1_sel:WORD_0) and the high half of the result is set to 0.
define i8 addrspace(1)* @v_ptrmask_global_variable_i16(i8 addrspace(1)* %ptr, i16 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_global_variable_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
  ret i8 addrspace(1)* %result
}

; addrspace(3) pointer, i64 mask: a single 32-bit AND of v0 with v1 is
; expected.
define i8 addrspace(3)* @v_ptrmask_local_variable_i64(i8 addrspace(3)* %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_local_variable_i64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
  ret i8 addrspace(3)* %result
}

; addrspace(3) pointer, i32 mask: a single 32-bit AND is expected.
define i8 addrspace(3)* @v_ptrmask_local_variable_i32(i8 addrspace(3)* %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_local_variable_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
  ret i8 addrspace(3)* %result
}

; addrspace(3) pointer, i16 mask: a single AND that selects the low 16-bit
; word of the mask register (src1_sel:WORD_0) is expected.
define i8 addrspace(3)* @v_ptrmask_local_variable_i16(i8 addrspace(3)* %ptr, i16 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ptrmask_local_variable_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
  ret i8 addrspace(3)* %result
}

; amdgpu_ps / inreg variant, addrspace(1) pointer, i64 mask: one 64-bit
; scalar AND is expected.
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i64(i8 addrspace(1)* inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_ptrmask_global_variable_i64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10-NEXT:    ; return to shader part epilog
  %result = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %ptr, i64 %mask)
  ret i8 addrspace(1)* %result
}

; amdgpu_ps / inreg variant, addrspace(1) pointer, i32 mask: the expected
; sequence writes 0 to s5, performs a 64-bit scalar AND, then writes 0 to s1.
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i32(i8 addrspace(1)* inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s5, 0
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_ptrmask_global_variable_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s5, 0
; GFX10-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10-NEXT:    s_mov_b32 s1, 0
; GFX10-NEXT:    ; return to shader part epilog
  %result = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)* %ptr, i32 %mask)
  ret i8 addrspace(1)* %result
}

; amdgpu_ps / inreg variant, addrspace(1) pointer, i16 mask: the mask is first
; ANDed with 0xffff, then used in a 64-bit scalar AND; s1 is written with 0
; afterwards. The two targets differ only in the order of the first two
; instructions.
define amdgpu_ps i8 addrspace(1)* @s_ptrmask_global_variable_i16(i8 addrspace(1)* inreg %ptr, i16 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s4, 0xffff
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_ptrmask_global_variable_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s1, 0
; GFX10-NEXT:    s_and_b32 s0, s4, 0xffff
; GFX10-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX10-NEXT:    s_mov_b32 s1, 0
; GFX10-NEXT:    ; return to shader part epilog
  %result = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)* %ptr, i16 %mask)
  ret i8 addrspace(1)* %result
}

; amdgpu_ps / inreg variant, addrspace(3) pointer, i64 mask: one 32-bit
; scalar AND of s2 with s3 is expected.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i64(i8 addrspace(3)* inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s2, s3
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_ptrmask_local_variable_i64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b32 s0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %result = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
  ret i8 addrspace(3)* %result
}

; amdgpu_ps / inreg variant, addrspace(3) pointer, i32 mask: one 32-bit
; scalar AND is expected.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i32(i8 addrspace(3)* inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s2, s3
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_ptrmask_local_variable_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b32 s0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %result = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
  ret i8 addrspace(3)* %result
}

; amdgpu_ps / inreg variant, addrspace(3) pointer, i16 mask: the mask is first
; ANDed with 0xffff and the result is then ANDed with the pointer.
define amdgpu_ps i8 addrspace(3)* @s_ptrmask_local_variable_i16(i8 addrspace(3)* inreg %ptr, i16 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, 0xffff, s3
; GCN-NEXT:    s_and_b32 s0, s2, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_ptrmask_local_variable_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_and_b32 s0, 0xffff, s3
; GFX10-NEXT:    s_and_b32 s0, s2, s0
; GFX10-NEXT:    ; return to shader part epilog
  %result = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
  ret i8 addrspace(3)* %result
}

; Intrinsic declarations: one overload per (pointer address space, mask type)
; pair exercised above.
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) #0
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) #0
declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)*, i64) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i32(i8 addrspace(1)*, i32) #0
declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i16(i8 addrspace(1)*, i16) #0

; Attribute group shared by every declaration above.
attributes #0 = { nounwind readnone speculatable willreturn }