; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Plain 2D-array-MSAA load of all four channels (first immarg is 15, which the
; checks show as dmask:0xf). The second-to-last immarg (texfailctrl in the
; AMDGPU intrinsic definitions) is 0, so the expected image_load carries
; neither the tfe nor the lwe modifier. The tahiti output marks the array
; dimension with "da", while the gfx1010 output uses an explicit "dim:" operand.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Same load, but through the struct-returning intrinsic with the
; second-to-last immarg set to 1: the expected image_load gains the tfe
; modifier and writes five dwords (v[0:4]). The extra i32 struct member is
; stored to %out, which the checks show as a store of v4.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:4], v[0:3], s[0:7] dmask:0xf unorm tfe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    image_load v[0:4], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Variant with the second-to-last immarg set to 3: the expected image_load
; carries both the tfe and the lwe modifiers. As in the tfe-only case, the
; extra i32 struct member is stored to %out.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:4], v[0:3], s[0:7] dmask:0xf unorm tfe lwe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    image_load v[0:4], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }