; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; GlobalISel code generation checks for the llvm.amdgcn.image.load.2darraymsaa
; intrinsic, compiled for tahiti, gfx1010 and gfx1100. The three functions
; differ only in the second-to-last immediate operand of the intrinsic call
; (0, 1 and 3) and in whether the struct-returning form of the intrinsic is
; used.

; Plain four-component load: dmask operand 15, both trailing immediates 0.
; The <4 x float> result is returned directly.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Same load through the struct-returning form of the intrinsic, with the
; second-to-last immediate set to 1; the expected image_load carries the "tfe"
; modifier. The extra i32 struct member (%v.err) is stored to %out and the
; vector member is returned.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v8, v3
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v9
; GFX10-NEXT:    v_mov_b32_e32 v11, v9
; GFX10-NEXT:    v_mov_b32_e32 v12, v9
; GFX10-NEXT:    v_mov_b32_e32 v13, v9
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v9
; GFX10-NEXT:    v_mov_b32_e32 v1, v10
; GFX10-NEXT:    v_mov_b32_e32 v2, v11
; GFX10-NEXT:    v_mov_b32_e32 v3, v12
; GFX10-NEXT:    v_mov_b32_e32 v4, v13
; GFX10-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v9, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v9, 0
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT:    v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT:    v_mov_b32_e32 v11, v9
; GFX11-NEXT:    v_mov_b32_e32 v13, v9
; GFX11-NEXT:    v_mov_b32_e32 v12, v9
; GFX11-NEXT:    v_mov_b32_e32 v10, v9
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_mov_b32_e32 v0, v9
; GFX11-NEXT:    v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12
; GFX11-NEXT:    v_dual_mov_b32 v1, v10 :: v_dual_mov_b32 v4, v13
; GFX11-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v9, v4, s[10:11]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; As the previous function, but with the second-to-last immediate set to 3;
; the expected image_load carries both the "tfe" and "lwe" modifiers.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v8, v3
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe lwe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v9
; GFX10-NEXT:    v_mov_b32_e32 v11, v9
; GFX10-NEXT:    v_mov_b32_e32 v12, v9
; GFX10-NEXT:    v_mov_b32_e32 v13, v9
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v9
; GFX10-NEXT:    v_mov_b32_e32 v1, v10
; GFX10-NEXT:    v_mov_b32_e32 v2, v11
; GFX10-NEXT:    v_mov_b32_e32 v3, v12
; GFX10-NEXT:    v_mov_b32_e32 v4, v13
; GFX10-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v9, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v9, 0
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT:    v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT:    v_mov_b32_e32 v11, v9
; GFX11-NEXT:    v_mov_b32_e32 v13, v9
; GFX11-NEXT:    v_mov_b32_e32 v12, v9
; GFX11-NEXT:    v_mov_b32_e32 v10, v9
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_mov_b32_e32 v0, v9
; GFX11-NEXT:    v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12
; GFX11-NEXT:    v_dual_mov_b32 v1, v10 :: v_dual_mov_b32 v4, v13
; GFX11-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v9, v4, s[10:11]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }