; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; GlobalISel selection of llvm.amdgcn.image.load.2darraymsaa with a <4 x float>
; result and dmask 15, compiled for tahiti, gfx1010 and gfx1100. The three
; functions differ only in the second-to-last immediate operand of the
; intrinsic call (0, 1 and 3), which shows up in the expected assembly as no
; modifier, "tfe", and "tfe lwe" respectively.
; The assertion lines below are generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Plain load: second-to-last immediate is 0, result is returned directly.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Second-to-last immediate is 1: the intrinsic returns { <4 x float>, i32 }.
; The vector element is returned and the i32 element (%v.err) is stored to %out.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v8, v3
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v9
; GFX10-NEXT:    v_mov_b32_e32 v11, v9
; GFX10-NEXT:    v_mov_b32_e32 v12, v9
; GFX10-NEXT:    v_mov_b32_e32 v13, v9
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v9
; GFX10-NEXT:    v_mov_b32_e32 v1, v10
; GFX10-NEXT:    v_mov_b32_e32 v2, v11
; GFX10-NEXT:    v_mov_b32_e32 v3, v12
; GFX10-NEXT:    v_mov_b32_e32 v4, v13
; GFX10-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v9, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v9, 0
; GFX11-NEXT:    v_mov_b32_e32 v5, v0
; GFX11-NEXT:    v_mov_b32_e32 v6, v1
; GFX11-NEXT:    v_mov_b32_e32 v7, v2
; GFX11-NEXT:    v_mov_b32_e32 v8, v3
; GFX11-NEXT:    v_mov_b32_e32 v10, v9
; GFX11-NEXT:    v_mov_b32_e32 v11, v9
; GFX11-NEXT:    v_mov_b32_e32 v12, v9
; GFX11-NEXT:    v_mov_b32_e32 v13, v9
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_mov_b32_e32 v0, v9
; GFX11-NEXT:    v_mov_b32_e32 v1, v10
; GFX11-NEXT:    v_mov_b32_e32 v2, v11
; GFX11-NEXT:    v_mov_b32_e32 v3, v12
; GFX11-NEXT:    v_mov_b32_e32 v4, v13
; GFX11-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v9, v4, s[10:11]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Same as the previous function, but the second-to-last immediate is 3, which
; the expected assembly renders as "tfe lwe" on the image_load.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v8, v3
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe lwe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v9
; GFX10-NEXT:    v_mov_b32_e32 v11, v9
; GFX10-NEXT:    v_mov_b32_e32 v12, v9
; GFX10-NEXT:    v_mov_b32_e32 v13, v9
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v9
; GFX10-NEXT:    v_mov_b32_e32 v1, v10
; GFX10-NEXT:    v_mov_b32_e32 v2, v11
; GFX10-NEXT:    v_mov_b32_e32 v3, v12
; GFX10-NEXT:    v_mov_b32_e32 v4, v13
; GFX10-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v9, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v9, 0
; GFX11-NEXT:    v_mov_b32_e32 v5, v0
; GFX11-NEXT:    v_mov_b32_e32 v6, v1
; GFX11-NEXT:    v_mov_b32_e32 v7, v2
; GFX11-NEXT:    v_mov_b32_e32 v8, v3
; GFX11-NEXT:    v_mov_b32_e32 v10, v9
; GFX11-NEXT:    v_mov_b32_e32 v11, v9
; GFX11-NEXT:    v_mov_b32_e32 v12, v9
; GFX11-NEXT:    v_mov_b32_e32 v13, v9
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_mov_b32_e32 v0, v9
; GFX11-NEXT:    v_mov_b32_e32 v1, v10
; GFX11-NEXT:    v_mov_b32_e32 v2, v11
; GFX11-NEXT:    v_mov_b32_e32 v3, v12
; GFX11-NEXT:    v_mov_b32_e32 v4, v13
; GFX11-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v9, v4, s[10:11]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Intrinsic declarations: one per return type used above.
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }