; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Exercises GlobalISel selection of llvm.amdgcn.image.load.2darraymsaa when all
; four coordinates (%s, %t, %slice, %fragid) are i16. The expected output packs
; each coordinate pair into one 32-bit VGPR (shift-left by 16 + and-or with
; 0xffff) and emits image_load with the a16 modifier.
;
; The three functions differ only in the second-to-last immediate operand of the
; intrinsic call (0, 1 and 3), which appears on the expected image_load as no
; extra modifier, `tfe`, and `tfe lwe` respectively.
;
; Pointer arguments use the opaque `ptr` spelling; the pointee type plays no
; part in instruction selection, so the expected assembly is unaffected.

; Plain variant: both trailing immediates are 0 and the four loaded floats are
; returned directly.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v2, v4, v1
; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
; GFX10PLUS-NEXT:    v_and_or_b32 v1, 0xffff, v2, v3
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Second-to-last immediate = 1: the intrinsic returns { <4 x float>, i32 }. The
; i32 member is stored to %out and the vector member is returned. The expected
; image_load carries `tfe` and writes five VGPRs (v[0:4]), all of which are
; initialised from a zeroed register beforehand.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v10, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_and_or_b32 v11, v2, v4, v0
; GFX9-NEXT:    v_mov_b32_e32 v6, v5
; GFX9-NEXT:    v_mov_b32_e32 v7, v5
; GFX9-NEXT:    v_mov_b32_e32 v8, v5
; GFX9-NEXT:    v_mov_b32_e32 v9, v5
; GFX9-NEXT:    v_mov_b32_e32 v0, v5
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v1, v6
; GFX9-NEXT:    v_mov_b32_e32 v2, v7
; GFX9-NEXT:    v_mov_b32_e32 v3, v8
; GFX9-NEXT:    v_mov_b32_e32 v4, v9
; GFX9-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_mov_b32_e32 v6, v5
; GFX10-NEXT:    v_mov_b32_e32 v7, v5
; GFX10-NEXT:    v_mov_b32_e32 v8, v5
; GFX10-NEXT:    v_mov_b32_e32 v9, v5
; GFX10-NEXT:    v_and_or_b32 v10, 0xffff, v0, v1
; GFX10-NEXT:    v_and_or_b32 v11, 0xffff, v2, v3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v5
; GFX10-NEXT:    v_mov_b32_e32 v1, v6
; GFX10-NEXT:    v_mov_b32_e32 v2, v7
; GFX10-NEXT:    v_mov_b32_e32 v3, v8
; GFX10-NEXT:    v_mov_b32_e32 v4, v9
; GFX10-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v5, 0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    v_mov_b32_e32 v6, v5
; GFX11-NEXT:    v_mov_b32_e32 v7, v5
; GFX11-NEXT:    v_mov_b32_e32 v8, v5
; GFX11-NEXT:    v_mov_b32_e32 v9, v5
; GFX11-NEXT:    v_and_or_b32 v10, 0xffff, v0, v1
; GFX11-NEXT:    v_and_or_b32 v11, 0xffff, v2, v3
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    v_mov_b32_e32 v1, v6
; GFX11-NEXT:    v_mov_b32_e32 v2, v7
; GFX11-NEXT:    v_mov_b32_e32 v3, v8
; GFX11-NEXT:    v_mov_b32_e32 v4, v9
; GFX11-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v5, v4, s[10:11]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Second-to-last immediate = 3: same shape as the variant above, but the
; expected image_load carries both `tfe` and `lwe`.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v10, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_and_or_b32 v11, v2, v4, v0
; GFX9-NEXT:    v_mov_b32_e32 v6, v5
; GFX9-NEXT:    v_mov_b32_e32 v7, v5
; GFX9-NEXT:    v_mov_b32_e32 v8, v5
; GFX9-NEXT:    v_mov_b32_e32 v9, v5
; GFX9-NEXT:    v_mov_b32_e32 v0, v5
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v1, v6
; GFX9-NEXT:    v_mov_b32_e32 v2, v7
; GFX9-NEXT:    v_mov_b32_e32 v3, v8
; GFX9-NEXT:    v_mov_b32_e32 v4, v9
; GFX9-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe lwe da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_mov_b32_e32 v6, v5
; GFX10-NEXT:    v_mov_b32_e32 v7, v5
; GFX10-NEXT:    v_mov_b32_e32 v8, v5
; GFX10-NEXT:    v_mov_b32_e32 v9, v5
; GFX10-NEXT:    v_and_or_b32 v10, 0xffff, v0, v1
; GFX10-NEXT:    v_and_or_b32 v11, 0xffff, v2, v3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v5
; GFX10-NEXT:    v_mov_b32_e32 v1, v6
; GFX10-NEXT:    v_mov_b32_e32 v2, v7
; GFX10-NEXT:    v_mov_b32_e32 v3, v8
; GFX10-NEXT:    v_mov_b32_e32 v4, v9
; GFX10-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v5, 0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    v_mov_b32_e32 v6, v5
; GFX11-NEXT:    v_mov_b32_e32 v7, v5
; GFX11-NEXT:    v_mov_b32_e32 v8, v5
; GFX11-NEXT:    v_mov_b32_e32 v9, v5
; GFX11-NEXT:    v_and_or_b32 v10, 0xffff, v0, v1
; GFX11-NEXT:    v_and_or_b32 v11, 0xffff, v2, v3
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    v_mov_b32_e32 v1, v6
; GFX11-NEXT:    v_mov_b32_e32 v2, v7
; GFX11-NEXT:    v_mov_b32_e32 v3, v8
; GFX11-NEXT:    v_mov_b32_e32 v4, v9
; GFX11-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v5, v4, s[10:11]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Intrinsic declarations: one overload returning the bare vector and one
; returning the { vector, i32 } struct used by the two variants above.
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }