; XFAIL: *
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s

; GlobalISel instruction-selection tests for the llvm.amdgcn.struct.buffer.load
; family of intrinsics. Each test checks the selected MIR (BUFFER_LOAD_*_BOTHEN
; plus the register setup) for a particular operand/register-bank combination.
; Do not hand-edit the CHECK lines; regenerate with update_mir_test_checks.py.

; Natural mapping
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; Natural mapping
define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
  ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
  ; CHECK: $vgpr0 = COPY [[COPY7]]
  ; CHECK: $vgpr1 = COPY [[COPY8]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x float> %val
}

; Natural mapping
define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0
  ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1
  ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2
  ; CHECK: $vgpr0 = COPY [[COPY7]]
  ; CHECK: $vgpr1 = COPY [[COPY8]]
  ; CHECK: $vgpr2 = COPY [[COPY9]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %val = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <3 x float> %val
}

; Natural mapping
define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0
  ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1
  ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2
  ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub3
  ; CHECK: $vgpr0 = COPY [[COPY7]]
  ; CHECK: $vgpr1 = COPY [[COPY8]]
  ; CHECK: $vgpr2 = COPY [[COPY9]]
  ; CHECK: $vgpr3 = COPY [[COPY10]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x float> %val
}

; Natural mapping
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; Natural mapping
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset.base, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7" + 4095, align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %voffset = add i32 %voffset.base, 4095
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_soffset_64(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset) {
  ; CHECK-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_soffset_64
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0)
  ret float %val
}

; Need to legalize all reg operands
define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; CHECK: bb.2:
  ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY9]], implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
  ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
  ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
  ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
  ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
  ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; CHECK: bb.3:
  ; CHECK: successors: %bb.4(0x80000000)
  ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; CHECK: bb.4:
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "TargetCustom7", addrspace 4)
  ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], [[COPY7]], implicit $exec
  ; CHECK: $vgpr0 = COPY [[V_AND_B32_e64_]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = zext i8 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "TargetCustom7", addrspace 4)
  ; CHECK: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec
  ; CHECK: $vgpr0 = COPY [[V_BFE_I32_]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = sext i8 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], [[COPY7]], implicit $exec
  ; CHECK: $vgpr0 = COPY [[V_AND_B32_e64_]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = zext i16 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: [[V_BFE_I32_:%[0-9]+]]:vgpr_32 = V_BFE_I32 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec
  ; CHECK: $vgpr0 = COPY [[V_BFE_I32_]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = sext i16 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

; Natural mapping
define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret half %val
}

; Natural mapping
define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x half> %val
}

; FIXME: Crashes
; define amdgpu_ps <3 x half> @struct_buffer_load_v3f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
;   %val = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
;   ret <3 x half> %val
; }

; Natural mapping
define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
  ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
  ; CHECK: $vgpr0 = COPY [[COPY7]]
  ; CHECK: $vgpr1 = COPY [[COPY8]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x half> %val
}

; Natural mapping + glc
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
  ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)
  ret float %val
}

declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) #0

attributes #0 = { nounwind readonly }