; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,SDAG
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GISEL

; Codegen test for the struct-buffer load-to-LDS intrinsic on gfx900, run
; through both the default selector and GlobalISel. Cases whose expected output
; is the same for both selectors share one set of assertions; the first case
; has per-selector assertions because the instruction order differs.
;
; Intrinsic operands, in declaration order:
;   rsrc, LDS pointer, size, vindex, voffset, soffset, offset, aux
; As exercised below, size 4/2/1 is expected to select the dword/ushort/ubyte
; load, and aux 1 / aux 2 are expected to print the glc / slc modifier.

declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)

; Three dword loads with a constant vindex of 8, immediate offsets 0/4/8 and
; aux 0/1/2, followed by a read-back of the same LDS pointer. The assertions
; require the vector-memory wait before the LDS read.
define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds) {
; SDAG-LABEL: buffer_load_lds_dword:
; SDAG:       ; %bb.0: ; %main_body
; SDAG-NEXT:    v_mov_b32_e32 v0, 8
; SDAG-NEXT:    s_mov_b32 m0, s4
; SDAG-NEXT:    s_nop 0
; SDAG-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen lds
; SDAG-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:4 glc lds
; SDAG-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:8 slc lds
; SDAG-NEXT:    v_mov_b32_e32 v0, s4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    ds_read_b32 v0, v0
; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: buffer_load_lds_dword:
; GISEL:       ; %bb.0: ; %main_body
; GISEL-NEXT:    s_mov_b32 m0, s4
; GISEL-NEXT:    v_mov_b32_e32 v0, 8
; GISEL-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen lds
; GISEL-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:4 glc lds
; GISEL-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:8 slc lds
; GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    ds_read_b32 v0, v0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2)
  %ptr = bitcast i8 addrspace(3)* %lds to float addrspace(3)*
  %res = load float, float addrspace(3)* %ptr
  ret float %res
}

; Variable vindex with an immediate offset of 2048; the offset is expected in
; the instruction's offset field.
define amdgpu_ps void @buffer_load_lds_dword_imm_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_dword_imm_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:2048 lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}

; Variable vindex and variable voffset; both are expected as a VGPR pair with
; the idxen and offen modifiers.
define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset) {
; GCN-LABEL: buffer_load_lds_dword_v_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v[0:1], s[0:3], 0 idxen offen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; Variable vindex with a scalar (inreg) soffset; soffset is expected as the
; SGPR operand s5.
define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_s_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v0, s[0:3], s5 idxen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; Variable vindex, variable voffset and scalar soffset combined.
define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v[0:1], s[0:3], s5 idxen offen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; Same as the previous case plus an immediate offset of 2048.
define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_imm_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v[0:1], s[0:3], s5 idxen offen offset:2048 lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}

; Size 2 selects the ushort load. Here 2048 is passed as the voffset operand
; (not the immediate offset), and the assertions expect it to be materialized
; into v1 and used through offen.
define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_ushort:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    v_mov_b32_e32 v1, 0x800
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_ushort v[0:1], s[0:3], 0 idxen offen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 2, i32 %vindex, i32 2048, i32 0, i32 0, i32 0)
  ret void
}

; Size 1 selects the ubyte load, with an immediate offset of 2048.
define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_ubyte:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_ubyte v0, s[0:3], 0 idxen offset:2048 lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 1, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}