; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,SDAG
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GISEL

; Code generation tests for the llvm.amdgcn.struct.buffer.load.lds intrinsic on
; gfx900, run through both the default instruction selector and GlobalISel.
; Every call below passes, in order: the resource descriptor, the LDS pointer,
; the load size in bytes, vindex, voffset, soffset, the immediate offset and
; the aux value (names taken from the declaration).

declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)

; Three dword (size 4) loads with constant vindex 8 and immediate offsets 0, 4
; and 8. The aux values 0, 1 and 2 are expected to print as no modifier, glc
; and slc respectively. The float read back from %lds is expected to be
; preceded by a wait on vmcnt(0). The two selectors differ only in the order
; of the leading moves (and the s_nop), hence the separate prefixes.
define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds) {
; SDAG-LABEL: buffer_load_lds_dword:
; SDAG:       ; %bb.0: ; %main_body
; SDAG-NEXT:    v_mov_b32_e32 v0, 8
; SDAG-NEXT:    s_mov_b32 m0, s4
; SDAG-NEXT:    s_nop 0
; SDAG-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen lds
; SDAG-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:4 glc lds
; SDAG-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:8 slc lds
; SDAG-NEXT:    v_mov_b32_e32 v0, s4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    ds_read_b32 v0, v0
; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: buffer_load_lds_dword:
; GISEL:       ; %bb.0: ; %main_body
; GISEL-NEXT:    s_mov_b32 m0, s4
; GISEL-NEXT:    v_mov_b32_e32 v0, 8
; GISEL-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen lds
; GISEL-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:4 glc lds
; GISEL-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:8 slc lds
; GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    ds_read_b32 v0, v0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2)
  %ptr = bitcast i8 addrspace(3)* %lds to float addrspace(3)*
  %res = load float, float addrspace(3)* %ptr
  ret float %res
}

; Dword load with a variable vindex and an immediate offset of 2048, which is
; expected to be encoded directly in the instruction's offset field.
define amdgpu_ps void @buffer_load_lds_dword_imm_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_dword_imm_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v0, s[0:3], 0 idxen offset:2048 lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}

; Dword load with variable vindex and variable voffset; the expected
; instruction takes the v[0:1] register pair and carries both idxen and offen.
define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset) {
; GCN-LABEL: buffer_load_lds_dword_v_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v[0:1], s[0:3], 0 idxen offen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; Dword load with variable vindex and an inreg soffset; the expected
; instruction uses s5 as its scalar offset operand.
define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_s_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v0, s[0:3], s5 idxen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; Dword load combining variable vindex, variable voffset and an inreg soffset.
define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v[0:1], s[0:3], s5 idxen offen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; Same operands as the vs_offset test plus an immediate offset of 2048, so all
; of vindex, voffset, soffset and the immediate appear on one instruction.
define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_imm_offset:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_dword v[0:1], s[0:3], s5 idxen offen offset:2048 lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}

; Two-byte (size 2) load, expected to select buffer_load_ushort. Here the
; constant 2048 is passed as voffset rather than as the immediate offset, so
; it is expected to be materialized into v1 (0x800) and consumed via offen.
define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_ushort:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    v_mov_b32_e32 v1, 0x800
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_ushort v[0:1], s[0:3], 0 idxen offen lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 2, i32 %vindex, i32 2048, i32 0, i32 0, i32 0)
  ret void
}

; One-byte (size 1) load, expected to select buffer_load_ubyte, with the
; constant 2048 supplied as the immediate offset.
define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_ubyte:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_mov_b32 m0, s4
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    buffer_load_ubyte v0, s[0:3], 0 idxen offset:2048 lds
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 1, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}