; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; When a frame index offset is more than 12-bits, make sure we don't store
; it in mubuf's offset field.

; Also, make sure we use the same register for storing the scratch buffer address
; for both stores. This register is allocated by the register scavenger, so we
; should be able to reuse the same register for each scratch buffer access.

; Two [8192 x i32] scratch arrays (32768 bytes each). The store to element 0
; of the first array is expected to use the immediate offset field
; (offset:4), while the store to element 0 of the second array is expected to
; have its address (0x8004) materialized in a VGPR and used through "offen".
; GCN-LABEL: {{^}}legal_offset_fi:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:4{{$}}
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8004
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offen{{$}}

define amdgpu_kernel void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
entry:
  %scratch0 = alloca [8192 x i32], addrspace(5)
  %scratch1 = alloca [8192 x i32], addrspace(5)

  %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 0
  store i32 1, i32 addrspace(5)* %scratchptr0

  %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 0
  store i32 2, i32 addrspace(5)* %scratchptr1

  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %if, label %else

if:
  ; Dynamically indexed load from the first scratch array.
  %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
  %if_value = load i32, i32 addrspace(5)* %if_ptr
  br label %done

else:
  ; Dynamically indexed load from the second scratch array.
  %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
  %else_value = load i32, i32 addrspace(5)* %else_ptr
  br label %done

done:
  %value = phi i32 [%if_value, %if], [%else_value, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Same layout as the kernel above, but both scratch stores use an index loaded
; from %offsets, so both are expected to use "offen" addressing. The second
; store's address is expected to come from an add involving 0x8004.
; NOTE(review): K8000 is captured below but never referenced by a later
; pattern - confirm whether the add was meant to match [[K8000]].
; GCN-LABEL: {{^}}legal_offset_fi_offset:
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offen{{$}}
; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8004
; GCN-DAG: v_add_{{[iu]}}32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8004
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offen{{$}}

define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
entry:
  %scratch0 = alloca [8192 x i32], addrspace(5)
  %scratch1 = alloca [8192 x i32], addrspace(5)

  %offset0 = load i32, i32 addrspace(1)* %offsets
  %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %offset0
  store i32 %offset0, i32 addrspace(5)* %scratchptr0

  %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
  %offset1 = load i32, i32 addrspace(1)* %offsetptr1
  %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %offset1
  store i32 %offset1, i32 addrspace(5)* %scratchptr1

  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %if, label %else

if:
  %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset
  %if_value = load i32, i32 addrspace(5)* %if_ptr
  br label %done

else:
  %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset
  %else_value = load i32, i32 addrspace(5)* %else_ptr
  br label %done

done:
  %value = phi i32 [%if_value, %if], [%else_value, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Store through an inbounds GEP whose index is (%offset + 4). The expected
; output is an explicit add of 16 followed by an "offen" store with no
; immediate offset.
; NOTE(review): the kernel name says "neg", but the IR adds +4 to the index -
; confirm the intended sign against the test's history.
; GCN-LABEL: {{^}}neg_vaddr_offset_inbounds:
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], 0 offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) {
entry:
  %array = alloca [8192 x i32], addrspace(5)
  %ptr_offset = add i32 %offset, 4
  %ptr = getelementptr inbounds [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
  store i32 0, i32 addrspace(5)* %ptr
  ret void
}

; Identical to the kernel above except that the GEP is not marked inbounds;
; the expected instructions are the same.
; GCN-LABEL: {{^}}neg_vaddr_offset:
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], 0 offen{{$}}
define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
entry:
  %array = alloca [8192 x i32], addrspace(5)
  %ptr_offset = add i32 %offset, 4
  %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset
  store i32 0, i32 addrspace(5)* %ptr
  ret void
}

; Store to the constant index 4 of the scratch array; the expected store uses
; "off" addressing with the whole displacement in the immediate field
; (offset:20).
; GCN-LABEL: {{^}}pos_vaddr_offset:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:20
define amdgpu_kernel void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
entry:
  %array = alloca [8192 x i32], addrspace(5)
  %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 4
  store i32 0, i32 addrspace(5)* %ptr
  ; Dynamically indexed load whose result is written to %out.
  ; NOTE(review): presumably here to keep the array in scratch memory - confirm.
  %load_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %offset
  %val = load i32, i32 addrspace(5)* %load_ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}