; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; Tests formation of AMDGPU bit-field-extract instructions (v_bfe_u32 /
; v_bfe_i32 with offset 0) from the pattern (x << (32 - width)) >>[l/a] (32 - width).
; The *_multi_use_shl_* variants give the shl a second (volatile-stored) use,
; which must prevent folding the shift pair into a single BFE.

; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; NOTE(review): %width is loaded from %in0.gep (same address as %src), leaving
  ; %in1.gep dead — presumably %in1.gep was intended; confirm before changing,
  ; since the CHECK lines are tied to the resulting codegen.
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; NOTE(review): loads %width from %in0.gep, not %in1.gep (dead) — confirm intent.
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: blocks the shl+lshr -> BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
; GCN: v_bfe_u32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: blocks the shl+lshr -> BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; NOTE(review): loads %width from %in0.gep, not %in1.gep (dead) — confirm intent.
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; NOTE(review): loads %width from %in0.gep, not %in1.gep (dead) — confirm intent.
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: blocks the shl+ashr -> BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
; GCN: v_bfe_i32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: blocks the shl+ashr -> BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }