; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Every kernel below builds the same shape of expression,
;   (src << (32 - width)) >> (32 - width)
; using lshr for the unsigned variants and ashr for the signed ones.
; The v_* kernels read their operands from memory per work-item; the s_*
; kernels take them as kernel arguments. The *_multi_use_shl variants add a
; second, volatile store of the intermediate shl so that it has another user.

; Unsigned, per-work-item operands, single use of the shl: the checks expect
; one v_bfe_u32 with offset 0 and the loaded width.
; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from %in1 so that source and width are independent inputs.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; Unsigned, per-work-item operands, shl has a second user: the checks expect
; the subtract followed directly by the two separate shifts, and both shift
; results to be referenced afterwards.
; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; GCN-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from %in1 so that source and width are independent inputs.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second user of %shl.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Unsigned, kernel-argument operands, single use of the shl: the checks expect
; a scalar subtract followed by s_lshl_b32 and s_lshr_b32.
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; Unsigned, kernel-argument operands, shl has a second user: same scalar
; subtract / shift-left / shift-right sequence is expected.
; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second user of %shl.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Signed, per-work-item operands, single use of the shl: the checks expect
; one v_bfe_i32 with offset 0 and the loaded width.
; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from %in1 so that source and width are independent inputs.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; Signed, per-work-item operands, shl has a second user: the checks expect
; the subtract followed directly by the shift-left and the arithmetic
; shift-right, and both shift results to be referenced afterwards.
; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; GCN-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from %in1 so that source and width are independent inputs.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second user of %shl.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Signed, kernel-argument operands, single use of the shl: the checks expect
; a scalar subtract followed by s_lshl_b32 and s_ashr_i32.
; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; Signed, kernel-argument operands, shl has a second user: same scalar
; subtract / shift-left / arithmetic-shift-right sequence is expected.
; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second user of %shl.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }