; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Tests that the (shl x, (sub 32, width)) ; (lshr/ashr ..., (sub 32, width))
; pattern is combined into v_bfe_u32/v_bfe_i32 when the shift amount has a
; single use, and is left as shift pairs when the shl result has another use.

; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load the width from the second input buffer; %in1.gep was previously dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load the width from the second input buffer; %in1.gep was previously dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Extra use of %shl blocks the BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Extra use of %shl blocks the BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load the width from the second input buffer; %in1.gep was previously dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load the width from the second input buffer; %in1.gep was previously dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Extra use of %shl blocks the BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Extra use of %shl blocks the BFE combine.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }