; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: the width must come from the second input buffer (%in1.gep);
  ; the original loaded %in0.gep twice and left %in1.gep dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  ; (x << (32 - width)) >> (32 - width) with VGPR width should fold to v_bfe_u32.
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: the width must come from the second input buffer (%in1.gep);
  ; the original loaded %in0.gep twice and left %in1.gep dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  ; The extra (volatile) use of %shl must block the BFE combine: keep shl+lshr.
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
; GCN: v_bfe_u32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
; Scalar (SGPR kernel-argument) inputs: (x << (32 - width)) >> (32 - width)
; with a single use of the shl should still combine to v_bfe_u32 with offset 0,
; moving the width into a VGPR for the VALU instruction.
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
; Scalar inputs, but the shl has a second (volatile-store) use, so the BFE
; combine must NOT fire: expect the separate scalar sub/shl/shr sequence.
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: the width must come from the second input buffer (%in1.gep);
  ; the original loaded %in0.gep twice and left %in1.gep dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Signed variant: (x << (32 - width)) a>> (32 - width) should fold to v_bfe_i32.
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Fix: the width must come from the second input buffer (%in1.gep);
  ; the original loaded %in0.gep twice and left %in1.gep dead.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  ; The extra (volatile) use of %shl must block the BFE combine: keep shl+ashr.
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]]
; GCN: v_bfe_i32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]]
; Scalar (SGPR kernel-argument) inputs, signed variant:
; (x << (32 - width)) a>> (32 - width) with a single use of the shl should
; combine to v_bfe_i32 with offset 0, moving the width into a VGPR.
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
; Scalar inputs, signed variant, but the shl has a second (volatile-store) use,
; so the BFE combine must NOT fire: expect the scalar sub/shl/ashr sequence.
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Workitem-id intrinsic used by all the v_* tests to form per-lane addresses.
declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }