1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; Unsigned bitfield extract spelled as a shift pair, with per-lane operands:
;   (%src << (32 - %width)) >> (32 - %width)    (logical right shift)
; The shl has a single use, so the checks below expect the whole sequence
; to be selected as one v_bfe_u32 with offset 0 and the loaded width.
; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  ; Both loads are volatile; the checks above match the source load first
  ; and the width load second.
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width comes from the second input buffer. It was previously read
  ; from %in0.gep, which left %in1.gep dead and gave both operands the
  ; same address.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Shift the low %width bits up to the top, then back down with zero fill.
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
21
; Same unsigned shift pair as the single-use case, but the intermediate shl
; result is also stored. The checks below therefore expect the explicit
; sub / shift-left / logical-shift-right sequence instead of a bitfield
; extract, and expect both the extracted value and the shl result to be
; referenced afterwards.
; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width comes from the second input buffer. It was previously read
  ; from %in0.gep, which left %in1.gep dead and gave both operands the
  ; same address.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: the volatile store keeps the intermediate value live.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
49
; Unsigned shift-pair extract where both the source and the width are kernel
; arguments. The checks below expect two scalar loads, a copy of the width
; into a vector register, and a single v_bfe_u32 with offset 0.
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; Only the destination address depends on the workitem id.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  ; (%src << (32 - %width)) >> (32 - %width), zero filled.
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = lshr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ret void
}
64
; Kernel-argument variant in which the shl result has a second use. The
; checks below expect the scalar sub, shift-left and logical-shift-right
; to be emitted back to back rather than folded into a bitfield extract.
; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; Only the destination address depends on the workitem id.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = lshr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ; Extra use of the intermediate shift result.
  store volatile i32 %shifted, i32 addrspace(1)* undef
  ret void
}
81
; Signed bitfield extract spelled as a shift pair, with per-lane operands:
;   (%src << (32 - %width)) >> (32 - %width)    (arithmetic right shift)
; The shl has a single use, so the checks below expect the whole sequence
; to be selected as one v_bfe_i32 with offset 0 and the loaded width.
; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  ; Both loads are volatile; the checks above match the source load first
  ; and the width load second.
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width comes from the second input buffer. It was previously read
  ; from %in0.gep, which left %in1.gep dead and gave both operands the
  ; same address.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Shift the low %width bits up to the top, then back down with sign fill.
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}
99
; Same signed shift pair as the single-use case, but the intermediate shl
; result is also stored. The checks below therefore expect the explicit
; sub / shift-left / arithmetic-shift-right sequence instead of a bitfield
; extract, and expect both the extracted value and the shl result to be
; referenced afterwards.
; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width comes from the second input buffer. It was previously read
  ; from %in0.gep, which left %in1.gep dead and gave both operands the
  ; same address.
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: the volatile store keeps the intermediate value live.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}
127
; Signed shift-pair extract where both the source and the width are kernel
; arguments. The checks below expect two scalar loads, a copy of the width
; into a vector register, and a single v_bfe_i32 with offset 0.
; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; Only the destination address depends on the workitem id.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  ; (%src << (32 - %width)) >> (32 - %width), sign filled.
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = ashr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ret void
}
142
; Kernel-argument variant in which the shl result has a second use. The
; checks below expect the scalar sub, shift-left and arithmetic-shift-right
; to be emitted back to back rather than folded into a bitfield extract.
; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: s_load_dword [[WIDTH:s[0-9]+]]
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]]
; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]]
; GCN-NEXT: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; Only the destination address depends on the workitem id.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %shamt = sub i32 32, %width
  %shifted = shl i32 %src, %shamt
  %field = ashr i32 %shifted, %shamt
  store i32 %field, i32 addrspace(1)* %dst
  ; Extra use of the intermediate shift result.
  store volatile i32 %shifted, i32 addrspace(1)* undef
  ret void
}
159
; Intrinsic called by every kernel in this file to obtain the workitem id
; in the x dimension.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0 is attached to the intrinsic declaration, #1 to every kernel definition.
attributes #0 = { readnone nounwind }
attributes #1 = { nounwind }
164