; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Unsigned bitfield extract spelled as a shift pair on per-workitem values:
;   (src << (32 - width)) >> (32 - width)      (logical right shift)
; The shl result has a single use; the checks below expect the pair to be
; selected as one v_bfe_u32 with offset 0 and the loaded width.
; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id so the operands are per-lane values.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  ; Volatile loads: both reads must stay as two separate load instructions.
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer (previously this re-read
  ; %in0.gep, which left %in1.gep unused).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Shift the low `width` bits to the top, then back down with zero fill.
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; Same unsigned shift-pair pattern as the single-use test, but the shl result
; is also stored on its own. The checks below expect the explicit
; sub / lshl / lshr sequence rather than a bitfield-extract instruction, and
; expect both the extracted value and the intermediate shl to be referenced
; afterwards.
; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; GCN-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id so the operands are per-lane values.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  ; Volatile loads: both reads must stay as two separate load instructions.
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer (previously this re-read
  ; %in0.gep, which left %in1.gep unused).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: the volatile store keeps the intermediate value live.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Unsigned shift-pair extract where both operands are kernel arguments.
; The checks below expect the scalar sequence s_sub / s_lshl / s_lshr on the
; loaded argument pair, i.e. no bitfield-extract instruction is matched here.
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; (src << (32 - width)) >> (32 - width), zero-filling from the top.
  %shamt = sub i32 32, %width
  %hi = shl i32 %src, %shamt
  %field = lshr i32 %hi, %shamt

  ; Each workitem writes the result to its own slot of %out.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  store i32 %field, i32 addrspace(1)* %out.ptr
  ret void
}

; Kernel-argument variant of the unsigned shift pair in which the shl result
; is additionally stored. The checks below expect the scalar
; s_sub / s_lshl / s_lshr sequence.
; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; (src << (32 - width)) >> (32 - width), zero-filling from the top.
  %shamt = sub i32 32, %width
  %hi = shl i32 %src, %shamt
  %field = lshr i32 %hi, %shamt

  ; Each workitem writes the extracted field to its own slot of %out.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  store i32 %field, i32 addrspace(1)* %out.ptr
  ; Extra use of the intermediate shift result.
  store volatile i32 %hi, i32 addrspace(1)* undef
  ret void
}

; Signed bitfield extract spelled as a shift pair on per-workitem values:
;   (src << (32 - width)) >> (32 - width)      (arithmetic right shift)
; The shl result has a single use; the checks below expect the pair to be
; selected as one v_bfe_i32 with offset 0 and the loaded width.
; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id so the operands are per-lane values.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  ; Volatile loads: both reads must stay as two separate load instructions.
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer (previously this re-read
  ; %in0.gep, which left %in1.gep unused).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  ; Shift the low `width` bits to the top, then back down with sign fill.
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; Same signed shift-pair pattern as the single-use test, but the shl result
; is also stored on its own. The checks below expect the explicit
; sub / lshl / ashr sequence rather than a bitfield-extract instruction, and
; expect both the extracted value and the intermediate shl to be referenced
; afterwards.
; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; GCN-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  ; Index every buffer by the workitem id so the operands are per-lane values.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  ; Volatile loads: both reads must stay as two separate load instructions.
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; The width is read from the second input buffer (previously this re-read
  ; %in0.gep, which left %in1.gep unused).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ; Second use of %shl: the volatile store keeps the intermediate value live.
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Signed shift-pair extract where both operands are kernel arguments.
; The checks below expect the scalar sequence s_sub / s_lshl / s_ashr on the
; loaded argument pair, i.e. no bitfield-extract instruction is matched here.
; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; (src << (32 - width)) >> (32 - width), sign-filling from the top.
  %shamt = sub i32 32, %width
  %hi = shl i32 %src, %shamt
  %field = ashr i32 %hi, %shamt

  ; Each workitem writes the result to its own slot of %out.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  store i32 %field, i32 addrspace(1)* %out.ptr
  ret void
}

; Kernel-argument variant of the signed shift pair in which the shl result
; is additionally stored. The checks below expect the scalar
; s_sub / s_lshl / s_ashr sequence.
; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  ; (src << (32 - width)) >> (32 - width), sign-filling from the top.
  %shamt = sub i32 32, %width
  %hi = shl i32 %src, %shamt
  %field = ashr i32 %hi, %shamt

  ; Each workitem writes the extracted field to its own slot of %out.
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  store i32 %field, i32 addrspace(1)* %out.ptr
  ; Extra use of the intermediate shift result.
  store volatile i32 %hi, i32 addrspace(1)* undef
  ret void
}

; Intrinsic used by every kernel in this file to obtain the per-workitem index.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0 is attached to the intrinsic declaration; #1 to each kernel definition.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
