1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
3; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
4
5; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
6; but with all 64-bit tests and all tests involving loads dropped.
7
8; Patterns:
9;   a) x & ((1 << nbits) - 1)
10;   b) x & ~(-1 << nbits)
11;   c) x &  (-1 >> (32 - nbits))
12;   d) x << (32 - nbits) >> (32 - nbits)
13; are equivalent (every >> here is a logical right shift).
14
15; ---------------------------------------------------------------------------- ;
16; Pattern a. 32-bit
17; ---------------------------------------------------------------------------- ;
18
; Pattern a, base form: %masked = %val & ((1 << %numlowbits) - 1).
; Both llc invocations at the top of the file share the GCN prefix used below,
; so the two targets are expected to emit the same code: one v_bfe_u32 and
; nothing else between the wait and the return.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
19define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
20; GCN-LABEL: bzhi32_a0:
21; GCN:       ; %bb.0:
22; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
24; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; Build the low-bit mask: set bit %numlowbits, then subtract one.
25  %onebit = shl i32 1, %numlowbits
26  %mask = add nsw i32 %onebit, -1
  ; The mask is the first operand of the 'and' in this variant.
27  %masked = and i32 %mask, %val
28  ret i32 %masked
29}
30
; Pattern a with a narrow bit count: %numlowbits is passed as an i8 carrying
; the zeroext attribute and is widened to i32 with zext before the shift.
; The shared GCN checks expect the same single v_bfe_u32 as the i32 variant.
31define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
32; GCN-LABEL: bzhi32_a1_indexzext:
33; GCN:       ; %bb.0:
34; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
36; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; Widen the 8-bit count to the 32-bit shift-amount type.
37  %conv = zext i8 %numlowbits to i32
  ; mask = (1 << conv) - 1
38  %onebit = shl i32 1, %conv
39  %mask = add nsw i32 %onebit, -1
40  %masked = and i32 %mask, %val
41  ret i32 %masked
42}
43
; Pattern a with the operands of the final 'and' swapped (%val first, %mask
; second). The shared GCN checks expect the same single v_bfe_u32, i.e. the
; result must not depend on the operand order of the 'and'.
44define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
45; GCN-LABEL: bzhi32_a4_commutative:
46; GCN:       ; %bb.0:
47; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
49; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; mask = (1 << numlowbits) - 1
50  %onebit = shl i32 1, %numlowbits
51  %mask = add nsw i32 %onebit, -1
52  %masked = and i32 %val, %mask ; swapped order
53  ret i32 %masked
54}
55
56; ---------------------------------------------------------------------------- ;
57; Pattern b. 32-bit
58; ---------------------------------------------------------------------------- ;
59
; Pattern b, base form: %masked = %val & ~(-1 << %numlowbits).
; The shared GCN checks expect a single v_bfe_u32 on both tested targets.
60define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
61; GCN-LABEL: bzhi32_b0:
62; GCN:       ; %bb.0:
63; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
65; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; Shift all-ones left to get the inverted mask.
66  %notmask = shl i32 -1, %numlowbits
  ; xor with -1 is bitwise NOT, which yields the low-bit mask.
67  %mask = xor i32 %notmask, -1
68  %masked = and i32 %mask, %val
69  ret i32 %masked
70}
71
; Pattern b with a narrow bit count: %numlowbits is an i8 carrying the zeroext
; attribute and is widened to i32 with zext before the shift.
; The shared GCN checks expect the same single v_bfe_u32 as the i32 variant.
72define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
73; GCN-LABEL: bzhi32_b1_indexzext:
74; GCN:       ; %bb.0:
75; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
77; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; Widen the 8-bit count to the 32-bit shift-amount type.
78  %conv = zext i8 %numlowbits to i32
  ; mask = ~(-1 << conv)
79  %notmask = shl i32 -1, %conv
80  %mask = xor i32 %notmask, -1
81  %masked = and i32 %mask, %val
82  ret i32 %masked
83}
84
; Pattern b with the operands of the final 'and' swapped (%val first, %mask
; second). The shared GCN checks expect the same single v_bfe_u32 as the
; non-swapped form.
85define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
86; GCN-LABEL: bzhi32_b4_commutative:
87; GCN:       ; %bb.0:
88; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
90; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; mask = ~(-1 << numlowbits)
91  %notmask = shl i32 -1, %numlowbits
92  %mask = xor i32 %notmask, -1
93  %masked = and i32 %val, %mask ; swapped order
94  ret i32 %masked
95}
96
97; ---------------------------------------------------------------------------- ;
98; Pattern c. 32-bit
99; ---------------------------------------------------------------------------- ;
100
; Pattern c, base form: %masked = %val & (-1 >> (32 - %numlowbits)), where the
; right shift is logical (lshr).
; The shared GCN checks expect a single v_bfe_u32 on both tested targets.
101define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
102; GCN-LABEL: bzhi32_c0:
103; GCN:       ; %bb.0:
104; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
106; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; Number of high bits to clear.
107  %numhighbits = sub i32 32, %numlowbits
  ; Shift all-ones right so that only the low bits remain set.
108  %mask = lshr i32 -1, %numhighbits
109  %masked = and i32 %mask, %val
110  ret i32 %masked
111}
112
; Pattern c with a narrow bit count: %numlowbits is a plain i8 (no zeroext
; attribute here), the "32 - n" subtraction is done in i8, and only the result
; is zero-extended to i32 for use as the shift amount.
; Unlike the i32 variant, the checks below do not expect v_bfe_u32: each
; target has its own prefix and expects a subtract, a right shift of -1 and an
; 'and'. The subtract is 32-bit for the first llc invocation and 16-bit for
; the tonga invocation.
; NOTE(review): presumably the fold is skipped because the subtraction happens
; in i8 rather than i32 - confirm against the backend before relying on this.
113define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
114; SI-LABEL: bzhi32_c1_indexzext:
115; SI:       ; %bb.0:
116; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
118; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
119; SI-NEXT:    v_and_b32_e32 v0, v1, v0
120; SI-NEXT:    s_setpc_b64 s[30:31]
121;
122; VI-LABEL: bzhi32_c1_indexzext:
123; VI:       ; %bb.0:
124; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
126; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
127; VI-NEXT:    v_and_b32_e32 v0, v1, v0
128; VI-NEXT:    s_setpc_b64 s[30:31]
  ; The subtraction is performed in 8 bits, before widening.
129  %numhighbits = sub i8 32, %numlowbits
130  %sh_prom = zext i8 %numhighbits to i32
  ; mask = -1 >> sh_prom (logical shift)
131  %mask = lshr i32 -1, %sh_prom
132  %masked = and i32 %mask, %val
133  ret i32 %masked
134}
135
; Pattern c with the operands of the final 'and' swapped (%val first, %mask
; second). The shared GCN checks expect the same single v_bfe_u32 as the
; non-swapped form.
136define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
137; GCN-LABEL: bzhi32_c4_commutative:
138; GCN:       ; %bb.0:
139; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
141; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; mask = -1 >> (32 - numlowbits), logical shift
142  %numhighbits = sub i32 32, %numlowbits
143  %mask = lshr i32 -1, %numhighbits
144  %masked = and i32 %val, %mask ; swapped order
145  ret i32 %masked
146}
147
148; ---------------------------------------------------------------------------- ;
149; Pattern d. 32-bit
150; ---------------------------------------------------------------------------- ;
151
; Pattern d, base form: %masked = (%val << (32 - %numlowbits)) >> (32 -
; %numlowbits), using a logical right shift. No explicit mask is built; the
; high bits are shifted out and zeros are shifted back in.
; The shared GCN checks expect a single v_bfe_u32 on both tested targets.
152define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
153; GCN-LABEL: bzhi32_d0:
154; GCN:       ; %bb.0:
155; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
157; GCN-NEXT:    s_setpc_b64 s[30:31]
  ; Number of high bits to clear.
158  %numhighbits = sub i32 32, %numlowbits
  ; Push the unwanted high bits out of the top of the register...
159  %highbitscleared = shl i32 %val, %numhighbits
  ; ...then shift back down by the same amount, filling with zeros.
160  %masked = lshr i32 %highbitscleared, %numhighbits
161  ret i32 %masked
162}
163
; Pattern d with a narrow bit count: %numlowbits is a plain i8 (no zeroext
; attribute here), the "32 - n" subtraction is done in i8, and only the result
; is zero-extended to i32 for use as the shift amount of both shifts.
; Unlike the i32 variant, the checks below do not expect v_bfe_u32: each
; target has its own prefix and expects a subtract followed by a left shift
; and a right shift. The subtract is 32-bit for the first llc invocation and
; 16-bit for the tonga invocation.
; NOTE(review): presumably the fold is skipped because the subtraction happens
; in i8 rather than i32 - confirm against the backend before relying on this.
164define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
165; SI-LABEL: bzhi32_d1_indexzext:
166; SI:       ; %bb.0:
167; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
169; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
170; SI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
171; SI-NEXT:    s_setpc_b64 s[30:31]
172;
173; VI-LABEL: bzhi32_d1_indexzext:
174; VI:       ; %bb.0:
175; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
177; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
178; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
179; VI-NEXT:    s_setpc_b64 s[30:31]
  ; The subtraction is performed in 8 bits, before widening.
180  %numhighbits = sub i8 32, %numlowbits
181  %sh_prom = zext i8 %numhighbits to i32
  ; Shift the high bits out, then shift back down with zero fill.
182  %highbitscleared = shl i32 %val, %sh_prom
183  %masked = lshr i32 %highbitscleared, %sh_prom
184  ret i32 %masked
185}
186