1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI
3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM
9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM
10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM
11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM
12
13; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll
14
15; https://bugs.llvm.org/show_bug.cgi?id=36419
16; https://bugs.llvm.org/show_bug.cgi?id=37603
17; https://bugs.llvm.org/show_bug.cgi?id=37610
18
19; Patterns:
20;   a) x &  ((1 << nbits) - 1)
21;   b) x & ~(-1 << nbits)
22;   c) x &  (-1 >> (32 - nbits))
23;   d) x << (32 - nbits) >> (32 - nbits)
24; are equivalent (shown for a 32-bit x; the right shifts are logical).
25
26; ---------------------------------------------------------------------------- ;
27; Pattern a. 32-bit
28; ---------------------------------------------------------------------------- ;
29
; Pattern a, 32-bit, register operands: %val & ((1 << %numlowbits) - 1).
; The checks below expect shl/dec/and without BMI, shl $8 + bextrl with BMI1,
; and bzhil with BMI2.
30define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
31; X86-NOBMI-LABEL: bzhi32_a0:
32; X86-NOBMI:       # %bb.0:
33; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
34; X86-NOBMI-NEXT:    movl $1, %eax
35; X86-NOBMI-NEXT:    shll %cl, %eax
36; X86-NOBMI-NEXT:    decl %eax
37; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
38; X86-NOBMI-NEXT:    retl
39;
40; X86-BMI1-LABEL: bzhi32_a0:
41; X86-BMI1:       # %bb.0:
42; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
43; X86-BMI1-NEXT:    shll $8, %eax
44; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
45; X86-BMI1-NEXT:    retl
46;
47; X86-BMI2-LABEL: bzhi32_a0:
48; X86-BMI2:       # %bb.0:
49; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
50; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
51; X86-BMI2-NEXT:    retl
52;
53; X64-NOBMI-LABEL: bzhi32_a0:
54; X64-NOBMI:       # %bb.0:
55; X64-NOBMI-NEXT:    movl %esi, %ecx
56; X64-NOBMI-NEXT:    movl $1, %eax
57; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
58; X64-NOBMI-NEXT:    shll %cl, %eax
59; X64-NOBMI-NEXT:    decl %eax
60; X64-NOBMI-NEXT:    andl %edi, %eax
61; X64-NOBMI-NEXT:    retq
62;
63; X64-BMI1-LABEL: bzhi32_a0:
64; X64-BMI1:       # %bb.0:
65; X64-BMI1-NEXT:    shll $8, %esi
66; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
67; X64-BMI1-NEXT:    retq
68;
69; X64-BMI2-LABEL: bzhi32_a0:
70; X64-BMI2:       # %bb.0:
71; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
72; X64-BMI2-NEXT:    retq
73  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
74  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
75  %masked = and i32 %mask, %val ; mask is the first 'and' operand
76  ret i32 %masked
77}
78
; Pattern a, 32-bit, with the bit count passed as a zeroext i8 and widened to
; i32 by an explicit zext before the shift.
; The checks below expect shl/dec/and without BMI, shl $8 + bextrl with BMI1,
; and bzhil with BMI2.
79define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
80; X86-NOBMI-LABEL: bzhi32_a1_indexzext:
81; X86-NOBMI:       # %bb.0:
82; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
83; X86-NOBMI-NEXT:    movl $1, %eax
84; X86-NOBMI-NEXT:    shll %cl, %eax
85; X86-NOBMI-NEXT:    decl %eax
86; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
87; X86-NOBMI-NEXT:    retl
88;
89; X86-BMI1-LABEL: bzhi32_a1_indexzext:
90; X86-BMI1:       # %bb.0:
91; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
92; X86-BMI1-NEXT:    shll $8, %eax
93; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
94; X86-BMI1-NEXT:    retl
95;
96; X86-BMI2-LABEL: bzhi32_a1_indexzext:
97; X86-BMI2:       # %bb.0:
98; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
99; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
100; X86-BMI2-NEXT:    retl
101;
102; X64-NOBMI-LABEL: bzhi32_a1_indexzext:
103; X64-NOBMI:       # %bb.0:
104; X64-NOBMI-NEXT:    movl %esi, %ecx
105; X64-NOBMI-NEXT:    movl $1, %eax
106; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
107; X64-NOBMI-NEXT:    shll %cl, %eax
108; X64-NOBMI-NEXT:    decl %eax
109; X64-NOBMI-NEXT:    andl %edi, %eax
110; X64-NOBMI-NEXT:    retq
111;
112; X64-BMI1-LABEL: bzhi32_a1_indexzext:
113; X64-BMI1:       # %bb.0:
114; X64-BMI1-NEXT:    shll $8, %esi
115; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
116; X64-BMI1-NEXT:    retq
117;
118; X64-BMI2-LABEL: bzhi32_a1_indexzext:
119; X64-BMI2:       # %bb.0:
120; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
121; X64-BMI2-NEXT:    retq
122  %conv = zext i8 %numlowbits to i32 ; widen the i8 bit count to the shift width
123  %onebit = shl i32 1, %conv ; 1 << numlowbits
124  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
125  %masked = and i32 %mask, %val
126  ret i32 %masked
127}
128
; Pattern a, 32-bit, with the value to be masked loaded from memory (%w).
; The checks below expect the load to appear as the memory operand of the
; andl (no BMI), bextrl (BMI1) or bzhil (BMI2) instruction.
129define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
130; X86-NOBMI-LABEL: bzhi32_a2_load:
131; X86-NOBMI:       # %bb.0:
132; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
133; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
134; X86-NOBMI-NEXT:    movl $1, %eax
135; X86-NOBMI-NEXT:    shll %cl, %eax
136; X86-NOBMI-NEXT:    decl %eax
137; X86-NOBMI-NEXT:    andl (%edx), %eax
138; X86-NOBMI-NEXT:    retl
139;
140; X86-BMI1-LABEL: bzhi32_a2_load:
141; X86-BMI1:       # %bb.0:
142; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
143; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
144; X86-BMI1-NEXT:    shll $8, %ecx
145; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
146; X86-BMI1-NEXT:    retl
147;
148; X86-BMI2-LABEL: bzhi32_a2_load:
149; X86-BMI2:       # %bb.0:
150; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
151; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
152; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
153; X86-BMI2-NEXT:    retl
154;
155; X64-NOBMI-LABEL: bzhi32_a2_load:
156; X64-NOBMI:       # %bb.0:
157; X64-NOBMI-NEXT:    movl %esi, %ecx
158; X64-NOBMI-NEXT:    movl $1, %eax
159; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
160; X64-NOBMI-NEXT:    shll %cl, %eax
161; X64-NOBMI-NEXT:    decl %eax
162; X64-NOBMI-NEXT:    andl (%rdi), %eax
163; X64-NOBMI-NEXT:    retq
164;
165; X64-BMI1-LABEL: bzhi32_a2_load:
166; X64-BMI1:       # %bb.0:
167; X64-BMI1-NEXT:    shll $8, %esi
168; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
169; X64-BMI1-NEXT:    retq
170;
171; X64-BMI2-LABEL: bzhi32_a2_load:
172; X64-BMI2:       # %bb.0:
173; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
174; X64-BMI2-NEXT:    retq
175  %val = load i32, ptr %w ; value to mask comes from memory
176  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
177  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
178  %masked = and i32 %mask, %val
179  ret i32 %masked
180}
181
; Pattern a, 32-bit, combining both variations: the value is loaded from
; memory (%w) and the bit count is a zeroext i8 widened by an explicit zext.
; The checks below expect the load to appear as the memory operand of the
; andl (no BMI), bextrl (BMI1) or bzhil (BMI2) instruction.
182define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
183; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext:
184; X86-NOBMI:       # %bb.0:
185; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
186; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
187; X86-NOBMI-NEXT:    movl $1, %eax
188; X86-NOBMI-NEXT:    shll %cl, %eax
189; X86-NOBMI-NEXT:    decl %eax
190; X86-NOBMI-NEXT:    andl (%edx), %eax
191; X86-NOBMI-NEXT:    retl
192;
193; X86-BMI1-LABEL: bzhi32_a3_load_indexzext:
194; X86-BMI1:       # %bb.0:
195; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
196; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
197; X86-BMI1-NEXT:    shll $8, %ecx
198; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
199; X86-BMI1-NEXT:    retl
200;
201; X86-BMI2-LABEL: bzhi32_a3_load_indexzext:
202; X86-BMI2:       # %bb.0:
203; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
204; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
205; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
206; X86-BMI2-NEXT:    retl
207;
208; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext:
209; X64-NOBMI:       # %bb.0:
210; X64-NOBMI-NEXT:    movl %esi, %ecx
211; X64-NOBMI-NEXT:    movl $1, %eax
212; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
213; X64-NOBMI-NEXT:    shll %cl, %eax
214; X64-NOBMI-NEXT:    decl %eax
215; X64-NOBMI-NEXT:    andl (%rdi), %eax
216; X64-NOBMI-NEXT:    retq
217;
218; X64-BMI1-LABEL: bzhi32_a3_load_indexzext:
219; X64-BMI1:       # %bb.0:
220; X64-BMI1-NEXT:    shll $8, %esi
221; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
222; X64-BMI1-NEXT:    retq
223;
224; X64-BMI2-LABEL: bzhi32_a3_load_indexzext:
225; X64-BMI2:       # %bb.0:
226; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
227; X64-BMI2-NEXT:    retq
228  %val = load i32, ptr %w ; value to mask comes from memory
229  %conv = zext i8 %numlowbits to i32 ; widen the i8 bit count to the shift width
230  %onebit = shl i32 1, %conv ; 1 << numlowbits
231  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
232  %masked = and i32 %mask, %val
233  ret i32 %masked
234}
235
; Pattern a, 32-bit, with the operands of the final 'and' swapped
; (%val first, %mask second).
; The checks below expect shl/dec/and without BMI, shl $8 + bextrl with BMI1,
; and bzhil with BMI2.
236define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
237; X86-NOBMI-LABEL: bzhi32_a4_commutative:
238; X86-NOBMI:       # %bb.0:
239; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
240; X86-NOBMI-NEXT:    movl $1, %eax
241; X86-NOBMI-NEXT:    shll %cl, %eax
242; X86-NOBMI-NEXT:    decl %eax
243; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
244; X86-NOBMI-NEXT:    retl
245;
246; X86-BMI1-LABEL: bzhi32_a4_commutative:
247; X86-BMI1:       # %bb.0:
248; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
249; X86-BMI1-NEXT:    shll $8, %eax
250; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
251; X86-BMI1-NEXT:    retl
252;
253; X86-BMI2-LABEL: bzhi32_a4_commutative:
254; X86-BMI2:       # %bb.0:
255; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
256; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
257; X86-BMI2-NEXT:    retl
258;
259; X64-NOBMI-LABEL: bzhi32_a4_commutative:
260; X64-NOBMI:       # %bb.0:
261; X64-NOBMI-NEXT:    movl %esi, %ecx
262; X64-NOBMI-NEXT:    movl $1, %eax
263; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
264; X64-NOBMI-NEXT:    shll %cl, %eax
265; X64-NOBMI-NEXT:    decl %eax
266; X64-NOBMI-NEXT:    andl %edi, %eax
267; X64-NOBMI-NEXT:    retq
268;
269; X64-BMI1-LABEL: bzhi32_a4_commutative:
270; X64-BMI1:       # %bb.0:
271; X64-BMI1-NEXT:    shll $8, %esi
272; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
273; X64-BMI1-NEXT:    retq
274;
275; X64-BMI2-LABEL: bzhi32_a4_commutative:
276; X64-BMI2:       # %bb.0:
277; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
278; X64-BMI2-NEXT:    retq
279  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
280  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
281  %masked = and i32 %val, %mask ; swapped order
282  ret i32 %masked
283}
284
285; 64-bit
286
; Pattern a, 64-bit, register operands: %val & ((1 << %numlowbits) - 1).
; On i686 the checks for all three feature sets show the i64 mask built from
; two 32-bit halves (shldl plus shll/shlxl, a branch on bit 5 of the count,
; addl/adcl for the decrement) followed by two andl instructions.
; On x86-64 the checks expect shlq/decq/andq without BMI, shl $8 + bextrq with
; BMI1, and bzhiq with BMI2.
287define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
288; X86-NOBMI-LABEL: bzhi64_a0:
289; X86-NOBMI:       # %bb.0:
290; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
291; X86-NOBMI-NEXT:    movl $1, %eax
292; X86-NOBMI-NEXT:    xorl %edx, %edx
293; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
294; X86-NOBMI-NEXT:    shll %cl, %eax
295; X86-NOBMI-NEXT:    testb $32, %cl
296; X86-NOBMI-NEXT:    je .LBB5_2
297; X86-NOBMI-NEXT:  # %bb.1:
298; X86-NOBMI-NEXT:    movl %eax, %edx
299; X86-NOBMI-NEXT:    xorl %eax, %eax
300; X86-NOBMI-NEXT:  .LBB5_2:
301; X86-NOBMI-NEXT:    addl $-1, %eax
302; X86-NOBMI-NEXT:    adcl $-1, %edx
303; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
304; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
305; X86-NOBMI-NEXT:    retl
306;
307; X86-BMI1-LABEL: bzhi64_a0:
308; X86-BMI1:       # %bb.0:
309; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
310; X86-BMI1-NEXT:    movl $1, %eax
311; X86-BMI1-NEXT:    xorl %edx, %edx
312; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
313; X86-BMI1-NEXT:    shll %cl, %eax
314; X86-BMI1-NEXT:    testb $32, %cl
315; X86-BMI1-NEXT:    je .LBB5_2
316; X86-BMI1-NEXT:  # %bb.1:
317; X86-BMI1-NEXT:    movl %eax, %edx
318; X86-BMI1-NEXT:    xorl %eax, %eax
319; X86-BMI1-NEXT:  .LBB5_2:
320; X86-BMI1-NEXT:    addl $-1, %eax
321; X86-BMI1-NEXT:    adcl $-1, %edx
322; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
323; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
324; X86-BMI1-NEXT:    retl
325;
326; X86-BMI2-LABEL: bzhi64_a0:
327; X86-BMI2:       # %bb.0:
328; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
329; X86-BMI2-NEXT:    movl $1, %eax
330; X86-BMI2-NEXT:    xorl %edx, %edx
331; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
332; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
333; X86-BMI2-NEXT:    testb $32, %cl
334; X86-BMI2-NEXT:    je .LBB5_2
335; X86-BMI2-NEXT:  # %bb.1:
336; X86-BMI2-NEXT:    movl %eax, %edx
337; X86-BMI2-NEXT:    xorl %eax, %eax
338; X86-BMI2-NEXT:  .LBB5_2:
339; X86-BMI2-NEXT:    addl $-1, %eax
340; X86-BMI2-NEXT:    adcl $-1, %edx
341; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
342; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
343; X86-BMI2-NEXT:    retl
344;
345; X64-NOBMI-LABEL: bzhi64_a0:
346; X64-NOBMI:       # %bb.0:
347; X64-NOBMI-NEXT:    movq %rsi, %rcx
348; X64-NOBMI-NEXT:    movl $1, %eax
349; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
350; X64-NOBMI-NEXT:    shlq %cl, %rax
351; X64-NOBMI-NEXT:    decq %rax
352; X64-NOBMI-NEXT:    andq %rdi, %rax
353; X64-NOBMI-NEXT:    retq
354;
355; X64-BMI1-LABEL: bzhi64_a0:
356; X64-BMI1:       # %bb.0:
357; X64-BMI1-NEXT:    shll $8, %esi
358; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
359; X64-BMI1-NEXT:    retq
360;
361; X64-BMI2-LABEL: bzhi64_a0:
362; X64-BMI2:       # %bb.0:
363; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
364; X64-BMI2-NEXT:    retq
365  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits
366  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
367  %masked = and i64 %mask, %val ; mask is the first 'and' operand
368  ret i64 %masked
369}
370
; Pattern a, 64-bit, with the bit count passed as a zeroext i8 and widened to
; i64 by an explicit zext before the shift.
; On i686 the checks for all three feature sets show the i64 mask built from
; two 32-bit halves (shldl plus shll/shlxl, a branch on bit 5 of the count,
; addl/adcl for the decrement) followed by two andl instructions.
; On x86-64 the checks expect shlq/decq/andq without BMI, shl $8 + bextrq with
; BMI1, and bzhiq with BMI2.
371define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
372; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
373; X86-NOBMI:       # %bb.0:
374; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
375; X86-NOBMI-NEXT:    movl $1, %eax
376; X86-NOBMI-NEXT:    xorl %edx, %edx
377; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
378; X86-NOBMI-NEXT:    shll %cl, %eax
379; X86-NOBMI-NEXT:    testb $32, %cl
380; X86-NOBMI-NEXT:    je .LBB6_2
381; X86-NOBMI-NEXT:  # %bb.1:
382; X86-NOBMI-NEXT:    movl %eax, %edx
383; X86-NOBMI-NEXT:    xorl %eax, %eax
384; X86-NOBMI-NEXT:  .LBB6_2:
385; X86-NOBMI-NEXT:    addl $-1, %eax
386; X86-NOBMI-NEXT:    adcl $-1, %edx
387; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
388; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
389; X86-NOBMI-NEXT:    retl
390;
391; X86-BMI1-LABEL: bzhi64_a1_indexzext:
392; X86-BMI1:       # %bb.0:
393; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
394; X86-BMI1-NEXT:    movl $1, %eax
395; X86-BMI1-NEXT:    xorl %edx, %edx
396; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
397; X86-BMI1-NEXT:    shll %cl, %eax
398; X86-BMI1-NEXT:    testb $32, %cl
399; X86-BMI1-NEXT:    je .LBB6_2
400; X86-BMI1-NEXT:  # %bb.1:
401; X86-BMI1-NEXT:    movl %eax, %edx
402; X86-BMI1-NEXT:    xorl %eax, %eax
403; X86-BMI1-NEXT:  .LBB6_2:
404; X86-BMI1-NEXT:    addl $-1, %eax
405; X86-BMI1-NEXT:    adcl $-1, %edx
406; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
407; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
408; X86-BMI1-NEXT:    retl
409;
410; X86-BMI2-LABEL: bzhi64_a1_indexzext:
411; X86-BMI2:       # %bb.0:
412; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
413; X86-BMI2-NEXT:    movl $1, %eax
414; X86-BMI2-NEXT:    xorl %edx, %edx
415; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
416; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
417; X86-BMI2-NEXT:    testb $32, %cl
418; X86-BMI2-NEXT:    je .LBB6_2
419; X86-BMI2-NEXT:  # %bb.1:
420; X86-BMI2-NEXT:    movl %eax, %edx
421; X86-BMI2-NEXT:    xorl %eax, %eax
422; X86-BMI2-NEXT:  .LBB6_2:
423; X86-BMI2-NEXT:    addl $-1, %eax
424; X86-BMI2-NEXT:    adcl $-1, %edx
425; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
426; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
427; X86-BMI2-NEXT:    retl
428;
429; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
430; X64-NOBMI:       # %bb.0:
431; X64-NOBMI-NEXT:    movl %esi, %ecx
432; X64-NOBMI-NEXT:    movl $1, %eax
433; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
434; X64-NOBMI-NEXT:    shlq %cl, %rax
435; X64-NOBMI-NEXT:    decq %rax
436; X64-NOBMI-NEXT:    andq %rdi, %rax
437; X64-NOBMI-NEXT:    retq
438;
439; X64-BMI1-LABEL: bzhi64_a1_indexzext:
440; X64-BMI1:       # %bb.0:
441; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
442; X64-BMI1-NEXT:    shll $8, %esi
443; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
444; X64-BMI1-NEXT:    retq
445;
446; X64-BMI2-LABEL: bzhi64_a1_indexzext:
447; X64-BMI2:       # %bb.0:
448; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
449; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
450; X64-BMI2-NEXT:    retq
451  %conv = zext i8 %numlowbits to i64 ; widen the i8 bit count to the shift width
452  %onebit = shl i64 1, %conv ; 1 << numlowbits
453  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
454  %masked = and i64 %mask, %val
455  ret i64 %masked
456}
457
; Pattern a, 64-bit, with the value to be masked loaded from memory (%w).
; On i686 the checks for all three feature sets show the i64 mask built from
; two 32-bit halves and then and-ed with the two 32-bit words at (%esi) and
; 4(%esi); %esi is saved and restored with pushl/popl.
; On x86-64 the checks expect the load to appear as the memory operand of
; andq (no BMI), bextrq (BMI1) or bzhiq (BMI2).
458define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
459; X86-NOBMI-LABEL: bzhi64_a2_load:
460; X86-NOBMI:       # %bb.0:
461; X86-NOBMI-NEXT:    pushl %esi
462; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
463; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
464; X86-NOBMI-NEXT:    movl $1, %eax
465; X86-NOBMI-NEXT:    xorl %edx, %edx
466; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
467; X86-NOBMI-NEXT:    shll %cl, %eax
468; X86-NOBMI-NEXT:    testb $32, %cl
469; X86-NOBMI-NEXT:    je .LBB7_2
470; X86-NOBMI-NEXT:  # %bb.1:
471; X86-NOBMI-NEXT:    movl %eax, %edx
472; X86-NOBMI-NEXT:    xorl %eax, %eax
473; X86-NOBMI-NEXT:  .LBB7_2:
474; X86-NOBMI-NEXT:    addl $-1, %eax
475; X86-NOBMI-NEXT:    adcl $-1, %edx
476; X86-NOBMI-NEXT:    andl 4(%esi), %edx
477; X86-NOBMI-NEXT:    andl (%esi), %eax
478; X86-NOBMI-NEXT:    popl %esi
479; X86-NOBMI-NEXT:    retl
480;
481; X86-BMI1-LABEL: bzhi64_a2_load:
482; X86-BMI1:       # %bb.0:
483; X86-BMI1-NEXT:    pushl %esi
484; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
485; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
486; X86-BMI1-NEXT:    movl $1, %eax
487; X86-BMI1-NEXT:    xorl %edx, %edx
488; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
489; X86-BMI1-NEXT:    shll %cl, %eax
490; X86-BMI1-NEXT:    testb $32, %cl
491; X86-BMI1-NEXT:    je .LBB7_2
492; X86-BMI1-NEXT:  # %bb.1:
493; X86-BMI1-NEXT:    movl %eax, %edx
494; X86-BMI1-NEXT:    xorl %eax, %eax
495; X86-BMI1-NEXT:  .LBB7_2:
496; X86-BMI1-NEXT:    addl $-1, %eax
497; X86-BMI1-NEXT:    adcl $-1, %edx
498; X86-BMI1-NEXT:    andl 4(%esi), %edx
499; X86-BMI1-NEXT:    andl (%esi), %eax
500; X86-BMI1-NEXT:    popl %esi
501; X86-BMI1-NEXT:    retl
502;
503; X86-BMI2-LABEL: bzhi64_a2_load:
504; X86-BMI2:       # %bb.0:
505; X86-BMI2-NEXT:    pushl %esi
506; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
507; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
508; X86-BMI2-NEXT:    movl $1, %eax
509; X86-BMI2-NEXT:    xorl %edx, %edx
510; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
511; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
512; X86-BMI2-NEXT:    testb $32, %cl
513; X86-BMI2-NEXT:    je .LBB7_2
514; X86-BMI2-NEXT:  # %bb.1:
515; X86-BMI2-NEXT:    movl %eax, %edx
516; X86-BMI2-NEXT:    xorl %eax, %eax
517; X86-BMI2-NEXT:  .LBB7_2:
518; X86-BMI2-NEXT:    addl $-1, %eax
519; X86-BMI2-NEXT:    adcl $-1, %edx
520; X86-BMI2-NEXT:    andl 4(%esi), %edx
521; X86-BMI2-NEXT:    andl (%esi), %eax
522; X86-BMI2-NEXT:    popl %esi
523; X86-BMI2-NEXT:    retl
524;
525; X64-NOBMI-LABEL: bzhi64_a2_load:
526; X64-NOBMI:       # %bb.0:
527; X64-NOBMI-NEXT:    movq %rsi, %rcx
528; X64-NOBMI-NEXT:    movl $1, %eax
529; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
530; X64-NOBMI-NEXT:    shlq %cl, %rax
531; X64-NOBMI-NEXT:    decq %rax
532; X64-NOBMI-NEXT:    andq (%rdi), %rax
533; X64-NOBMI-NEXT:    retq
534;
535; X64-BMI1-LABEL: bzhi64_a2_load:
536; X64-BMI1:       # %bb.0:
537; X64-BMI1-NEXT:    shll $8, %esi
538; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
539; X64-BMI1-NEXT:    retq
540;
541; X64-BMI2-LABEL: bzhi64_a2_load:
542; X64-BMI2:       # %bb.0:
543; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
544; X64-BMI2-NEXT:    retq
545  %val = load i64, ptr %w ; value to mask comes from memory
546  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits
547  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
548  %masked = and i64 %mask, %val
549  ret i64 %masked
550}
551
; Pattern a, 64-bit, combining both variations: the value is loaded from
; memory (%w) and the bit count is a zeroext i8 widened by an explicit zext.
; On i686 the checks for all three feature sets show the i64 mask built from
; two 32-bit halves and then and-ed with the two 32-bit words at (%esi) and
; 4(%esi); %esi is saved and restored with pushl/popl.
; On x86-64 the checks expect the load to appear as the memory operand of
; andq (no BMI), bextrq (BMI1) or bzhiq (BMI2).
552define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
553; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext:
554; X86-NOBMI:       # %bb.0:
555; X86-NOBMI-NEXT:    pushl %esi
556; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
557; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
558; X86-NOBMI-NEXT:    movl $1, %eax
559; X86-NOBMI-NEXT:    xorl %edx, %edx
560; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
561; X86-NOBMI-NEXT:    shll %cl, %eax
562; X86-NOBMI-NEXT:    testb $32, %cl
563; X86-NOBMI-NEXT:    je .LBB8_2
564; X86-NOBMI-NEXT:  # %bb.1:
565; X86-NOBMI-NEXT:    movl %eax, %edx
566; X86-NOBMI-NEXT:    xorl %eax, %eax
567; X86-NOBMI-NEXT:  .LBB8_2:
568; X86-NOBMI-NEXT:    addl $-1, %eax
569; X86-NOBMI-NEXT:    adcl $-1, %edx
570; X86-NOBMI-NEXT:    andl 4(%esi), %edx
571; X86-NOBMI-NEXT:    andl (%esi), %eax
572; X86-NOBMI-NEXT:    popl %esi
573; X86-NOBMI-NEXT:    retl
574;
575; X86-BMI1-LABEL: bzhi64_a3_load_indexzext:
576; X86-BMI1:       # %bb.0:
577; X86-BMI1-NEXT:    pushl %esi
578; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
579; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
580; X86-BMI1-NEXT:    movl $1, %eax
581; X86-BMI1-NEXT:    xorl %edx, %edx
582; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
583; X86-BMI1-NEXT:    shll %cl, %eax
584; X86-BMI1-NEXT:    testb $32, %cl
585; X86-BMI1-NEXT:    je .LBB8_2
586; X86-BMI1-NEXT:  # %bb.1:
587; X86-BMI1-NEXT:    movl %eax, %edx
588; X86-BMI1-NEXT:    xorl %eax, %eax
589; X86-BMI1-NEXT:  .LBB8_2:
590; X86-BMI1-NEXT:    addl $-1, %eax
591; X86-BMI1-NEXT:    adcl $-1, %edx
592; X86-BMI1-NEXT:    andl 4(%esi), %edx
593; X86-BMI1-NEXT:    andl (%esi), %eax
594; X86-BMI1-NEXT:    popl %esi
595; X86-BMI1-NEXT:    retl
596;
597; X86-BMI2-LABEL: bzhi64_a3_load_indexzext:
598; X86-BMI2:       # %bb.0:
599; X86-BMI2-NEXT:    pushl %esi
600; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
601; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
602; X86-BMI2-NEXT:    movl $1, %eax
603; X86-BMI2-NEXT:    xorl %edx, %edx
604; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
605; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
606; X86-BMI2-NEXT:    testb $32, %cl
607; X86-BMI2-NEXT:    je .LBB8_2
608; X86-BMI2-NEXT:  # %bb.1:
609; X86-BMI2-NEXT:    movl %eax, %edx
610; X86-BMI2-NEXT:    xorl %eax, %eax
611; X86-BMI2-NEXT:  .LBB8_2:
612; X86-BMI2-NEXT:    addl $-1, %eax
613; X86-BMI2-NEXT:    adcl $-1, %edx
614; X86-BMI2-NEXT:    andl 4(%esi), %edx
615; X86-BMI2-NEXT:    andl (%esi), %eax
616; X86-BMI2-NEXT:    popl %esi
617; X86-BMI2-NEXT:    retl
618;
619; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext:
620; X64-NOBMI:       # %bb.0:
621; X64-NOBMI-NEXT:    movl %esi, %ecx
622; X64-NOBMI-NEXT:    movl $1, %eax
623; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
624; X64-NOBMI-NEXT:    shlq %cl, %rax
625; X64-NOBMI-NEXT:    decq %rax
626; X64-NOBMI-NEXT:    andq (%rdi), %rax
627; X64-NOBMI-NEXT:    retq
628;
629; X64-BMI1-LABEL: bzhi64_a3_load_indexzext:
630; X64-BMI1:       # %bb.0:
631; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
632; X64-BMI1-NEXT:    shll $8, %esi
633; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
634; X64-BMI1-NEXT:    retq
635;
636; X64-BMI2-LABEL: bzhi64_a3_load_indexzext:
637; X64-BMI2:       # %bb.0:
638; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
639; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
640; X64-BMI2-NEXT:    retq
641  %val = load i64, ptr %w ; value to mask comes from memory
642  %conv = zext i8 %numlowbits to i64 ; widen the i8 bit count to the shift width
643  %onebit = shl i64 1, %conv ; 1 << numlowbits
644  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
645  %masked = and i64 %mask, %val
646  ret i64 %masked
647}
648
; Pattern a, 64-bit, with the operands of the final 'and' swapped
; (%val first, %mask second).
; On i686 the checks for all three feature sets show the i64 mask built from
; two 32-bit halves (shldl plus shll/shlxl, a branch on bit 5 of the count,
; addl/adcl for the decrement) followed by two andl instructions.
; On x86-64 the checks expect shlq/decq/andq without BMI, shl $8 + bextrq with
; BMI1, and bzhiq with BMI2.
649define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
650; X86-NOBMI-LABEL: bzhi64_a4_commutative:
651; X86-NOBMI:       # %bb.0:
652; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
653; X86-NOBMI-NEXT:    movl $1, %eax
654; X86-NOBMI-NEXT:    xorl %edx, %edx
655; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
656; X86-NOBMI-NEXT:    shll %cl, %eax
657; X86-NOBMI-NEXT:    testb $32, %cl
658; X86-NOBMI-NEXT:    je .LBB9_2
659; X86-NOBMI-NEXT:  # %bb.1:
660; X86-NOBMI-NEXT:    movl %eax, %edx
661; X86-NOBMI-NEXT:    xorl %eax, %eax
662; X86-NOBMI-NEXT:  .LBB9_2:
663; X86-NOBMI-NEXT:    addl $-1, %eax
664; X86-NOBMI-NEXT:    adcl $-1, %edx
665; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
666; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
667; X86-NOBMI-NEXT:    retl
668;
669; X86-BMI1-LABEL: bzhi64_a4_commutative:
670; X86-BMI1:       # %bb.0:
671; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
672; X86-BMI1-NEXT:    movl $1, %eax
673; X86-BMI1-NEXT:    xorl %edx, %edx
674; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
675; X86-BMI1-NEXT:    shll %cl, %eax
676; X86-BMI1-NEXT:    testb $32, %cl
677; X86-BMI1-NEXT:    je .LBB9_2
678; X86-BMI1-NEXT:  # %bb.1:
679; X86-BMI1-NEXT:    movl %eax, %edx
680; X86-BMI1-NEXT:    xorl %eax, %eax
681; X86-BMI1-NEXT:  .LBB9_2:
682; X86-BMI1-NEXT:    addl $-1, %eax
683; X86-BMI1-NEXT:    adcl $-1, %edx
684; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
685; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
686; X86-BMI1-NEXT:    retl
687;
688; X86-BMI2-LABEL: bzhi64_a4_commutative:
689; X86-BMI2:       # %bb.0:
690; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
691; X86-BMI2-NEXT:    movl $1, %eax
692; X86-BMI2-NEXT:    xorl %edx, %edx
693; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
694; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
695; X86-BMI2-NEXT:    testb $32, %cl
696; X86-BMI2-NEXT:    je .LBB9_2
697; X86-BMI2-NEXT:  # %bb.1:
698; X86-BMI2-NEXT:    movl %eax, %edx
699; X86-BMI2-NEXT:    xorl %eax, %eax
700; X86-BMI2-NEXT:  .LBB9_2:
701; X86-BMI2-NEXT:    addl $-1, %eax
702; X86-BMI2-NEXT:    adcl $-1, %edx
703; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
704; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
705; X86-BMI2-NEXT:    retl
706;
707; X64-NOBMI-LABEL: bzhi64_a4_commutative:
708; X64-NOBMI:       # %bb.0:
709; X64-NOBMI-NEXT:    movq %rsi, %rcx
710; X64-NOBMI-NEXT:    movl $1, %eax
711; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
712; X64-NOBMI-NEXT:    shlq %cl, %rax
713; X64-NOBMI-NEXT:    decq %rax
714; X64-NOBMI-NEXT:    andq %rdi, %rax
715; X64-NOBMI-NEXT:    retq
716;
717; X64-BMI1-LABEL: bzhi64_a4_commutative:
718; X64-BMI1:       # %bb.0:
719; X64-BMI1-NEXT:    shll $8, %esi
720; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
721; X64-BMI1-NEXT:    retq
722;
723; X64-BMI2-LABEL: bzhi64_a4_commutative:
724; X64-BMI2:       # %bb.0:
725; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
726; X64-BMI2-NEXT:    retq
727  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits
728  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1
729  %masked = and i64 %val, %mask ; swapped order
730  ret i64 %masked
731}
732
733; 64-bit, but with 32-bit output
734
735; Everything done in 64-bit, truncation happens last.
; Pattern a computed entirely in i64, with the masked result truncated to i32
; at the end.
; On i686 the checks show only a 32-bit mask being formed: a 32-bit shift whose
; result is replaced by zero when bit 5 of the count is set (testb $32), then
; decl/andl. On x86-64 the checks expect a 64-bit shlq followed by 32-bit
; decl/andl without BMI, shl $8 + bextrl with BMI1, and bzhil with BMI2.
736define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
737; X86-NOBMI-LABEL: bzhi64_32_a0:
738; X86-NOBMI:       # %bb.0:
739; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
740; X86-NOBMI-NEXT:    movl $1, %edx
741; X86-NOBMI-NEXT:    shll %cl, %edx
742; X86-NOBMI-NEXT:    xorl %eax, %eax
743; X86-NOBMI-NEXT:    testb $32, %cl
744; X86-NOBMI-NEXT:    jne .LBB10_2
745; X86-NOBMI-NEXT:  # %bb.1:
746; X86-NOBMI-NEXT:    movl %edx, %eax
747; X86-NOBMI-NEXT:  .LBB10_2:
748; X86-NOBMI-NEXT:    decl %eax
749; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
750; X86-NOBMI-NEXT:    retl
751;
752; X86-BMI1-LABEL: bzhi64_32_a0:
753; X86-BMI1:       # %bb.0:
754; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
755; X86-BMI1-NEXT:    movl $1, %edx
756; X86-BMI1-NEXT:    shll %cl, %edx
757; X86-BMI1-NEXT:    xorl %eax, %eax
758; X86-BMI1-NEXT:    testb $32, %cl
759; X86-BMI1-NEXT:    jne .LBB10_2
760; X86-BMI1-NEXT:  # %bb.1:
761; X86-BMI1-NEXT:    movl %edx, %eax
762; X86-BMI1-NEXT:  .LBB10_2:
763; X86-BMI1-NEXT:    decl %eax
764; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
765; X86-BMI1-NEXT:    retl
766;
767; X86-BMI2-LABEL: bzhi64_32_a0:
768; X86-BMI2:       # %bb.0:
769; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
770; X86-BMI2-NEXT:    xorl %eax, %eax
771; X86-BMI2-NEXT:    testb $32, %cl
772; X86-BMI2-NEXT:    jne .LBB10_2
773; X86-BMI2-NEXT:  # %bb.1:
774; X86-BMI2-NEXT:    movl $1, %eax
775; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
776; X86-BMI2-NEXT:  .LBB10_2:
777; X86-BMI2-NEXT:    decl %eax
778; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
779; X86-BMI2-NEXT:    retl
780;
781; X64-NOBMI-LABEL: bzhi64_32_a0:
782; X64-NOBMI:       # %bb.0:
783; X64-NOBMI-NEXT:    movq %rsi, %rcx
784; X64-NOBMI-NEXT:    movl $1, %eax
785; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
786; X64-NOBMI-NEXT:    shlq %cl, %rax
787; X64-NOBMI-NEXT:    decl %eax
788; X64-NOBMI-NEXT:    andl %edi, %eax
789; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
790; X64-NOBMI-NEXT:    retq
791;
792; X64-BMI1-LABEL: bzhi64_32_a0:
793; X64-BMI1:       # %bb.0:
794; X64-BMI1-NEXT:    shll $8, %esi
795; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
796; X64-BMI1-NEXT:    retq
797;
798; X64-BMI2-LABEL: bzhi64_32_a0:
799; X64-BMI2:       # %bb.0:
800; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
801; X64-BMI2-NEXT:    retq
802  %onebit = shl i64 1, %numlowbits ; 1 << numlowbits, in 64-bit
803  %mask = add nsw i64 %onebit, -1 ; (1 << numlowbits) - 1, in 64-bit
804  %masked = and i64 %mask, %val ; 64-bit masking
805  %res = trunc i64 %masked to i32 ; only the low 32 bits are returned
806  ret i32 %res
807}
808
809; The value is truncated from 64-bit to 32-bit first. Mask creation and masking are 32-bit.
; Pattern a applied to the low 32 bits of an i64 argument: %val is truncated
; to i32, and the shl/add/and that build and apply the mask are all i32.
; The checks below expect shl/dec/and without BMI, shl $8 + bextrl with BMI1,
; and bzhil with BMI2.
810define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
811; X86-NOBMI-LABEL: bzhi64_32_a1:
812; X86-NOBMI:       # %bb.0:
813; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
814; X86-NOBMI-NEXT:    movl $1, %eax
815; X86-NOBMI-NEXT:    shll %cl, %eax
816; X86-NOBMI-NEXT:    decl %eax
817; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
818; X86-NOBMI-NEXT:    retl
819;
820; X86-BMI1-LABEL: bzhi64_32_a1:
821; X86-BMI1:       # %bb.0:
822; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
823; X86-BMI1-NEXT:    shll $8, %eax
824; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
825; X86-BMI1-NEXT:    retl
826;
827; X86-BMI2-LABEL: bzhi64_32_a1:
828; X86-BMI2:       # %bb.0:
829; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
830; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
831; X86-BMI2-NEXT:    retl
832;
833; X64-NOBMI-LABEL: bzhi64_32_a1:
834; X64-NOBMI:       # %bb.0:
835; X64-NOBMI-NEXT:    movl %esi, %ecx
836; X64-NOBMI-NEXT:    movl $1, %eax
837; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
838; X64-NOBMI-NEXT:    shll %cl, %eax
839; X64-NOBMI-NEXT:    decl %eax
840; X64-NOBMI-NEXT:    andl %edi, %eax
841; X64-NOBMI-NEXT:    retq
842;
843; X64-BMI1-LABEL: bzhi64_32_a1:
844; X64-BMI1:       # %bb.0:
845; X64-BMI1-NEXT:    shll $8, %esi
846; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
847; X64-BMI1-NEXT:    retq
848;
849; X64-BMI2-LABEL: bzhi64_32_a1:
850; X64-BMI2:       # %bb.0:
851; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
852; X64-BMI2-NEXT:    retq
853  %truncval = trunc i64 %val to i32 ; keep only the low 32 bits of the input
854  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits, in 32-bit
855  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
856  %masked = and i32 %mask, %truncval
857  ret i32 %masked
858}
859
860; The value is truncated from 64-bit to 32-bit first (the truncated value has an
861; extra use: it is also stored to %escape). Mask creation and masking are 32-bit.
; Pattern a applied to the low 32 bits of an i64 argument, where the truncated
; value has a second use: it is also stored through %escape.
; The checks below expect the 32-bit store to be emitted and the masking to
; still be selected as shl/dec/and without BMI, shl $8 + bextrl with BMI1, and
; bzhil with BMI2.
862define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) nounwind {
863; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
864; X86-NOBMI:       # %bb.0:
865; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
866; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
867; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
868; X86-NOBMI-NEXT:    movl %edx, (%eax)
869; X86-NOBMI-NEXT:    movl $1, %eax
870; X86-NOBMI-NEXT:    shll %cl, %eax
871; X86-NOBMI-NEXT:    decl %eax
872; X86-NOBMI-NEXT:    andl %edx, %eax
873; X86-NOBMI-NEXT:    retl
874;
875; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
876; X86-BMI1:       # %bb.0:
877; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
878; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
879; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
880; X86-BMI1-NEXT:    movl %ecx, (%edx)
881; X86-BMI1-NEXT:    shll $8, %eax
882; X86-BMI1-NEXT:    bextrl %eax, %ecx, %eax
883; X86-BMI1-NEXT:    retl
884;
885; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
886; X86-BMI2:       # %bb.0:
887; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
888; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
889; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
890; X86-BMI2-NEXT:    movl %ecx, (%edx)
891; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
892; X86-BMI2-NEXT:    retl
893;
894; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
895; X64-NOBMI:       # %bb.0:
896; X64-NOBMI-NEXT:    movl %esi, %ecx
897; X64-NOBMI-NEXT:    movl %edi, (%rdx)
898; X64-NOBMI-NEXT:    movl $1, %eax
899; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
900; X64-NOBMI-NEXT:    shll %cl, %eax
901; X64-NOBMI-NEXT:    decl %eax
902; X64-NOBMI-NEXT:    andl %edi, %eax
903; X64-NOBMI-NEXT:    retq
904;
905; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
906; X64-BMI1:       # %bb.0:
907; X64-BMI1-NEXT:    movl %edi, (%rdx)
908; X64-BMI1-NEXT:    shll $8, %esi
909; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
910; X64-BMI1-NEXT:    retq
911;
912; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
913; X64-BMI2:       # %bb.0:
914; X64-BMI2-NEXT:    movl %edi, (%rdx)
915; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
916; X64-BMI2-NEXT:    retq
917  %truncval = trunc i64 %val to i32 ; keep only the low 32 bits of the input
918  store i32 %truncval, ptr %escape ; extra use of the truncated value
919  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits, in 32-bit
920  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
921  %masked = and i32 %mask, %truncval
922  ret i32 %masked
923}
924
925; The mask is computed in 32-bit, then zero-extended to 64-bit.
926; Masking is 64-bit. Then truncation.
; Only the low 32 bits of the masked value are returned.
927define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
928; X86-NOBMI-LABEL: bzhi64_32_a2:
929; X86-NOBMI:       # %bb.0:
930; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
931; X86-NOBMI-NEXT:    movl $1, %eax
932; X86-NOBMI-NEXT:    shll %cl, %eax
933; X86-NOBMI-NEXT:    decl %eax
934; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
935; X86-NOBMI-NEXT:    retl
936;
937; X86-BMI1-LABEL: bzhi64_32_a2:
938; X86-BMI1:       # %bb.0:
939; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
940; X86-BMI1-NEXT:    shll $8, %eax
941; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
942; X86-BMI1-NEXT:    retl
943;
944; X86-BMI2-LABEL: bzhi64_32_a2:
945; X86-BMI2:       # %bb.0:
946; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
947; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
948; X86-BMI2-NEXT:    retl
949;
950; X64-NOBMI-LABEL: bzhi64_32_a2:
951; X64-NOBMI:       # %bb.0:
952; X64-NOBMI-NEXT:    movl %esi, %ecx
953; X64-NOBMI-NEXT:    movl $1, %eax
954; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
955; X64-NOBMI-NEXT:    shll %cl, %eax
956; X64-NOBMI-NEXT:    decl %eax
957; X64-NOBMI-NEXT:    andl %edi, %eax
958; X64-NOBMI-NEXT:    retq
959;
960; X64-BMI1-LABEL: bzhi64_32_a2:
961; X64-BMI1:       # %bb.0:
962; X64-BMI1-NEXT:    shll $8, %esi
963; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
964; X64-BMI1-NEXT:    retq
965;
966; X64-BMI2-LABEL: bzhi64_32_a2:
967; X64-BMI2:       # %bb.0:
968; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
969; X64-BMI2-NEXT:    retq
970  %onebit = shl i32 1, %numlowbits
971  %mask = add nsw i32 %onebit, -1
972  %zextmask = zext i32 %mask to i64
973  %masked = and i64 %zextmask, %val
974  %truncmasked = trunc i64 %masked to i32
975  ret i32 %truncmasked
976}
977
978; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
979; Masking is 64-bit. Then truncation.
; The constant added to the shifted bit is 4294967295 (0xFFFFFFFF) rather than
; -1: it equals -1 modulo 2^32, so the low 32 bits of the mask, which are all
; that survive the final truncation, equal ((1 << numlowbits) - 1) modulo 2^32.
980define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
981; X86-NOBMI-LABEL: bzhi64_32_a3:
982; X86-NOBMI:       # %bb.0:
983; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
984; X86-NOBMI-NEXT:    movl $1, %edx
985; X86-NOBMI-NEXT:    shll %cl, %edx
986; X86-NOBMI-NEXT:    xorl %eax, %eax
987; X86-NOBMI-NEXT:    testb $32, %cl
988; X86-NOBMI-NEXT:    jne .LBB14_2
989; X86-NOBMI-NEXT:  # %bb.1:
990; X86-NOBMI-NEXT:    movl %edx, %eax
991; X86-NOBMI-NEXT:  .LBB14_2:
992; X86-NOBMI-NEXT:    decl %eax
993; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
994; X86-NOBMI-NEXT:    retl
995;
996; X86-BMI1-LABEL: bzhi64_32_a3:
997; X86-BMI1:       # %bb.0:
998; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
999; X86-BMI1-NEXT:    movl $1, %edx
1000; X86-BMI1-NEXT:    shll %cl, %edx
1001; X86-BMI1-NEXT:    xorl %eax, %eax
1002; X86-BMI1-NEXT:    testb $32, %cl
1003; X86-BMI1-NEXT:    jne .LBB14_2
1004; X86-BMI1-NEXT:  # %bb.1:
1005; X86-BMI1-NEXT:    movl %edx, %eax
1006; X86-BMI1-NEXT:  .LBB14_2:
1007; X86-BMI1-NEXT:    decl %eax
1008; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
1009; X86-BMI1-NEXT:    retl
1010;
1011; X86-BMI2-LABEL: bzhi64_32_a3:
1012; X86-BMI2:       # %bb.0:
1013; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1014; X86-BMI2-NEXT:    xorl %eax, %eax
1015; X86-BMI2-NEXT:    testb $32, %cl
1016; X86-BMI2-NEXT:    jne .LBB14_2
1017; X86-BMI2-NEXT:  # %bb.1:
1018; X86-BMI2-NEXT:    movl $1, %eax
1019; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1020; X86-BMI2-NEXT:  .LBB14_2:
1021; X86-BMI2-NEXT:    decl %eax
1022; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1023; X86-BMI2-NEXT:    retl
1024;
1025; X64-NOBMI-LABEL: bzhi64_32_a3:
1026; X64-NOBMI:       # %bb.0:
1027; X64-NOBMI-NEXT:    movq %rsi, %rcx
1028; X64-NOBMI-NEXT:    movl $1, %eax
1029; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1030; X64-NOBMI-NEXT:    shlq %cl, %rax
1031; X64-NOBMI-NEXT:    decl %eax
1032; X64-NOBMI-NEXT:    andl %edi, %eax
1033; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
1034; X64-NOBMI-NEXT:    retq
1035;
1036; X64-BMI1-LABEL: bzhi64_32_a3:
1037; X64-BMI1:       # %bb.0:
1038; X64-BMI1-NEXT:    shll $8, %esi
1039; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1040; X64-BMI1-NEXT:    retq
1041;
1042; X64-BMI2-LABEL: bzhi64_32_a3:
1043; X64-BMI2:       # %bb.0:
1044; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1045; X64-BMI2-NEXT:    retq
1046  %onebit = shl i64 1, %numlowbits
1047  %mask = add nsw i64 %onebit, 4294967295
1048  %masked = and i64 %mask, %val
1049  %truncmasked = trunc i64 %masked to i32
1050  ret i32 %truncmasked
1051}
1052
1053; ---------------------------------------------------------------------------- ;
1054; Pattern b. 32-bit
1055; ---------------------------------------------------------------------------- ;
1056
; Baseline pattern b in 32-bit: %val & ~(-1 << %numlowbits).
; The BMI1 runs are checked for bextrl with the bit count shifted into bits
; 8 and up of the control operand; the BMI2 runs are checked for bzhil.
1057define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
1058; X86-NOBMI-LABEL: bzhi32_b0:
1059; X86-NOBMI:       # %bb.0:
1060; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1061; X86-NOBMI-NEXT:    movl $-1, %eax
1062; X86-NOBMI-NEXT:    shll %cl, %eax
1063; X86-NOBMI-NEXT:    notl %eax
1064; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1065; X86-NOBMI-NEXT:    retl
1066;
1067; X86-BMI1-LABEL: bzhi32_b0:
1068; X86-BMI1:       # %bb.0:
1069; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1070; X86-BMI1-NEXT:    shll $8, %eax
1071; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1072; X86-BMI1-NEXT:    retl
1073;
1074; X86-BMI2-LABEL: bzhi32_b0:
1075; X86-BMI2:       # %bb.0:
1076; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1077; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1078; X86-BMI2-NEXT:    retl
1079;
1080; X64-NOBMI-LABEL: bzhi32_b0:
1081; X64-NOBMI:       # %bb.0:
1082; X64-NOBMI-NEXT:    movl %esi, %ecx
1083; X64-NOBMI-NEXT:    movl $-1, %eax
1084; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1085; X64-NOBMI-NEXT:    shll %cl, %eax
1086; X64-NOBMI-NEXT:    notl %eax
1087; X64-NOBMI-NEXT:    andl %edi, %eax
1088; X64-NOBMI-NEXT:    retq
1089;
1090; X64-BMI1-LABEL: bzhi32_b0:
1091; X64-BMI1:       # %bb.0:
1092; X64-BMI1-NEXT:    shll $8, %esi
1093; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1094; X64-BMI1-NEXT:    retq
1095;
1096; X64-BMI2-LABEL: bzhi32_b0:
1097; X64-BMI2:       # %bb.0:
1098; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1099; X64-BMI2-NEXT:    retq
1100  %notmask = shl i32 -1, %numlowbits
1101  %mask = xor i32 %notmask, -1
1102  %masked = and i32 %mask, %val
1103  ret i32 %masked
1104}
1105
; Pattern b in 32-bit where the bit count is an i8 (zeroext) argument that is
; widened to i32 with zext before it is used as the shift amount.
1106define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
1107; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
1108; X86-NOBMI:       # %bb.0:
1109; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1110; X86-NOBMI-NEXT:    movl $-1, %eax
1111; X86-NOBMI-NEXT:    shll %cl, %eax
1112; X86-NOBMI-NEXT:    notl %eax
1113; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1114; X86-NOBMI-NEXT:    retl
1115;
1116; X86-BMI1-LABEL: bzhi32_b1_indexzext:
1117; X86-BMI1:       # %bb.0:
1118; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1119; X86-BMI1-NEXT:    shll $8, %eax
1120; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1121; X86-BMI1-NEXT:    retl
1122;
1123; X86-BMI2-LABEL: bzhi32_b1_indexzext:
1124; X86-BMI2:       # %bb.0:
1125; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1126; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1127; X86-BMI2-NEXT:    retl
1128;
1129; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
1130; X64-NOBMI:       # %bb.0:
1131; X64-NOBMI-NEXT:    movl %esi, %ecx
1132; X64-NOBMI-NEXT:    movl $-1, %eax
1133; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1134; X64-NOBMI-NEXT:    shll %cl, %eax
1135; X64-NOBMI-NEXT:    notl %eax
1136; X64-NOBMI-NEXT:    andl %edi, %eax
1137; X64-NOBMI-NEXT:    retq
1138;
1139; X64-BMI1-LABEL: bzhi32_b1_indexzext:
1140; X64-BMI1:       # %bb.0:
1141; X64-BMI1-NEXT:    shll $8, %esi
1142; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1143; X64-BMI1-NEXT:    retq
1144;
1145; X64-BMI2-LABEL: bzhi32_b1_indexzext:
1146; X64-BMI2:       # %bb.0:
1147; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1148; X64-BMI2-NEXT:    retq
1149  %conv = zext i8 %numlowbits to i32
1150  %notmask = shl i32 -1, %conv
1151  %mask = xor i32 %notmask, -1
1152  %masked = and i32 %mask, %val
1153  ret i32 %masked
1154}
1155
; Pattern b in 32-bit where the value to mask is loaded from %w.
; The BMI runs are checked for the load being used directly as the memory
; operand of bextrl / bzhil.
1156define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
1157; X86-NOBMI-LABEL: bzhi32_b2_load:
1158; X86-NOBMI:       # %bb.0:
1159; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1160; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1161; X86-NOBMI-NEXT:    movl $-1, %eax
1162; X86-NOBMI-NEXT:    shll %cl, %eax
1163; X86-NOBMI-NEXT:    notl %eax
1164; X86-NOBMI-NEXT:    andl (%edx), %eax
1165; X86-NOBMI-NEXT:    retl
1166;
1167; X86-BMI1-LABEL: bzhi32_b2_load:
1168; X86-BMI1:       # %bb.0:
1169; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1170; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1171; X86-BMI1-NEXT:    shll $8, %ecx
1172; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
1173; X86-BMI1-NEXT:    retl
1174;
1175; X86-BMI2-LABEL: bzhi32_b2_load:
1176; X86-BMI2:       # %bb.0:
1177; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1178; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1179; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1180; X86-BMI2-NEXT:    retl
1181;
1182; X64-NOBMI-LABEL: bzhi32_b2_load:
1183; X64-NOBMI:       # %bb.0:
1184; X64-NOBMI-NEXT:    movl %esi, %ecx
1185; X64-NOBMI-NEXT:    movl $-1, %eax
1186; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1187; X64-NOBMI-NEXT:    shll %cl, %eax
1188; X64-NOBMI-NEXT:    notl %eax
1189; X64-NOBMI-NEXT:    andl (%rdi), %eax
1190; X64-NOBMI-NEXT:    retq
1191;
1192; X64-BMI1-LABEL: bzhi32_b2_load:
1193; X64-BMI1:       # %bb.0:
1194; X64-BMI1-NEXT:    shll $8, %esi
1195; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
1196; X64-BMI1-NEXT:    retq
1197;
1198; X64-BMI2-LABEL: bzhi32_b2_load:
1199; X64-BMI2:       # %bb.0:
1200; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1201; X64-BMI2-NEXT:    retq
1202  %val = load i32, ptr %w
1203  %notmask = shl i32 -1, %numlowbits
1204  %mask = xor i32 %notmask, -1
1205  %masked = and i32 %mask, %val
1206  ret i32 %masked
1207}
1208
; Pattern b in 32-bit combining both variations: the value to mask is loaded
; from %w, and the bit count is an i8 (zeroext) argument widened with zext.
1209define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
1210; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
1211; X86-NOBMI:       # %bb.0:
1212; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1213; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1214; X86-NOBMI-NEXT:    movl $-1, %eax
1215; X86-NOBMI-NEXT:    shll %cl, %eax
1216; X86-NOBMI-NEXT:    notl %eax
1217; X86-NOBMI-NEXT:    andl (%edx), %eax
1218; X86-NOBMI-NEXT:    retl
1219;
1220; X86-BMI1-LABEL: bzhi32_b3_load_indexzext:
1221; X86-BMI1:       # %bb.0:
1222; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1223; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1224; X86-BMI1-NEXT:    shll $8, %ecx
1225; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
1226; X86-BMI1-NEXT:    retl
1227;
1228; X86-BMI2-LABEL: bzhi32_b3_load_indexzext:
1229; X86-BMI2:       # %bb.0:
1230; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1231; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1232; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1233; X86-BMI2-NEXT:    retl
1234;
1235; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
1236; X64-NOBMI:       # %bb.0:
1237; X64-NOBMI-NEXT:    movl %esi, %ecx
1238; X64-NOBMI-NEXT:    movl $-1, %eax
1239; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1240; X64-NOBMI-NEXT:    shll %cl, %eax
1241; X64-NOBMI-NEXT:    notl %eax
1242; X64-NOBMI-NEXT:    andl (%rdi), %eax
1243; X64-NOBMI-NEXT:    retq
1244;
1245; X64-BMI1-LABEL: bzhi32_b3_load_indexzext:
1246; X64-BMI1:       # %bb.0:
1247; X64-BMI1-NEXT:    shll $8, %esi
1248; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
1249; X64-BMI1-NEXT:    retq
1250;
1251; X64-BMI2-LABEL: bzhi32_b3_load_indexzext:
1252; X64-BMI2:       # %bb.0:
1253; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1254; X64-BMI2-NEXT:    retq
1255  %val = load i32, ptr %w
1256  %conv = zext i8 %numlowbits to i32
1257  %notmask = shl i32 -1, %conv
1258  %mask = xor i32 %notmask, -1
1259  %masked = and i32 %mask, %val
1260  ret i32 %masked
1261}
1262
; Pattern b in 32-bit with the operands of the final 'and' swapped (%val
; first, %mask second). The checked output is the same instruction sequences
; as for bzhi32_b0, so the match must not depend on operand order.
1263define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
1264; X86-NOBMI-LABEL: bzhi32_b4_commutative:
1265; X86-NOBMI:       # %bb.0:
1266; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1267; X86-NOBMI-NEXT:    movl $-1, %eax
1268; X86-NOBMI-NEXT:    shll %cl, %eax
1269; X86-NOBMI-NEXT:    notl %eax
1270; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1271; X86-NOBMI-NEXT:    retl
1272;
1273; X86-BMI1-LABEL: bzhi32_b4_commutative:
1274; X86-BMI1:       # %bb.0:
1275; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1276; X86-BMI1-NEXT:    shll $8, %eax
1277; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1278; X86-BMI1-NEXT:    retl
1279;
1280; X86-BMI2-LABEL: bzhi32_b4_commutative:
1281; X86-BMI2:       # %bb.0:
1282; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1283; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1284; X86-BMI2-NEXT:    retl
1285;
1286; X64-NOBMI-LABEL: bzhi32_b4_commutative:
1287; X64-NOBMI:       # %bb.0:
1288; X64-NOBMI-NEXT:    movl %esi, %ecx
1289; X64-NOBMI-NEXT:    movl $-1, %eax
1290; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1291; X64-NOBMI-NEXT:    shll %cl, %eax
1292; X64-NOBMI-NEXT:    notl %eax
1293; X64-NOBMI-NEXT:    andl %edi, %eax
1294; X64-NOBMI-NEXT:    retq
1295;
1296; X64-BMI1-LABEL: bzhi32_b4_commutative:
1297; X64-BMI1:       # %bb.0:
1298; X64-BMI1-NEXT:    shll $8, %esi
1299; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1300; X64-BMI1-NEXT:    retq
1301;
1302; X64-BMI2-LABEL: bzhi32_b4_commutative:
1303; X64-BMI2:       # %bb.0:
1304; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1305; X64-BMI2-NEXT:    retq
1306  %notmask = shl i32 -1, %numlowbits
1307  %mask = xor i32 %notmask, -1
1308  %masked = and i32 %val, %mask ; swapped order
1309  ret i32 %masked
1310}
1311
1312; 64-bit
1313
; Baseline pattern b in 64-bit: %val & ~(-1 << %numlowbits), all in i64.
; For the i686 runs the checks show the mask being built in two 32-bit halves
; with a test of bit 5 of the count (testb $32) selecting between them; the
; x86_64 runs with BMI1 / BMI2 are checked for bextrq / bzhiq.
1314define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
1315; X86-NOBMI-LABEL: bzhi64_b0:
1316; X86-NOBMI:       # %bb.0:
1317; X86-NOBMI-NEXT:    pushl %esi
1318; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1319; X86-NOBMI-NEXT:    movl $-1, %edx
1320; X86-NOBMI-NEXT:    movl $-1, %esi
1321; X86-NOBMI-NEXT:    shll %cl, %esi
1322; X86-NOBMI-NEXT:    xorl %eax, %eax
1323; X86-NOBMI-NEXT:    testb $32, %cl
1324; X86-NOBMI-NEXT:    jne .LBB20_1
1325; X86-NOBMI-NEXT:  # %bb.2:
1326; X86-NOBMI-NEXT:    movl %esi, %eax
1327; X86-NOBMI-NEXT:    jmp .LBB20_3
1328; X86-NOBMI-NEXT:  .LBB20_1:
1329; X86-NOBMI-NEXT:    movl %esi, %edx
1330; X86-NOBMI-NEXT:  .LBB20_3:
1331; X86-NOBMI-NEXT:    notl %edx
1332; X86-NOBMI-NEXT:    notl %eax
1333; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1334; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1335; X86-NOBMI-NEXT:    popl %esi
1336; X86-NOBMI-NEXT:    retl
1337;
1338; X86-BMI1-LABEL: bzhi64_b0:
1339; X86-BMI1:       # %bb.0:
1340; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1341; X86-BMI1-NEXT:    movl $-1, %edx
1342; X86-BMI1-NEXT:    movl $-1, %eax
1343; X86-BMI1-NEXT:    shll %cl, %eax
1344; X86-BMI1-NEXT:    testb $32, %cl
1345; X86-BMI1-NEXT:    je .LBB20_2
1346; X86-BMI1-NEXT:  # %bb.1:
1347; X86-BMI1-NEXT:    movl %eax, %edx
1348; X86-BMI1-NEXT:    xorl %eax, %eax
1349; X86-BMI1-NEXT:  .LBB20_2:
1350; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1351; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1352; X86-BMI1-NEXT:    retl
1353;
1354; X86-BMI2-LABEL: bzhi64_b0:
1355; X86-BMI2:       # %bb.0:
1356; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
1357; X86-BMI2-NEXT:    movl $-1, %ecx
1358; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1359; X86-BMI2-NEXT:    testb $32, %dl
1360; X86-BMI2-NEXT:    je .LBB20_2
1361; X86-BMI2-NEXT:  # %bb.1:
1362; X86-BMI2-NEXT:    movl %eax, %ecx
1363; X86-BMI2-NEXT:    xorl %eax, %eax
1364; X86-BMI2-NEXT:  .LBB20_2:
1365; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1366; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1367; X86-BMI2-NEXT:    retl
1368;
1369; X64-NOBMI-LABEL: bzhi64_b0:
1370; X64-NOBMI:       # %bb.0:
1371; X64-NOBMI-NEXT:    movq %rsi, %rcx
1372; X64-NOBMI-NEXT:    movq $-1, %rax
1373; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1374; X64-NOBMI-NEXT:    shlq %cl, %rax
1375; X64-NOBMI-NEXT:    notq %rax
1376; X64-NOBMI-NEXT:    andq %rdi, %rax
1377; X64-NOBMI-NEXT:    retq
1378;
1379; X64-BMI1-LABEL: bzhi64_b0:
1380; X64-BMI1:       # %bb.0:
1381; X64-BMI1-NEXT:    shll $8, %esi
1382; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1383; X64-BMI1-NEXT:    retq
1384;
1385; X64-BMI2-LABEL: bzhi64_b0:
1386; X64-BMI2:       # %bb.0:
1387; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1388; X64-BMI2-NEXT:    retq
1389  %notmask = shl i64 -1, %numlowbits
1390  %mask = xor i64 %notmask, -1
1391  %masked = and i64 %mask, %val
1392  ret i64 %masked
1393}
1394
; Pattern b in 64-bit where the bit count is an i8 (zeroext) argument that is
; widened to i64 with zext before it is used as the shift amount.
1395define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
1396; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
1397; X86-NOBMI:       # %bb.0:
1398; X86-NOBMI-NEXT:    pushl %esi
1399; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1400; X86-NOBMI-NEXT:    movl $-1, %edx
1401; X86-NOBMI-NEXT:    movl $-1, %esi
1402; X86-NOBMI-NEXT:    shll %cl, %esi
1403; X86-NOBMI-NEXT:    xorl %eax, %eax
1404; X86-NOBMI-NEXT:    testb $32, %cl
1405; X86-NOBMI-NEXT:    jne .LBB21_1
1406; X86-NOBMI-NEXT:  # %bb.2:
1407; X86-NOBMI-NEXT:    movl %esi, %eax
1408; X86-NOBMI-NEXT:    jmp .LBB21_3
1409; X86-NOBMI-NEXT:  .LBB21_1:
1410; X86-NOBMI-NEXT:    movl %esi, %edx
1411; X86-NOBMI-NEXT:  .LBB21_3:
1412; X86-NOBMI-NEXT:    notl %edx
1413; X86-NOBMI-NEXT:    notl %eax
1414; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1415; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1416; X86-NOBMI-NEXT:    popl %esi
1417; X86-NOBMI-NEXT:    retl
1418;
1419; X86-BMI1-LABEL: bzhi64_b1_indexzext:
1420; X86-BMI1:       # %bb.0:
1421; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1422; X86-BMI1-NEXT:    movl $-1, %edx
1423; X86-BMI1-NEXT:    movl $-1, %eax
1424; X86-BMI1-NEXT:    shll %cl, %eax
1425; X86-BMI1-NEXT:    testb $32, %cl
1426; X86-BMI1-NEXT:    je .LBB21_2
1427; X86-BMI1-NEXT:  # %bb.1:
1428; X86-BMI1-NEXT:    movl %eax, %edx
1429; X86-BMI1-NEXT:    xorl %eax, %eax
1430; X86-BMI1-NEXT:  .LBB21_2:
1431; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1432; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1433; X86-BMI1-NEXT:    retl
1434;
1435; X86-BMI2-LABEL: bzhi64_b1_indexzext:
1436; X86-BMI2:       # %bb.0:
1437; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
1438; X86-BMI2-NEXT:    movl $-1, %ecx
1439; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1440; X86-BMI2-NEXT:    testb $32, %dl
1441; X86-BMI2-NEXT:    je .LBB21_2
1442; X86-BMI2-NEXT:  # %bb.1:
1443; X86-BMI2-NEXT:    movl %eax, %ecx
1444; X86-BMI2-NEXT:    xorl %eax, %eax
1445; X86-BMI2-NEXT:  .LBB21_2:
1446; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1447; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1448; X86-BMI2-NEXT:    retl
1449;
1450; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
1451; X64-NOBMI:       # %bb.0:
1452; X64-NOBMI-NEXT:    movl %esi, %ecx
1453; X64-NOBMI-NEXT:    movq $-1, %rax
1454; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1455; X64-NOBMI-NEXT:    shlq %cl, %rax
1456; X64-NOBMI-NEXT:    notq %rax
1457; X64-NOBMI-NEXT:    andq %rdi, %rax
1458; X64-NOBMI-NEXT:    retq
1459;
1460; X64-BMI1-LABEL: bzhi64_b1_indexzext:
1461; X64-BMI1:       # %bb.0:
1462; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
1463; X64-BMI1-NEXT:    shll $8, %esi
1464; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1465; X64-BMI1-NEXT:    retq
1466;
1467; X64-BMI2-LABEL: bzhi64_b1_indexzext:
1468; X64-BMI2:       # %bb.0:
1469; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1470; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1471; X64-BMI2-NEXT:    retq
1472  %conv = zext i8 %numlowbits to i64
1473  %notmask = shl i64 -1, %conv
1474  %mask = xor i64 %notmask, -1
1475  %masked = and i64 %mask, %val
1476  ret i64 %masked
1477}
1478
; Pattern b in 64-bit where the value to mask is loaded from %w.
; On the i686 runs the checks show the two 32-bit halves being read from
; offsets 0 and 4 of the pointer; the x86_64 BMI runs are checked for the
; load being used as the memory operand of bextrq / bzhiq.
1479define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
1480; X86-NOBMI-LABEL: bzhi64_b2_load:
1481; X86-NOBMI:       # %bb.0:
1482; X86-NOBMI-NEXT:    pushl %edi
1483; X86-NOBMI-NEXT:    pushl %esi
1484; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1485; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1486; X86-NOBMI-NEXT:    movl $-1, %edx
1487; X86-NOBMI-NEXT:    movl $-1, %edi
1488; X86-NOBMI-NEXT:    shll %cl, %edi
1489; X86-NOBMI-NEXT:    xorl %eax, %eax
1490; X86-NOBMI-NEXT:    testb $32, %cl
1491; X86-NOBMI-NEXT:    jne .LBB22_1
1492; X86-NOBMI-NEXT:  # %bb.2:
1493; X86-NOBMI-NEXT:    movl %edi, %eax
1494; X86-NOBMI-NEXT:    jmp .LBB22_3
1495; X86-NOBMI-NEXT:  .LBB22_1:
1496; X86-NOBMI-NEXT:    movl %edi, %edx
1497; X86-NOBMI-NEXT:  .LBB22_3:
1498; X86-NOBMI-NEXT:    notl %edx
1499; X86-NOBMI-NEXT:    notl %eax
1500; X86-NOBMI-NEXT:    andl (%esi), %eax
1501; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1502; X86-NOBMI-NEXT:    popl %esi
1503; X86-NOBMI-NEXT:    popl %edi
1504; X86-NOBMI-NEXT:    retl
1505;
1506; X86-BMI1-LABEL: bzhi64_b2_load:
1507; X86-BMI1:       # %bb.0:
1508; X86-BMI1-NEXT:    pushl %esi
1509; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
1510; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1511; X86-BMI1-NEXT:    movl $-1, %esi
1512; X86-BMI1-NEXT:    movl $-1, %eax
1513; X86-BMI1-NEXT:    shll %cl, %eax
1514; X86-BMI1-NEXT:    testb $32, %cl
1515; X86-BMI1-NEXT:    je .LBB22_2
1516; X86-BMI1-NEXT:  # %bb.1:
1517; X86-BMI1-NEXT:    movl %eax, %esi
1518; X86-BMI1-NEXT:    xorl %eax, %eax
1519; X86-BMI1-NEXT:  .LBB22_2:
1520; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
1521; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
1522; X86-BMI1-NEXT:    popl %esi
1523; X86-BMI1-NEXT:    retl
1524;
1525; X86-BMI2-LABEL: bzhi64_b2_load:
1526; X86-BMI2:       # %bb.0:
1527; X86-BMI2-NEXT:    pushl %ebx
1528; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1529; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
1530; X86-BMI2-NEXT:    movl $-1, %edx
1531; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
1532; X86-BMI2-NEXT:    testb $32, %bl
1533; X86-BMI2-NEXT:    je .LBB22_2
1534; X86-BMI2-NEXT:  # %bb.1:
1535; X86-BMI2-NEXT:    movl %eax, %edx
1536; X86-BMI2-NEXT:    xorl %eax, %eax
1537; X86-BMI2-NEXT:  .LBB22_2:
1538; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
1539; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
1540; X86-BMI2-NEXT:    popl %ebx
1541; X86-BMI2-NEXT:    retl
1542;
1543; X64-NOBMI-LABEL: bzhi64_b2_load:
1544; X64-NOBMI:       # %bb.0:
1545; X64-NOBMI-NEXT:    movq %rsi, %rcx
1546; X64-NOBMI-NEXT:    movq $-1, %rax
1547; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1548; X64-NOBMI-NEXT:    shlq %cl, %rax
1549; X64-NOBMI-NEXT:    notq %rax
1550; X64-NOBMI-NEXT:    andq (%rdi), %rax
1551; X64-NOBMI-NEXT:    retq
1552;
1553; X64-BMI1-LABEL: bzhi64_b2_load:
1554; X64-BMI1:       # %bb.0:
1555; X64-BMI1-NEXT:    shll $8, %esi
1556; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
1557; X64-BMI1-NEXT:    retq
1558;
1559; X64-BMI2-LABEL: bzhi64_b2_load:
1560; X64-BMI2:       # %bb.0:
1561; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1562; X64-BMI2-NEXT:    retq
1563  %val = load i64, ptr %w
1564  %notmask = shl i64 -1, %numlowbits
1565  %mask = xor i64 %notmask, -1
1566  %masked = and i64 %mask, %val
1567  ret i64 %masked
1568}
1569
; Pattern b in 64-bit combining both variations: the value to mask is loaded
; from %w, and the bit count is an i8 (zeroext) argument widened with zext.
1570define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
1571; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
1572; X86-NOBMI:       # %bb.0:
1573; X86-NOBMI-NEXT:    pushl %edi
1574; X86-NOBMI-NEXT:    pushl %esi
1575; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1576; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1577; X86-NOBMI-NEXT:    movl $-1, %edx
1578; X86-NOBMI-NEXT:    movl $-1, %edi
1579; X86-NOBMI-NEXT:    shll %cl, %edi
1580; X86-NOBMI-NEXT:    xorl %eax, %eax
1581; X86-NOBMI-NEXT:    testb $32, %cl
1582; X86-NOBMI-NEXT:    jne .LBB23_1
1583; X86-NOBMI-NEXT:  # %bb.2:
1584; X86-NOBMI-NEXT:    movl %edi, %eax
1585; X86-NOBMI-NEXT:    jmp .LBB23_3
1586; X86-NOBMI-NEXT:  .LBB23_1:
1587; X86-NOBMI-NEXT:    movl %edi, %edx
1588; X86-NOBMI-NEXT:  .LBB23_3:
1589; X86-NOBMI-NEXT:    notl %edx
1590; X86-NOBMI-NEXT:    notl %eax
1591; X86-NOBMI-NEXT:    andl (%esi), %eax
1592; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1593; X86-NOBMI-NEXT:    popl %esi
1594; X86-NOBMI-NEXT:    popl %edi
1595; X86-NOBMI-NEXT:    retl
1596;
1597; X86-BMI1-LABEL: bzhi64_b3_load_indexzext:
1598; X86-BMI1:       # %bb.0:
1599; X86-BMI1-NEXT:    pushl %esi
1600; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
1601; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1602; X86-BMI1-NEXT:    movl $-1, %esi
1603; X86-BMI1-NEXT:    movl $-1, %eax
1604; X86-BMI1-NEXT:    shll %cl, %eax
1605; X86-BMI1-NEXT:    testb $32, %cl
1606; X86-BMI1-NEXT:    je .LBB23_2
1607; X86-BMI1-NEXT:  # %bb.1:
1608; X86-BMI1-NEXT:    movl %eax, %esi
1609; X86-BMI1-NEXT:    xorl %eax, %eax
1610; X86-BMI1-NEXT:  .LBB23_2:
1611; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
1612; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
1613; X86-BMI1-NEXT:    popl %esi
1614; X86-BMI1-NEXT:    retl
1615;
1616; X86-BMI2-LABEL: bzhi64_b3_load_indexzext:
1617; X86-BMI2:       # %bb.0:
1618; X86-BMI2-NEXT:    pushl %ebx
1619; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1620; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
1621; X86-BMI2-NEXT:    movl $-1, %edx
1622; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
1623; X86-BMI2-NEXT:    testb $32, %bl
1624; X86-BMI2-NEXT:    je .LBB23_2
1625; X86-BMI2-NEXT:  # %bb.1:
1626; X86-BMI2-NEXT:    movl %eax, %edx
1627; X86-BMI2-NEXT:    xorl %eax, %eax
1628; X86-BMI2-NEXT:  .LBB23_2:
1629; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
1630; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
1631; X86-BMI2-NEXT:    popl %ebx
1632; X86-BMI2-NEXT:    retl
1633;
1634; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
1635; X64-NOBMI:       # %bb.0:
1636; X64-NOBMI-NEXT:    movl %esi, %ecx
1637; X64-NOBMI-NEXT:    movq $-1, %rax
1638; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1639; X64-NOBMI-NEXT:    shlq %cl, %rax
1640; X64-NOBMI-NEXT:    notq %rax
1641; X64-NOBMI-NEXT:    andq (%rdi), %rax
1642; X64-NOBMI-NEXT:    retq
1643;
1644; X64-BMI1-LABEL: bzhi64_b3_load_indexzext:
1645; X64-BMI1:       # %bb.0:
1646; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
1647; X64-BMI1-NEXT:    shll $8, %esi
1648; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
1649; X64-BMI1-NEXT:    retq
1650;
1651; X64-BMI2-LABEL: bzhi64_b3_load_indexzext:
1652; X64-BMI2:       # %bb.0:
1653; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1654; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1655; X64-BMI2-NEXT:    retq
1656  %val = load i64, ptr %w
1657  %conv = zext i8 %numlowbits to i64
1658  %notmask = shl i64 -1, %conv
1659  %mask = xor i64 %notmask, -1
1660  %masked = and i64 %mask, %val
1661  ret i64 %masked
1662}
1663
; Pattern b in 64-bit with the operands of the final 'and' swapped (%val
; first, %mask second). The checked output is the same instruction sequences
; as for bzhi64_b0, so the match must not depend on operand order.
1664define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
1665; X86-NOBMI-LABEL: bzhi64_b4_commutative:
1666; X86-NOBMI:       # %bb.0:
1667; X86-NOBMI-NEXT:    pushl %esi
1668; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1669; X86-NOBMI-NEXT:    movl $-1, %edx
1670; X86-NOBMI-NEXT:    movl $-1, %esi
1671; X86-NOBMI-NEXT:    shll %cl, %esi
1672; X86-NOBMI-NEXT:    xorl %eax, %eax
1673; X86-NOBMI-NEXT:    testb $32, %cl
1674; X86-NOBMI-NEXT:    jne .LBB24_1
1675; X86-NOBMI-NEXT:  # %bb.2:
1676; X86-NOBMI-NEXT:    movl %esi, %eax
1677; X86-NOBMI-NEXT:    jmp .LBB24_3
1678; X86-NOBMI-NEXT:  .LBB24_1:
1679; X86-NOBMI-NEXT:    movl %esi, %edx
1680; X86-NOBMI-NEXT:  .LBB24_3:
1681; X86-NOBMI-NEXT:    notl %edx
1682; X86-NOBMI-NEXT:    notl %eax
1683; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1684; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1685; X86-NOBMI-NEXT:    popl %esi
1686; X86-NOBMI-NEXT:    retl
1687;
1688; X86-BMI1-LABEL: bzhi64_b4_commutative:
1689; X86-BMI1:       # %bb.0:
1690; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1691; X86-BMI1-NEXT:    movl $-1, %edx
1692; X86-BMI1-NEXT:    movl $-1, %eax
1693; X86-BMI1-NEXT:    shll %cl, %eax
1694; X86-BMI1-NEXT:    testb $32, %cl
1695; X86-BMI1-NEXT:    je .LBB24_2
1696; X86-BMI1-NEXT:  # %bb.1:
1697; X86-BMI1-NEXT:    movl %eax, %edx
1698; X86-BMI1-NEXT:    xorl %eax, %eax
1699; X86-BMI1-NEXT:  .LBB24_2:
1700; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1701; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
1702; X86-BMI1-NEXT:    retl
1703;
1704; X86-BMI2-LABEL: bzhi64_b4_commutative:
1705; X86-BMI2:       # %bb.0:
1706; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
1707; X86-BMI2-NEXT:    movl $-1, %ecx
1708; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
1709; X86-BMI2-NEXT:    testb $32, %dl
1710; X86-BMI2-NEXT:    je .LBB24_2
1711; X86-BMI2-NEXT:  # %bb.1:
1712; X86-BMI2-NEXT:    movl %eax, %ecx
1713; X86-BMI2-NEXT:    xorl %eax, %eax
1714; X86-BMI2-NEXT:  .LBB24_2:
1715; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
1716; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
1717; X86-BMI2-NEXT:    retl
1718;
1719; X64-NOBMI-LABEL: bzhi64_b4_commutative:
1720; X64-NOBMI:       # %bb.0:
1721; X64-NOBMI-NEXT:    movq %rsi, %rcx
1722; X64-NOBMI-NEXT:    movq $-1, %rax
1723; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
1724; X64-NOBMI-NEXT:    shlq %cl, %rax
1725; X64-NOBMI-NEXT:    notq %rax
1726; X64-NOBMI-NEXT:    andq %rdi, %rax
1727; X64-NOBMI-NEXT:    retq
1728;
1729; X64-BMI1-LABEL: bzhi64_b4_commutative:
1730; X64-BMI1:       # %bb.0:
1731; X64-BMI1-NEXT:    shll $8, %esi
1732; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
1733; X64-BMI1-NEXT:    retq
1734;
1735; X64-BMI2-LABEL: bzhi64_b4_commutative:
1736; X64-BMI2:       # %bb.0:
1737; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1738; X64-BMI2-NEXT:    retq
1739  %notmask = shl i64 -1, %numlowbits
1740  %mask = xor i64 %notmask, -1
1741  %masked = and i64 %val, %mask ; swapped order
1742  ret i64 %masked
1743}
1744
1745; 64-bit, but with 32-bit output
1746
1747; Everything done in 64-bit, truncation happens last.
; Pattern b (x & ~(-1 << nbits)) on an i64 value with an i32 result: the i8
; bit count is zero-extended to i64, the mask is built and applied in i64,
; and only the final result is truncated to i32.
1748define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
1749; X86-NOBMI-LABEL: bzhi64_32_b0:
1750; X86-NOBMI:       # %bb.0:
1751; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1752; X86-NOBMI-NEXT:    movl $-1, %edx
1753; X86-NOBMI-NEXT:    shll %cl, %edx
1754; X86-NOBMI-NEXT:    xorl %eax, %eax
1755; X86-NOBMI-NEXT:    testb $32, %cl
1756; X86-NOBMI-NEXT:    jne .LBB25_2
1757; X86-NOBMI-NEXT:  # %bb.1:
1758; X86-NOBMI-NEXT:    movl %edx, %eax
1759; X86-NOBMI-NEXT:  .LBB25_2:
1760; X86-NOBMI-NEXT:    notl %eax
1761; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1762; X86-NOBMI-NEXT:    retl
1763;
1764; X86-BMI1-LABEL: bzhi64_32_b0:
1765; X86-BMI1:       # %bb.0:
1766; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1767; X86-BMI1-NEXT:    movl $-1, %eax
1768; X86-BMI1-NEXT:    shll %cl, %eax
1769; X86-BMI1-NEXT:    xorl %edx, %edx
1770; X86-BMI1-NEXT:    testb $32, %cl
1771; X86-BMI1-NEXT:    jne .LBB25_2
1772; X86-BMI1-NEXT:  # %bb.1:
1773; X86-BMI1-NEXT:    movl %eax, %edx
1774; X86-BMI1-NEXT:  .LBB25_2:
1775; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
1776; X86-BMI1-NEXT:    retl
1777;
1778; X86-BMI2-LABEL: bzhi64_32_b0:
1779; X86-BMI2:       # %bb.0:
1780; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1781; X86-BMI2-NEXT:    xorl %ecx, %ecx
1782; X86-BMI2-NEXT:    testb $32, %al
1783; X86-BMI2-NEXT:    jne .LBB25_2
1784; X86-BMI2-NEXT:  # %bb.1:
1785; X86-BMI2-NEXT:    movl $-1, %ecx
1786; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
1787; X86-BMI2-NEXT:  .LBB25_2:
1788; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
1789; X86-BMI2-NEXT:    retl
1790;
1791; X64-NOBMI-LABEL: bzhi64_32_b0:
1792; X64-NOBMI:       # %bb.0:
1793; X64-NOBMI-NEXT:    movl %esi, %ecx
1794; X64-NOBMI-NEXT:    movq $-1, %rax
1795; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1796; X64-NOBMI-NEXT:    shlq %cl, %rax
1797; X64-NOBMI-NEXT:    notl %eax
1798; X64-NOBMI-NEXT:    andl %edi, %eax
1799; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
1800; X64-NOBMI-NEXT:    retq
1801;
1802; X64-BMI1-LABEL: bzhi64_32_b0:
1803; X64-BMI1:       # %bb.0:
1804; X64-BMI1-NEXT:    shll $8, %esi
1805; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1806; X64-BMI1-NEXT:    retq
1807;
1808; X64-BMI2-LABEL: bzhi64_32_b0:
1809; X64-BMI2:       # %bb.0:
1810; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1811; X64-BMI2-NEXT:    retq
1812  %widenumlowbits = zext i8 %numlowbits to i64 ; widen the bit count to the i64 shift width
1813  %notmask = shl nsw i64 -1, %widenumlowbits ; all-ones shifted left past the bits to keep
1814  %mask = xor i64 %notmask, -1 ; invert to get the low-bit mask
1815  %wideres = and i64 %val, %mask ; masking is done at full 64-bit width
1816  %res = trunc i64 %wideres to i32 ; truncation is the very last step
1817  ret i32 %res
1818}
1819
1820; The i64 value is truncated to i32 up front. Shifting and masking are 32-bit.
; Pattern b (x & ~(-1 << nbits)): only the incoming value is 64-bit wide; the
; i8 bit count is zero-extended to i32 and the mask is built with an i32 shl.
; NOTE(review): this comment used to describe the shift as 64-bit, which does
; not match the IR below; the 32-bit expectations also contain no high-half
; handling.
1821define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
1822; X86-NOBMI-LABEL: bzhi64_32_b1:
1823; X86-NOBMI:       # %bb.0:
1824; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1825; X86-NOBMI-NEXT:    movl $-1, %eax
1826; X86-NOBMI-NEXT:    shll %cl, %eax
1827; X86-NOBMI-NEXT:    notl %eax
1828; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1829; X86-NOBMI-NEXT:    retl
1830;
1831; X86-BMI1-LABEL: bzhi64_32_b1:
1832; X86-BMI1:       # %bb.0:
1833; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1834; X86-BMI1-NEXT:    shll $8, %eax
1835; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1836; X86-BMI1-NEXT:    retl
1837;
1838; X86-BMI2-LABEL: bzhi64_32_b1:
1839; X86-BMI2:       # %bb.0:
1840; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1841; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1842; X86-BMI2-NEXT:    retl
1843;
1844; X64-NOBMI-LABEL: bzhi64_32_b1:
1845; X64-NOBMI:       # %bb.0:
1846; X64-NOBMI-NEXT:    movl %esi, %ecx
1847; X64-NOBMI-NEXT:    movl $-1, %eax
1848; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1849; X64-NOBMI-NEXT:    shll %cl, %eax
1850; X64-NOBMI-NEXT:    notl %eax
1851; X64-NOBMI-NEXT:    andl %edi, %eax
1852; X64-NOBMI-NEXT:    retq
1853;
1854; X64-BMI1-LABEL: bzhi64_32_b1:
1855; X64-BMI1:       # %bb.0:
1856; X64-BMI1-NEXT:    shll $8, %esi
1857; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1858; X64-BMI1-NEXT:    retq
1859;
1860; X64-BMI2-LABEL: bzhi64_32_b1:
1861; X64-BMI2:       # %bb.0:
1862; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1863; X64-BMI2-NEXT:    retq
1864  %truncval = trunc i64 %val to i32 ; drop the high half before any masking
1865  %widenumlowbits = zext i8 %numlowbits to i32 ; widen the bit count to the i32 shift width
1866  %notmask = shl nsw i32 -1, %widenumlowbits ; all-ones shifted left past the bits to keep
1867  %mask = xor i32 %notmask, -1 ; invert to get the low-bit mask
1868  %res = and i32 %truncval, %mask ; 32-bit masking
1869  ret i32 %res
1870}
1871
1872; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit.
1873; Masking is 64-bit. Then truncation.
; Pattern b (x & ~(-1 << nbits)): the mask is built entirely in i32, then
; zero-extended so that the 'and' runs on the full i64 value.
; NOTE(review): the first line used to say the shift is 64-bit; the IR below
; performs the shl in i32.
1874define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
1875; X86-NOBMI-LABEL: bzhi64_32_b2:
1876; X86-NOBMI:       # %bb.0:
1877; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1878; X86-NOBMI-NEXT:    movl $-1, %eax
1879; X86-NOBMI-NEXT:    shll %cl, %eax
1880; X86-NOBMI-NEXT:    notl %eax
1881; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1882; X86-NOBMI-NEXT:    retl
1883;
1884; X86-BMI1-LABEL: bzhi64_32_b2:
1885; X86-BMI1:       # %bb.0:
1886; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1887; X86-BMI1-NEXT:    shll $8, %eax
1888; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1889; X86-BMI1-NEXT:    retl
1890;
1891; X86-BMI2-LABEL: bzhi64_32_b2:
1892; X86-BMI2:       # %bb.0:
1893; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1894; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1895; X86-BMI2-NEXT:    retl
1896;
1897; X64-NOBMI-LABEL: bzhi64_32_b2:
1898; X64-NOBMI:       # %bb.0:
1899; X64-NOBMI-NEXT:    movl %esi, %ecx
1900; X64-NOBMI-NEXT:    movl $-1, %eax
1901; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1902; X64-NOBMI-NEXT:    shll %cl, %eax
1903; X64-NOBMI-NEXT:    notl %eax
1904; X64-NOBMI-NEXT:    andl %edi, %eax
1905; X64-NOBMI-NEXT:    retq
1906;
1907; X64-BMI1-LABEL: bzhi64_32_b2:
1908; X64-BMI1:       # %bb.0:
1909; X64-BMI1-NEXT:    shll $8, %esi
1910; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1911; X64-BMI1-NEXT:    retq
1912;
1913; X64-BMI2-LABEL: bzhi64_32_b2:
1914; X64-BMI2:       # %bb.0:
1915; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1916; X64-BMI2-NEXT:    retq
1917  %widenumlowbits = zext i8 %numlowbits to i32 ; widen the bit count to the i32 shift width
1918  %notmask = shl nsw i32 -1, %widenumlowbits ; all-ones shifted left past the bits to keep
1919  %mask = xor i32 %notmask, -1 ; invert to get the 32-bit low-bit mask
1920  %zextmask = zext i32 %mask to i64 ; widen the mask; its upper 32 bits are zero
1921  %wideres = and i64 %val, %zextmask ; masking is done at 64-bit width
1922  %res = trunc i64 %wideres to i32 ; truncation is the last step
1923  ret i32 %res
1924}
1925
1926; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
1927; Masking is 64-bit. Then truncation.
; Variant of pattern b where the all-ones constant is 4294967295 (0xFFFFFFFF)
; instead of -1, so the i64 arithmetic only ever sets bits in the low 32-bit
; half after the xor.
1928define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
1929; X86-NOBMI-LABEL: bzhi64_32_b3:
1930; X86-NOBMI:       # %bb.0:
1931; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1932; X86-NOBMI-NEXT:    movl $-1, %edx
1933; X86-NOBMI-NEXT:    shll %cl, %edx
1934; X86-NOBMI-NEXT:    xorl %eax, %eax
1935; X86-NOBMI-NEXT:    testb $32, %cl
1936; X86-NOBMI-NEXT:    jne .LBB28_2
1937; X86-NOBMI-NEXT:  # %bb.1:
1938; X86-NOBMI-NEXT:    movl %edx, %eax
1939; X86-NOBMI-NEXT:  .LBB28_2:
1940; X86-NOBMI-NEXT:    notl %eax
1941; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1942; X86-NOBMI-NEXT:    retl
1943;
1944; X86-BMI1-LABEL: bzhi64_32_b3:
1945; X86-BMI1:       # %bb.0:
1946; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
1947; X86-BMI1-NEXT:    movl $-1, %eax
1948; X86-BMI1-NEXT:    shll %cl, %eax
1949; X86-BMI1-NEXT:    xorl %edx, %edx
1950; X86-BMI1-NEXT:    testb $32, %cl
1951; X86-BMI1-NEXT:    jne .LBB28_2
1952; X86-BMI1-NEXT:  # %bb.1:
1953; X86-BMI1-NEXT:    movl %eax, %edx
1954; X86-BMI1-NEXT:  .LBB28_2:
1955; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
1956; X86-BMI1-NEXT:    retl
1957;
1958; X86-BMI2-LABEL: bzhi64_32_b3:
1959; X86-BMI2:       # %bb.0:
1960; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1961; X86-BMI2-NEXT:    xorl %ecx, %ecx
1962; X86-BMI2-NEXT:    testb $32, %al
1963; X86-BMI2-NEXT:    jne .LBB28_2
1964; X86-BMI2-NEXT:  # %bb.1:
1965; X86-BMI2-NEXT:    movl $-1, %ecx
1966; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
1967; X86-BMI2-NEXT:  .LBB28_2:
1968; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
1969; X86-BMI2-NEXT:    retl
1970;
1971; X64-NOBMI-LABEL: bzhi64_32_b3:
1972; X64-NOBMI:       # %bb.0:
1973; X64-NOBMI-NEXT:    movl %esi, %ecx
1974; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
1975; X64-NOBMI-NEXT:    movl $4294967295, %edx # imm = 0xFFFFFFFF
1976; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1977; X64-NOBMI-NEXT:    shlq %cl, %rdx
1978; X64-NOBMI-NEXT:    xorl %edx, %eax
1979; X64-NOBMI-NEXT:    andl %edi, %eax
1980; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
1981; X64-NOBMI-NEXT:    retq
1982;
1983; X64-BMI1-LABEL: bzhi64_32_b3:
1984; X64-BMI1:       # %bb.0:
1985; X64-BMI1-NEXT:    shll $8, %esi
1986; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
1987; X64-BMI1-NEXT:    retq
1988;
1989; X64-BMI2-LABEL: bzhi64_32_b3:
1990; X64-BMI2:       # %bb.0:
1991; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
1992; X64-BMI2-NEXT:    retq
1993  %widenumlowbits = zext i8 %numlowbits to i64 ; widen the bit count to the i64 shift width
1994  %notmask = shl nsw i64 4294967295, %widenumlowbits ; 0xFFFFFFFF shifted left in i64
1995  %mask = xor i64 %notmask, 4294967295 ; flip only the low 32 bits
1996  %wideres = and i64 %val, %mask ; masking is done at 64-bit width
1997  %res = trunc i64 %wideres to i32 ; truncation is the last step
1998  ret i32 %res
1999}
2000
2001; ---------------------------------------------------------------------------- ;
2002; Pattern c. 32-bit
2003; ---------------------------------------------------------------------------- ;
2004
; Pattern c, baseline 32-bit case: x & (-1 >> (32 - nbits)).
; The mask is also stored through %escape, so it has a second use besides the
; 'and'. The BMI2 expectations below still produce the result with bzhil and
; compute the stored mask separately (negb + shrxl).
2005define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
2006; X86-NOBMI-LABEL: bzhi32_c0:
2007; X86-NOBMI:       # %bb.0:
2008; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2009; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2010; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2011; X86-NOBMI-NEXT:    movl $-1, %eax
2012; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2013; X86-NOBMI-NEXT:    shrl %cl, %eax
2014; X86-NOBMI-NEXT:    movl %eax, (%edx)
2015; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2016; X86-NOBMI-NEXT:    retl
2017;
2018; X86-BMI1-LABEL: bzhi32_c0:
2019; X86-BMI1:       # %bb.0:
2020; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2021; X86-BMI1-NEXT:    xorl %ecx, %ecx
2022; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2023; X86-BMI1-NEXT:    movl $-1, %eax
2024; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2025; X86-BMI1-NEXT:    shrl %cl, %eax
2026; X86-BMI1-NEXT:    movl %eax, (%edx)
2027; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2028; X86-BMI1-NEXT:    retl
2029;
2030; X86-BMI2-LABEL: bzhi32_c0:
2031; X86-BMI2:       # %bb.0:
2032; X86-BMI2-NEXT:    pushl %esi
2033; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2034; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2035; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
2036; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
2037; X86-BMI2-NEXT:    negb %dl
2038; X86-BMI2-NEXT:    movl $-1, %esi
2039; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
2040; X86-BMI2-NEXT:    movl %edx, (%ecx)
2041; X86-BMI2-NEXT:    popl %esi
2042; X86-BMI2-NEXT:    retl
2043;
2044; X64-NOBMI-LABEL: bzhi32_c0:
2045; X64-NOBMI:       # %bb.0:
2046; X64-NOBMI-NEXT:    movl %esi, %ecx
2047; X64-NOBMI-NEXT:    negb %cl
2048; X64-NOBMI-NEXT:    movl $-1, %eax
2049; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2050; X64-NOBMI-NEXT:    shrl %cl, %eax
2051; X64-NOBMI-NEXT:    movl %eax, (%rdx)
2052; X64-NOBMI-NEXT:    andl %edi, %eax
2053; X64-NOBMI-NEXT:    retq
2054;
2055; X64-BMI1-LABEL: bzhi32_c0:
2056; X64-BMI1:       # %bb.0:
2057; X64-BMI1-NEXT:    movl %esi, %ecx
2058; X64-BMI1-NEXT:    negb %cl
2059; X64-BMI1-NEXT:    movl $-1, %eax
2060; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2061; X64-BMI1-NEXT:    shrl %cl, %eax
2062; X64-BMI1-NEXT:    movl %eax, (%rdx)
2063; X64-BMI1-NEXT:    andl %edi, %eax
2064; X64-BMI1-NEXT:    retq
2065;
2066; X64-BMI2-LABEL: bzhi32_c0:
2067; X64-BMI2:       # %bb.0:
2068; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2069; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
2070; X64-BMI2-NEXT:    negb %sil
2071; X64-BMI2-NEXT:    movl $-1, %ecx
2072; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
2073; X64-BMI2-NEXT:    movl %ecx, (%rdx)
2074; X64-BMI2-NEXT:    retq
2075  %numhighbits = sub i32 32, %numlowbits ; how many high bits to drop
2076  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by that amount
2077  store i32 %mask, ptr %escape ; extra use: the mask itself must be materialized
2078  %masked = and i32 %mask, %val
2079  ret i32 %masked
2080}
2081
; Pattern c, 32-bit, with an i8 bit count: the subtraction from 32 is done in
; i8 and the result is zero-extended to i32 before being used as the shift
; amount. The mask is also stored through %escape.
2082define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind {
2083; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
2084; X86-NOBMI:       # %bb.0:
2085; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2086; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2087; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2088; X86-NOBMI-NEXT:    movl $-1, %eax
2089; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2090; X86-NOBMI-NEXT:    shrl %cl, %eax
2091; X86-NOBMI-NEXT:    movl %eax, (%edx)
2092; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2093; X86-NOBMI-NEXT:    retl
2094;
2095; X86-BMI1-LABEL: bzhi32_c1_indexzext:
2096; X86-BMI1:       # %bb.0:
2097; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2098; X86-BMI1-NEXT:    xorl %ecx, %ecx
2099; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2100; X86-BMI1-NEXT:    movl $-1, %eax
2101; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2102; X86-BMI1-NEXT:    shrl %cl, %eax
2103; X86-BMI1-NEXT:    movl %eax, (%edx)
2104; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2105; X86-BMI1-NEXT:    retl
2106;
2107; X86-BMI2-LABEL: bzhi32_c1_indexzext:
2108; X86-BMI2:       # %bb.0:
2109; X86-BMI2-NEXT:    pushl %esi
2110; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2111; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2112; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
2113; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
2114; X86-BMI2-NEXT:    negb %dl
2115; X86-BMI2-NEXT:    movl $-1, %esi
2116; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
2117; X86-BMI2-NEXT:    movl %edx, (%ecx)
2118; X86-BMI2-NEXT:    popl %esi
2119; X86-BMI2-NEXT:    retl
2120;
2121; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
2122; X64-NOBMI:       # %bb.0:
2123; X64-NOBMI-NEXT:    movl %esi, %ecx
2124; X64-NOBMI-NEXT:    negb %cl
2125; X64-NOBMI-NEXT:    movl $-1, %eax
2126; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2127; X64-NOBMI-NEXT:    shrl %cl, %eax
2128; X64-NOBMI-NEXT:    movl %eax, (%rdx)
2129; X64-NOBMI-NEXT:    andl %edi, %eax
2130; X64-NOBMI-NEXT:    retq
2131;
2132; X64-BMI1-LABEL: bzhi32_c1_indexzext:
2133; X64-BMI1:       # %bb.0:
2134; X64-BMI1-NEXT:    movl %esi, %ecx
2135; X64-BMI1-NEXT:    negb %cl
2136; X64-BMI1-NEXT:    movl $-1, %eax
2137; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2138; X64-BMI1-NEXT:    shrl %cl, %eax
2139; X64-BMI1-NEXT:    movl %eax, (%rdx)
2140; X64-BMI1-NEXT:    andl %edi, %eax
2141; X64-BMI1-NEXT:    retq
2142;
2143; X64-BMI2-LABEL: bzhi32_c1_indexzext:
2144; X64-BMI2:       # %bb.0:
2145; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2146; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
2147; X64-BMI2-NEXT:    negb %sil
2148; X64-BMI2-NEXT:    movl $-1, %ecx
2149; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
2150; X64-BMI2-NEXT:    movl %ecx, (%rdx)
2151; X64-BMI2-NEXT:    retq
2152  %numhighbits = sub i8 32, %numlowbits ; subtraction happens at i8 width
2153  %sh_prom = zext i8 %numhighbits to i32 ; promote the shift amount to the shift's type
2154  %mask = lshr i32 -1, %sh_prom ; all-ones shifted right by that amount
2155  store i32 %mask, ptr %escape ; extra use: the mask itself must be materialized
2156  %masked = and i32 %mask, %val
2157  ret i32 %masked
2158}
2159
; Pattern c, 32-bit, where the value being masked is loaded from %w instead of
; being passed directly. The mask is also stored through %escape. In the BMI2
; expectations below the load appears as the memory operand of bzhil.
2160define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind {
2161; X86-NOBMI-LABEL: bzhi32_c2_load:
2162; X86-NOBMI:       # %bb.0:
2163; X86-NOBMI-NEXT:    pushl %esi
2164; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2165; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2166; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2167; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2168; X86-NOBMI-NEXT:    movl $-1, %esi
2169; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2170; X86-NOBMI-NEXT:    shrl %cl, %esi
2171; X86-NOBMI-NEXT:    movl (%eax), %eax
2172; X86-NOBMI-NEXT:    andl %esi, %eax
2173; X86-NOBMI-NEXT:    movl %esi, (%edx)
2174; X86-NOBMI-NEXT:    popl %esi
2175; X86-NOBMI-NEXT:    retl
2176;
2177; X86-BMI1-LABEL: bzhi32_c2_load:
2178; X86-BMI1:       # %bb.0:
2179; X86-BMI1-NEXT:    pushl %esi
2180; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2181; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2182; X86-BMI1-NEXT:    xorl %ecx, %ecx
2183; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2184; X86-BMI1-NEXT:    movl $-1, %esi
2185; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2186; X86-BMI1-NEXT:    shrl %cl, %esi
2187; X86-BMI1-NEXT:    movl (%eax), %eax
2188; X86-BMI1-NEXT:    andl %esi, %eax
2189; X86-BMI1-NEXT:    movl %esi, (%edx)
2190; X86-BMI1-NEXT:    popl %esi
2191; X86-BMI1-NEXT:    retl
2192;
2193; X86-BMI2-LABEL: bzhi32_c2_load:
2194; X86-BMI2:       # %bb.0:
2195; X86-BMI2-NEXT:    pushl %esi
2196; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2197; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2198; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2199; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
2200; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
2201; X86-BMI2-NEXT:    negb %dl
2202; X86-BMI2-NEXT:    movl $-1, %esi
2203; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
2204; X86-BMI2-NEXT:    movl %edx, (%ecx)
2205; X86-BMI2-NEXT:    popl %esi
2206; X86-BMI2-NEXT:    retl
2207;
2208; X64-NOBMI-LABEL: bzhi32_c2_load:
2209; X64-NOBMI:       # %bb.0:
2210; X64-NOBMI-NEXT:    movl %esi, %ecx
2211; X64-NOBMI-NEXT:    negb %cl
2212; X64-NOBMI-NEXT:    movl $-1, %esi
2213; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2214; X64-NOBMI-NEXT:    shrl %cl, %esi
2215; X64-NOBMI-NEXT:    movl (%rdi), %eax
2216; X64-NOBMI-NEXT:    andl %esi, %eax
2217; X64-NOBMI-NEXT:    movl %esi, (%rdx)
2218; X64-NOBMI-NEXT:    retq
2219;
2220; X64-BMI1-LABEL: bzhi32_c2_load:
2221; X64-BMI1:       # %bb.0:
2222; X64-BMI1-NEXT:    movl %esi, %ecx
2223; X64-BMI1-NEXT:    negb %cl
2224; X64-BMI1-NEXT:    movl $-1, %esi
2225; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2226; X64-BMI1-NEXT:    shrl %cl, %esi
2227; X64-BMI1-NEXT:    movl (%rdi), %eax
2228; X64-BMI1-NEXT:    andl %esi, %eax
2229; X64-BMI1-NEXT:    movl %esi, (%rdx)
2230; X64-BMI1-NEXT:    retq
2231;
2232; X64-BMI2-LABEL: bzhi32_c2_load:
2233; X64-BMI2:       # %bb.0:
2234; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
2235; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
2236; X64-BMI2-NEXT:    negb %sil
2237; X64-BMI2-NEXT:    movl $-1, %ecx
2238; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
2239; X64-BMI2-NEXT:    movl %ecx, (%rdx)
2240; X64-BMI2-NEXT:    retq
2241  %val = load i32, ptr %w ; the masked value comes from memory
2242  %numhighbits = sub i32 32, %numlowbits ; how many high bits to drop
2243  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by that amount
2244  store i32 %mask, ptr %escape ; extra use: the mask itself must be materialized
2245  %masked = and i32 %mask, %val
2246  ret i32 %masked
2247}
2248
; Pattern c, 32-bit, combining both variations: the value is loaded from %w
; and the bit count is an i8 whose subtraction from 32 is zero-extended to i32
; before the shift. The mask is also stored through %escape.
2249define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind {
2250; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext:
2251; X86-NOBMI:       # %bb.0:
2252; X86-NOBMI-NEXT:    pushl %esi
2253; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2254; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
2255; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2256; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2257; X86-NOBMI-NEXT:    movl $-1, %esi
2258; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2259; X86-NOBMI-NEXT:    shrl %cl, %esi
2260; X86-NOBMI-NEXT:    movl (%eax), %eax
2261; X86-NOBMI-NEXT:    andl %esi, %eax
2262; X86-NOBMI-NEXT:    movl %esi, (%edx)
2263; X86-NOBMI-NEXT:    popl %esi
2264; X86-NOBMI-NEXT:    retl
2265;
2266; X86-BMI1-LABEL: bzhi32_c3_load_indexzext:
2267; X86-BMI1:       # %bb.0:
2268; X86-BMI1-NEXT:    pushl %esi
2269; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2270; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2271; X86-BMI1-NEXT:    xorl %ecx, %ecx
2272; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2273; X86-BMI1-NEXT:    movl $-1, %esi
2274; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2275; X86-BMI1-NEXT:    shrl %cl, %esi
2276; X86-BMI1-NEXT:    movl (%eax), %eax
2277; X86-BMI1-NEXT:    andl %esi, %eax
2278; X86-BMI1-NEXT:    movl %esi, (%edx)
2279; X86-BMI1-NEXT:    popl %esi
2280; X86-BMI1-NEXT:    retl
2281;
2282; X86-BMI2-LABEL: bzhi32_c3_load_indexzext:
2283; X86-BMI2:       # %bb.0:
2284; X86-BMI2-NEXT:    pushl %esi
2285; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2286; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2287; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2288; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
2289; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
2290; X86-BMI2-NEXT:    negb %dl
2291; X86-BMI2-NEXT:    movl $-1, %esi
2292; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
2293; X86-BMI2-NEXT:    movl %edx, (%ecx)
2294; X86-BMI2-NEXT:    popl %esi
2295; X86-BMI2-NEXT:    retl
2296;
2297; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
2298; X64-NOBMI:       # %bb.0:
2299; X64-NOBMI-NEXT:    movl %esi, %ecx
2300; X64-NOBMI-NEXT:    negb %cl
2301; X64-NOBMI-NEXT:    movl $-1, %esi
2302; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2303; X64-NOBMI-NEXT:    shrl %cl, %esi
2304; X64-NOBMI-NEXT:    movl (%rdi), %eax
2305; X64-NOBMI-NEXT:    andl %esi, %eax
2306; X64-NOBMI-NEXT:    movl %esi, (%rdx)
2307; X64-NOBMI-NEXT:    retq
2308;
2309; X64-BMI1-LABEL: bzhi32_c3_load_indexzext:
2310; X64-BMI1:       # %bb.0:
2311; X64-BMI1-NEXT:    movl %esi, %ecx
2312; X64-BMI1-NEXT:    negb %cl
2313; X64-BMI1-NEXT:    movl $-1, %esi
2314; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2315; X64-BMI1-NEXT:    shrl %cl, %esi
2316; X64-BMI1-NEXT:    movl (%rdi), %eax
2317; X64-BMI1-NEXT:    andl %esi, %eax
2318; X64-BMI1-NEXT:    movl %esi, (%rdx)
2319; X64-BMI1-NEXT:    retq
2320;
2321; X64-BMI2-LABEL: bzhi32_c3_load_indexzext:
2322; X64-BMI2:       # %bb.0:
2323; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
2324; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
2325; X64-BMI2-NEXT:    negb %sil
2326; X64-BMI2-NEXT:    movl $-1, %ecx
2327; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
2328; X64-BMI2-NEXT:    movl %ecx, (%rdx)
2329; X64-BMI2-NEXT:    retq
2330  %val = load i32, ptr %w ; the masked value comes from memory
2331  %numhighbits = sub i8 32, %numlowbits ; subtraction happens at i8 width
2332  %sh_prom = zext i8 %numhighbits to i32 ; promote the shift amount to the shift's type
2333  %mask = lshr i32 -1, %sh_prom ; all-ones shifted right by that amount
2334  store i32 %mask, ptr %escape ; extra use: the mask itself must be materialized
2335  %masked = and i32 %mask, %val
2336  ret i32 %masked
2337}
2338
; Pattern c, 32-bit, with the operands of the final 'and' swapped (value
; first, mask second) relative to the baseline c0 form. The mask is also
; stored through %escape.
2339define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
2340; X86-NOBMI-LABEL: bzhi32_c4_commutative:
2341; X86-NOBMI:       # %bb.0:
2342; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
2343; X86-NOBMI-NEXT:    xorl %ecx, %ecx
2344; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2345; X86-NOBMI-NEXT:    movl $-1, %eax
2346; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2347; X86-NOBMI-NEXT:    shrl %cl, %eax
2348; X86-NOBMI-NEXT:    movl %eax, (%edx)
2349; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2350; X86-NOBMI-NEXT:    retl
2351;
2352; X86-BMI1-LABEL: bzhi32_c4_commutative:
2353; X86-BMI1:       # %bb.0:
2354; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
2355; X86-BMI1-NEXT:    xorl %ecx, %ecx
2356; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2357; X86-BMI1-NEXT:    movl $-1, %eax
2358; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2359; X86-BMI1-NEXT:    shrl %cl, %eax
2360; X86-BMI1-NEXT:    movl %eax, (%edx)
2361; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2362; X86-BMI1-NEXT:    retl
2363;
2364; X86-BMI2-LABEL: bzhi32_c4_commutative:
2365; X86-BMI2:       # %bb.0:
2366; X86-BMI2-NEXT:    pushl %esi
2367; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2368; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
2369; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
2370; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
2371; X86-BMI2-NEXT:    negb %dl
2372; X86-BMI2-NEXT:    movl $-1, %esi
2373; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
2374; X86-BMI2-NEXT:    movl %edx, (%ecx)
2375; X86-BMI2-NEXT:    popl %esi
2376; X86-BMI2-NEXT:    retl
2377;
2378; X64-NOBMI-LABEL: bzhi32_c4_commutative:
2379; X64-NOBMI:       # %bb.0:
2380; X64-NOBMI-NEXT:    movl %esi, %ecx
2381; X64-NOBMI-NEXT:    negb %cl
2382; X64-NOBMI-NEXT:    movl $-1, %eax
2383; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2384; X64-NOBMI-NEXT:    shrl %cl, %eax
2385; X64-NOBMI-NEXT:    movl %eax, (%rdx)
2386; X64-NOBMI-NEXT:    andl %edi, %eax
2387; X64-NOBMI-NEXT:    retq
2388;
2389; X64-BMI1-LABEL: bzhi32_c4_commutative:
2390; X64-BMI1:       # %bb.0:
2391; X64-BMI1-NEXT:    movl %esi, %ecx
2392; X64-BMI1-NEXT:    negb %cl
2393; X64-BMI1-NEXT:    movl $-1, %eax
2394; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2395; X64-BMI1-NEXT:    shrl %cl, %eax
2396; X64-BMI1-NEXT:    movl %eax, (%rdx)
2397; X64-BMI1-NEXT:    andl %edi, %eax
2398; X64-BMI1-NEXT:    retq
2399;
2400; X64-BMI2-LABEL: bzhi32_c4_commutative:
2401; X64-BMI2:       # %bb.0:
2402; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
2403; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
2404; X64-BMI2-NEXT:    negb %sil
2405; X64-BMI2-NEXT:    movl $-1, %ecx
2406; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
2407; X64-BMI2-NEXT:    movl %ecx, (%rdx)
2408; X64-BMI2-NEXT:    retq
2409  %numhighbits = sub i32 32, %numlowbits ; how many high bits to drop
2410  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by that amount
2411  store i32 %mask, ptr %escape ; extra use: the mask itself must be materialized
2412  %masked = and i32 %val, %mask ; swapped order
2413  ret i32 %masked
2414}
2415
2416; 64-bit
2417
; Pattern c, baseline 64-bit case: x & (-1 >> (64 - nbits)).
; The i64 mask is also stored through %escape. In the 32-bit-target
; expectations below the mask is handled as two 32-bit halves, written to
; offsets 0 and 4 of the escape pointer.
2418define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
2419; X86-NOBMI-LABEL: bzhi64_c0:
2420; X86-NOBMI:       # %bb.0:
2421; X86-NOBMI-NEXT:    pushl %esi
2422; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
2423; X86-NOBMI-NEXT:    movb $64, %cl
2424; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2425; X86-NOBMI-NEXT:    movl $-1, %eax
2426; X86-NOBMI-NEXT:    movl $-1, %edx
2427; X86-NOBMI-NEXT:    shrl %cl, %edx
2428; X86-NOBMI-NEXT:    testb $32, %cl
2429; X86-NOBMI-NEXT:    je .LBB34_2
2430; X86-NOBMI-NEXT:  # %bb.1:
2431; X86-NOBMI-NEXT:    movl %edx, %eax
2432; X86-NOBMI-NEXT:    xorl %edx, %edx
2433; X86-NOBMI-NEXT:  .LBB34_2:
2434; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
2435; X86-NOBMI-NEXT:    movl %eax, (%esi)
2436; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2437; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
2438; X86-NOBMI-NEXT:    popl %esi
2439; X86-NOBMI-NEXT:    retl
2440;
2441; X86-BMI1-LABEL: bzhi64_c0:
2442; X86-BMI1:       # %bb.0:
2443; X86-BMI1-NEXT:    pushl %esi
2444; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
2445; X86-BMI1-NEXT:    movb $64, %cl
2446; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2447; X86-BMI1-NEXT:    movl $-1, %eax
2448; X86-BMI1-NEXT:    movl $-1, %edx
2449; X86-BMI1-NEXT:    shrl %cl, %edx
2450; X86-BMI1-NEXT:    testb $32, %cl
2451; X86-BMI1-NEXT:    je .LBB34_2
2452; X86-BMI1-NEXT:  # %bb.1:
2453; X86-BMI1-NEXT:    movl %edx, %eax
2454; X86-BMI1-NEXT:    xorl %edx, %edx
2455; X86-BMI1-NEXT:  .LBB34_2:
2456; X86-BMI1-NEXT:    movl %edx, 4(%esi)
2457; X86-BMI1-NEXT:    movl %eax, (%esi)
2458; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2459; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
2460; X86-BMI1-NEXT:    popl %esi
2461; X86-BMI1-NEXT:    retl
2462;
2463; X86-BMI2-LABEL: bzhi64_c0:
2464; X86-BMI2:       # %bb.0:
2465; X86-BMI2-NEXT:    pushl %ebx
2466; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2467; X86-BMI2-NEXT:    movb $64, %bl
2468; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
2469; X86-BMI2-NEXT:    movl $-1, %eax
2470; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
2471; X86-BMI2-NEXT:    testb $32, %bl
2472; X86-BMI2-NEXT:    je .LBB34_2
2473; X86-BMI2-NEXT:  # %bb.1:
2474; X86-BMI2-NEXT:    movl %edx, %eax
2475; X86-BMI2-NEXT:    xorl %edx, %edx
2476; X86-BMI2-NEXT:  .LBB34_2:
2477; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
2478; X86-BMI2-NEXT:    movl %eax, (%ecx)
2479; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
2480; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
2481; X86-BMI2-NEXT:    popl %ebx
2482; X86-BMI2-NEXT:    retl
2483;
2484; X64-NOBMI-LABEL: bzhi64_c0:
2485; X64-NOBMI:       # %bb.0:
2486; X64-NOBMI-NEXT:    movq %rsi, %rcx
2487; X64-NOBMI-NEXT:    negb %cl
2488; X64-NOBMI-NEXT:    movq $-1, %rax
2489; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
2490; X64-NOBMI-NEXT:    shrq %cl, %rax
2491; X64-NOBMI-NEXT:    movq %rax, (%rdx)
2492; X64-NOBMI-NEXT:    andq %rdi, %rax
2493; X64-NOBMI-NEXT:    retq
2494;
2495; X64-BMI1-LABEL: bzhi64_c0:
2496; X64-BMI1:       # %bb.0:
2497; X64-BMI1-NEXT:    movq %rsi, %rcx
2498; X64-BMI1-NEXT:    negb %cl
2499; X64-BMI1-NEXT:    movq $-1, %rax
2500; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
2501; X64-BMI1-NEXT:    shrq %cl, %rax
2502; X64-BMI1-NEXT:    movq %rax, (%rdx)
2503; X64-BMI1-NEXT:    andq %rdi, %rax
2504; X64-BMI1-NEXT:    retq
2505;
2506; X64-BMI2-LABEL: bzhi64_c0:
2507; X64-BMI2:       # %bb.0:
2508; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
2509; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
2510; X64-BMI2-NEXT:    negb %sil
2511; X64-BMI2-NEXT:    movq $-1, %rcx
2512; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
2513; X64-BMI2-NEXT:    movq %rcx, (%rdx)
2514; X64-BMI2-NEXT:    retq
2515  %numhighbits = sub i64 64, %numlowbits ; how many high bits to drop
2516  %mask = lshr i64 -1, %numhighbits ; all-ones shifted right by that amount
2517  store i64 %mask, ptr %escape ; extra use: the mask itself must be materialized
2518  %masked = and i64 %mask, %val
2519  ret i64 %masked
2520}
2521
; Pattern c, 64-bit, with an i8 bit count: the subtraction from 64 is done in
; i8 and the result is zero-extended to i64 before being used as the shift
; amount. The i64 mask is also stored through %escape.
2522define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind {
2523; X86-NOBMI-LABEL: bzhi64_c1_indexzext:
2524; X86-NOBMI:       # %bb.0:
2525; X86-NOBMI-NEXT:    pushl %esi
2526; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
2527; X86-NOBMI-NEXT:    movb $64, %cl
2528; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
2529; X86-NOBMI-NEXT:    movl $-1, %eax
2530; X86-NOBMI-NEXT:    movl $-1, %edx
2531; X86-NOBMI-NEXT:    shrl %cl, %edx
2532; X86-NOBMI-NEXT:    testb $32, %cl
2533; X86-NOBMI-NEXT:    je .LBB35_2
2534; X86-NOBMI-NEXT:  # %bb.1:
2535; X86-NOBMI-NEXT:    movl %edx, %eax
2536; X86-NOBMI-NEXT:    xorl %edx, %edx
2537; X86-NOBMI-NEXT:  .LBB35_2:
2538; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
2539; X86-NOBMI-NEXT:    movl %eax, (%esi)
2540; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
2541; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
2542; X86-NOBMI-NEXT:    popl %esi
2543; X86-NOBMI-NEXT:    retl
2544;
2545; X86-BMI1-LABEL: bzhi64_c1_indexzext:
2546; X86-BMI1:       # %bb.0:
2547; X86-BMI1-NEXT:    pushl %esi
2548; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
2549; X86-BMI1-NEXT:    movb $64, %cl
2550; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
2551; X86-BMI1-NEXT:    movl $-1, %eax
2552; X86-BMI1-NEXT:    movl $-1, %edx
2553; X86-BMI1-NEXT:    shrl %cl, %edx
2554; X86-BMI1-NEXT:    testb $32, %cl
2555; X86-BMI1-NEXT:    je .LBB35_2
2556; X86-BMI1-NEXT:  # %bb.1:
2557; X86-BMI1-NEXT:    movl %edx, %eax
2558; X86-BMI1-NEXT:    xorl %edx, %edx
2559; X86-BMI1-NEXT:  .LBB35_2:
2560; X86-BMI1-NEXT:    movl %edx, 4(%esi)
2561; X86-BMI1-NEXT:    movl %eax, (%esi)
2562; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
2563; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
2564; X86-BMI1-NEXT:    popl %esi
2565; X86-BMI1-NEXT:    retl
2566;
2567; X86-BMI2-LABEL: bzhi64_c1_indexzext:
2568; X86-BMI2:       # %bb.0:
2569; X86-BMI2-NEXT:    pushl %ebx
2570; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2571; X86-BMI2-NEXT:    movb $64, %bl
2572; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
2573; X86-BMI2-NEXT:    movl $-1, %eax
2574; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
2575; X86-BMI2-NEXT:    testb $32, %bl
2576; X86-BMI2-NEXT:    je .LBB35_2
2577; X86-BMI2-NEXT:  # %bb.1:
2578; X86-BMI2-NEXT:    movl %edx, %eax
2579; X86-BMI2-NEXT:    xorl %edx, %edx
2580; X86-BMI2-NEXT:  .LBB35_2:
2581; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
2582; X86-BMI2-NEXT:    movl %eax, (%ecx)
2583; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
2584; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
2585; X86-BMI2-NEXT:    popl %ebx
2586; X86-BMI2-NEXT:    retl
2587;
2588; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
2589; X64-NOBMI:       # %bb.0:
2590; X64-NOBMI-NEXT:    movl %esi, %ecx
2591; X64-NOBMI-NEXT:    negb %cl
2592; X64-NOBMI-NEXT:    movq $-1, %rax
2593; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
2594; X64-NOBMI-NEXT:    shrq %cl, %rax
2595; X64-NOBMI-NEXT:    movq %rax, (%rdx)
2596; X64-NOBMI-NEXT:    andq %rdi, %rax
2597; X64-NOBMI-NEXT:    retq
2598;
2599; X64-BMI1-LABEL: bzhi64_c1_indexzext:
2600; X64-BMI1:       # %bb.0:
2601; X64-BMI1-NEXT:    movl %esi, %ecx
2602; X64-BMI1-NEXT:    negb %cl
2603; X64-BMI1-NEXT:    movq $-1, %rax
2604; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
2605; X64-BMI1-NEXT:    shrq %cl, %rax
2606; X64-BMI1-NEXT:    movq %rax, (%rdx)
2607; X64-BMI1-NEXT:    andq %rdi, %rax
2608; X64-BMI1-NEXT:    retq
2609;
2610; X64-BMI2-LABEL: bzhi64_c1_indexzext:
2611; X64-BMI2:       # %bb.0:
2612; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
2613; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
2614; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
2615; X64-BMI2-NEXT:    negb %sil
2616; X64-BMI2-NEXT:    movq $-1, %rcx
2617; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
2618; X64-BMI2-NEXT:    movq %rcx, (%rdx)
2619; X64-BMI2-NEXT:    retq
2620  %numhighbits = sub i8 64, %numlowbits ; subtraction happens at i8 width
2621  %sh_prom = zext i8 %numhighbits to i64 ; promote the shift amount to the shift's type
2622  %mask = lshr i64 -1, %sh_prom ; all-ones shifted right by that amount
2623  store i64 %mask, ptr %escape ; extra use: the mask itself must be materialized
2624  %masked = and i64 %mask, %val
2625  ret i64 %masked
2626}
2627
; Pattern c, 64-bit: %val is loaded from %w, and the mask
; (-1 >> (64 - %numlowbits)) is also stored to %escape, which gives the mask
; a second use besides the 'and'.
define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %ebx
; X86-NOBMI-NEXT:    pushl %edi
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movb $64, %cl
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edi
; X86-NOBMI-NEXT:    movl $-1, %ebx
; X86-NOBMI-NEXT:    shrl %cl, %ebx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB36_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %ebx, %edi
; X86-NOBMI-NEXT:    xorl %ebx, %ebx
; X86-NOBMI-NEXT:  .LBB36_2:
; X86-NOBMI-NEXT:    movl 4(%eax), %edx
; X86-NOBMI-NEXT:    andl %ebx, %edx
; X86-NOBMI-NEXT:    movl (%eax), %eax
; X86-NOBMI-NEXT:    andl %edi, %eax
; X86-NOBMI-NEXT:    movl %ebx, 4(%esi)
; X86-NOBMI-NEXT:    movl %edi, (%esi)
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    popl %edi
; X86-NOBMI-NEXT:    popl %ebx
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_c2_load:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    pushl %ebx
; X86-BMI1-NEXT:    pushl %edi
; X86-BMI1-NEXT:    pushl %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    movb $64, %cl
; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT:    movl $-1, %edi
; X86-BMI1-NEXT:    movl $-1, %ebx
; X86-BMI1-NEXT:    shrl %cl, %ebx
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    je .LBB36_2
; X86-BMI1-NEXT:  # %bb.1:
; X86-BMI1-NEXT:    movl %ebx, %edi
; X86-BMI1-NEXT:    xorl %ebx, %ebx
; X86-BMI1-NEXT:  .LBB36_2:
; X86-BMI1-NEXT:    movl 4(%eax), %edx
; X86-BMI1-NEXT:    andl %ebx, %edx
; X86-BMI1-NEXT:    movl (%eax), %eax
; X86-BMI1-NEXT:    andl %edi, %eax
; X86-BMI1-NEXT:    movl %ebx, 4(%esi)
; X86-BMI1-NEXT:    movl %edi, (%esi)
; X86-BMI1-NEXT:    popl %esi
; X86-BMI1-NEXT:    popl %edi
; X86-BMI1-NEXT:    popl %ebx
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_c2_load:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    pushl %edi
; X86-BMI2-NEXT:    pushl %esi
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    movb $64, %dl
; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %dl
; X86-BMI2-NEXT:    movl $-1, %esi
; X86-BMI2-NEXT:    shrxl %edx, %esi, %edi
; X86-BMI2-NEXT:    testb $32, %dl
; X86-BMI2-NEXT:    je .LBB36_2
; X86-BMI2-NEXT:  # %bb.1:
; X86-BMI2-NEXT:    movl %edi, %esi
; X86-BMI2-NEXT:    xorl %edi, %edi
; X86-BMI2-NEXT:  .LBB36_2:
; X86-BMI2-NEXT:    movl 4(%eax), %edx
; X86-BMI2-NEXT:    andl %edi, %edx
; X86-BMI2-NEXT:    movl (%eax), %eax
; X86-BMI2-NEXT:    andl %esi, %eax
; X86-BMI2-NEXT:    movl %edi, 4(%ecx)
; X86-BMI2-NEXT:    movl %esi, (%ecx)
; X86-BMI2-NEXT:    popl %esi
; X86-BMI2-NEXT:    popl %edi
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_c2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq %rsi, %rcx
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    movq $-1, %rsi
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT:    shrq %cl, %rsi
; X64-NOBMI-NEXT:    movq (%rdi), %rax
; X64-NOBMI-NEXT:    andq %rsi, %rax
; X64-NOBMI-NEXT:    movq %rsi, (%rdx)
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_c2_load:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    movq %rsi, %rcx
; X64-BMI1-NEXT:    negb %cl
; X64-BMI1-NEXT:    movq $-1, %rsi
; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT:    shrq %cl, %rsi
; X64-BMI1-NEXT:    movq (%rdi), %rax
; X64-BMI1-NEXT:    andq %rsi, %rax
; X64-BMI1-NEXT:    movq %rsi, (%rdx)
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_c2_load:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
; X64-BMI2-NEXT:    negb %sil
; X64-BMI2-NEXT:    movq $-1, %rcx
; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
; X64-BMI2-NEXT:    movq %rcx, (%rdx)
; X64-BMI2-NEXT:    retq
  %val = load i64, ptr %w
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 -1, %numhighbits ; all-ones shifted right by the number of high bits to drop
  store i64 %mask, ptr %escape ; extra use of the mask
  %masked = and i64 %mask, %val
  ret i64 %masked
}
2753
; Pattern c, 64-bit: %val is loaded from %w and %numlowbits is an i8. The
; subtraction from 64 is done in i8 and the result is zero-extended to i64 to
; form the shift amount. The mask is also stored to %escape (extra use).
define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %ebx
; X86-NOBMI-NEXT:    pushl %edi
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movb $64, %cl
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edi
; X86-NOBMI-NEXT:    movl $-1, %ebx
; X86-NOBMI-NEXT:    shrl %cl, %ebx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB37_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %ebx, %edi
; X86-NOBMI-NEXT:    xorl %ebx, %ebx
; X86-NOBMI-NEXT:  .LBB37_2:
; X86-NOBMI-NEXT:    movl 4(%eax), %edx
; X86-NOBMI-NEXT:    andl %ebx, %edx
; X86-NOBMI-NEXT:    movl (%eax), %eax
; X86-NOBMI-NEXT:    andl %edi, %eax
; X86-NOBMI-NEXT:    movl %ebx, 4(%esi)
; X86-NOBMI-NEXT:    movl %edi, (%esi)
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    popl %edi
; X86-NOBMI-NEXT:    popl %ebx
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_c3_load_indexzext:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    pushl %ebx
; X86-BMI1-NEXT:    pushl %edi
; X86-BMI1-NEXT:    pushl %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    movb $64, %cl
; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT:    movl $-1, %edi
; X86-BMI1-NEXT:    movl $-1, %ebx
; X86-BMI1-NEXT:    shrl %cl, %ebx
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    je .LBB37_2
; X86-BMI1-NEXT:  # %bb.1:
; X86-BMI1-NEXT:    movl %ebx, %edi
; X86-BMI1-NEXT:    xorl %ebx, %ebx
; X86-BMI1-NEXT:  .LBB37_2:
; X86-BMI1-NEXT:    movl 4(%eax), %edx
; X86-BMI1-NEXT:    andl %ebx, %edx
; X86-BMI1-NEXT:    movl (%eax), %eax
; X86-BMI1-NEXT:    andl %edi, %eax
; X86-BMI1-NEXT:    movl %ebx, 4(%esi)
; X86-BMI1-NEXT:    movl %edi, (%esi)
; X86-BMI1-NEXT:    popl %esi
; X86-BMI1-NEXT:    popl %edi
; X86-BMI1-NEXT:    popl %ebx
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_c3_load_indexzext:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    pushl %edi
; X86-BMI2-NEXT:    pushl %esi
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    movb $64, %dl
; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %dl
; X86-BMI2-NEXT:    movl $-1, %esi
; X86-BMI2-NEXT:    shrxl %edx, %esi, %edi
; X86-BMI2-NEXT:    testb $32, %dl
; X86-BMI2-NEXT:    je .LBB37_2
; X86-BMI2-NEXT:  # %bb.1:
; X86-BMI2-NEXT:    movl %edi, %esi
; X86-BMI2-NEXT:    xorl %edi, %edi
; X86-BMI2-NEXT:  .LBB37_2:
; X86-BMI2-NEXT:    movl 4(%eax), %edx
; X86-BMI2-NEXT:    andl %edi, %edx
; X86-BMI2-NEXT:    movl (%eax), %eax
; X86-BMI2-NEXT:    andl %esi, %eax
; X86-BMI2-NEXT:    movl %edi, 4(%ecx)
; X86-BMI2-NEXT:    movl %esi, (%ecx)
; X86-BMI2-NEXT:    popl %esi
; X86-BMI2-NEXT:    popl %edi
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    movq $-1, %rsi
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrq %cl, %rsi
; X64-NOBMI-NEXT:    movq (%rdi), %rax
; X64-NOBMI-NEXT:    andq %rsi, %rax
; X64-NOBMI-NEXT:    movq %rsi, (%rdx)
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_c3_load_indexzext:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    movl %esi, %ecx
; X64-BMI1-NEXT:    negb %cl
; X64-BMI1-NEXT:    movq $-1, %rsi
; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-BMI1-NEXT:    shrq %cl, %rsi
; X64-BMI1-NEXT:    movq (%rdi), %rax
; X64-BMI1-NEXT:    andq %rsi, %rax
; X64-BMI1-NEXT:    movq %rsi, (%rdx)
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_c3_load_indexzext:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
; X64-BMI2-NEXT:    negb %sil
; X64-BMI2-NEXT:    movq $-1, %rcx
; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
; X64-BMI2-NEXT:    movq %rcx, (%rdx)
; X64-BMI2-NEXT:    retq
  %val = load i64, ptr %w
  %numhighbits = sub i8 64, %numlowbits ; computed in i8
  %sh_prom = zext i8 %numhighbits to i64 ; widen the shift amount to the shifted type
  %mask = lshr i64 -1, %sh_prom
  store i64 %mask, ptr %escape ; extra use of the mask
  %masked = and i64 %mask, %val
  ret i64 %masked
}
2881
; Pattern c, 64-bit, with the operands of the final 'and' swapped (%val first,
; %mask second). The mask is also stored to %escape (extra use).
define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb $64, %cl
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    shrl %cl, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB38_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %edx, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:  .LBB38_2:
; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
; X86-NOBMI-NEXT:    movl %eax, (%esi)
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_c4_commutative:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    pushl %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT:    movb $64, %cl
; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT:    movl $-1, %eax
; X86-BMI1-NEXT:    movl $-1, %edx
; X86-BMI1-NEXT:    shrl %cl, %edx
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    je .LBB38_2
; X86-BMI1-NEXT:  # %bb.1:
; X86-BMI1-NEXT:    movl %edx, %eax
; X86-BMI1-NEXT:    xorl %edx, %edx
; X86-BMI1-NEXT:  .LBB38_2:
; X86-BMI1-NEXT:    movl %edx, 4(%esi)
; X86-BMI1-NEXT:    movl %eax, (%esi)
; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT:    popl %esi
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_c4_commutative:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    pushl %ebx
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT:    movb $64, %bl
; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT:    movl $-1, %eax
; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
; X86-BMI2-NEXT:    testb $32, %bl
; X86-BMI2-NEXT:    je .LBB38_2
; X86-BMI2-NEXT:  # %bb.1:
; X86-BMI2-NEXT:    movl %edx, %eax
; X86-BMI2-NEXT:    xorl %edx, %edx
; X86-BMI2-NEXT:  .LBB38_2:
; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
; X86-BMI2-NEXT:    movl %eax, (%ecx)
; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT:    popl %ebx
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_c4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq %rsi, %rcx
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT:    shrq %cl, %rax
; X64-NOBMI-NEXT:    movq %rax, (%rdx)
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_c4_commutative:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    movq %rsi, %rcx
; X64-BMI1-NEXT:    negb %cl
; X64-BMI1-NEXT:    movq $-1, %rax
; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT:    shrq %cl, %rax
; X64-BMI1-NEXT:    movq %rax, (%rdx)
; X64-BMI1-NEXT:    andq %rdi, %rax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_c4_commutative:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
; X64-BMI2-NEXT:    negb %sil
; X64-BMI2-NEXT:    movq $-1, %rcx
; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
; X64-BMI2-NEXT:    movq %rcx, (%rdx)
; X64-BMI2-NEXT:    retq
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 -1, %numhighbits
  store i64 %mask, ptr %escape ; extra use of the mask
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}
2985
2986; 64-bit, but with 32-bit output
2987
; Everything done in 64-bit, truncation happens last.
; The i64 mask is -1 >> (64 - %numlowbits); only the low 32 bits of the masked
; value are returned.
define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_c0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb $64, %cl
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    jne .LBB39_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:  .LBB39_2:
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_32_c0:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movb $64, %cl
; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT:    movl $-1, %eax
; X86-BMI1-NEXT:    shrl %cl, %eax
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    jne .LBB39_2
; X86-BMI1-NEXT:  # %bb.1:
; X86-BMI1-NEXT:    movl $-1, %eax
; X86-BMI1-NEXT:  .LBB39_2:
; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_32_c0:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movb $64, %cl
; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT:    movl $-1, %eax
; X86-BMI2-NEXT:    testb $32, %cl
; X86-BMI2-NEXT:    je .LBB39_2
; X86-BMI2-NEXT:  # %bb.1:
; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT:  .LBB39_2:
; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_c0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq %rsi, %rcx
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT:    shrq %cl, %rax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_c0:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_c0:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 -1, %numhighbits ; 64-bit mask
  %masked = and i64 %mask, %val ; 64-bit masking
  %res = trunc i64 %masked to i32 ; truncation is the last step
  ret i32 %res
}
3058
; %val is truncated to 32-bit first. The mask is computed with a 32-bit shift
; and the masking is 32-bit.
define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_c1:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_32_c1:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    shll $8, %eax
; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_32_c1:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_c1:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movq %rdi, %rax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_c1:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_c1:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %truncval = trunc i64 %val to i32 ; only the low 32 bits of %val are used
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits ; 32-bit mask
  %masked = and i32 %mask, %truncval ; 32-bit masking
  ret i32 %masked
}
3111
; The mask is computed with a 32-bit shift, then zero-extended to 64-bit.
; Masking is 64-bit. Then truncation.
define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_c2:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_32_c2:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    shll $8, %eax
; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_32_c2:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_c2:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movq %rdi, %rax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_c2:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_c2:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits ; 32-bit mask
  %zextmask = zext i32 %mask to i64 ; widened so the 'and' is 64-bit
  %masked = and i64 %zextmask, %val
  %truncmasked = trunc i64 %masked to i32
  ret i32 %truncmasked
}
3166
; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
; Masking is 64-bit. Then truncation.
; Unlike the other pattern c tests, the shifted constant is 4294967295
; (0xFFFFFFFF) rather than -1, while the shift amount is still
; 64 - %numlowbits.
define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind {
; X86-LABEL: bzhi64_32_c3:
; X86:       # %bb.0:
; X86-NEXT:    movb $64, %cl
; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    movl $-1, %edx
; X86-NEXT:    shrdl %cl, %eax, %edx
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    jne .LBB42_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:  .LBB42_2:
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_c3:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq %rsi, %rcx
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT:    shrq %cl, %rax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_c3:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    movq %rsi, %rcx
; X64-BMI1-NEXT:    negb %cl
; X64-BMI1-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT:    shrq %cl, %rax
; X64-BMI1-NEXT:    andl %edi, %eax
; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_c3:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    negb %sil
; X64-BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT:    andl %edi, %eax
; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-BMI2-NEXT:    retq
  %numhighbits = sub i64 64, %numlowbits
  %mask = lshr i64 4294967295, %numhighbits ; shifts 0xFFFFFFFF, not all-ones
  %masked = and i64 %mask, %val
  %truncmasked = trunc i64 %masked to i32
  ret i32 %truncmasked
}
3221
3222; ---------------------------------------------------------------------------- ;
3223; Pattern d. 32-bit.
3224; ---------------------------------------------------------------------------- ;
3225
; Basic form of pattern d: %val is shifted left and then logically shifted
; right by the same amount, 32 - %numlowbits. No explicit mask is formed.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi32_d0:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    shll $8, %eax
; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi32_d0:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_d0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movl %edi, %eax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi32_d0:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi32_d0:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits ; shift the unwanted high bits out
  %masked = lshr i32 %highbitscleared, %numhighbits ; shift back, filling with zeros
  ret i32 %masked
}
3275
; Pattern d, 32-bit, with an i8 %numlowbits: the subtraction from 32 is done
; in i8 and the result is zero-extended to i32 to form the shift amount used
; by both shifts.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi32_d1_indexzext:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    shll $8, %eax
; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi32_d1_indexzext:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movl %edi, %eax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi32_d1_indexzext:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi32_d1_indexzext:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %numhighbits = sub i8 32, %numlowbits ; computed in i8
  %sh_prom = zext i8 %numhighbits to i32 ; widen the shift amount to the shifted type
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}
3326
; Pattern d, 32-bit, with %val loaded from memory (%w) instead of being passed
; by value.
define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movl (%eax), %eax
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi32_d2_load:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT:    shll $8, %ecx
; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi32_d2_load:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_d2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movl (%rdi), %eax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi32_d2_load:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi32_d2_load:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT:    retq
  %val = load i32, ptr %w ; value comes from memory
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}
3380
; Pattern d, 32-bit, combining both variations: %val is loaded from %w, and
; %numlowbits is an i8 whose complement (32 - %numlowbits, computed in i8) is
; zero-extended to i32 for the shift amount.
define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movl (%eax), %eax
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT:    shll $8, %ecx
; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movl (%rdi), %eax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi32_d3_load_indexzext:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi32_d3_load_indexzext:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT:    retq
  %val = load i32, ptr %w ; value comes from memory
  %numhighbits = sub i8 32, %numlowbits ; computed in i8
  %sh_prom = zext i8 %numhighbits to i32 ; widen the shift amount to the shifted type
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}
3435
3436; 64-bit.
3437
; Pattern d, 64-bit, register operands. Computes
;   (%val << (64 - %numlowbits)) >> (64 - %numlowbits)
; using a logical right shift, with both the value and the bit count passed
; as i64.
; Expected lowering per the checks below: the i686 runs split the i64 shifts
; into 32-bit shldl/shrdl sequences that branch on bit 5 of the shift amount
; (testb $32); x86_64 without BMI emits negb + shlq + shrq; the BMI1 runs emit
; bextrq with the count shifted left by 8 (shll $8) as the control operand;
; the BMI2 runs emit a single bzhiq.
3438define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
3439; X86-NOBMI-LABEL: bzhi64_d0:
3440; X86-NOBMI:       # %bb.0:
3441; X86-NOBMI-NEXT:    pushl %ebx
3442; X86-NOBMI-NEXT:    pushl %edi
3443; X86-NOBMI-NEXT:    pushl %esi
3444; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
3445; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3446; X86-NOBMI-NEXT:    movb $64, %cl
3447; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3448; X86-NOBMI-NEXT:    movl %edx, %esi
3449; X86-NOBMI-NEXT:    shll %cl, %esi
3450; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3451; X86-NOBMI-NEXT:    testb $32, %cl
3452; X86-NOBMI-NEXT:    movl %esi, %edi
3453; X86-NOBMI-NEXT:    jne .LBB47_2
3454; X86-NOBMI-NEXT:  # %bb.1:
3455; X86-NOBMI-NEXT:    movl %eax, %edi
3456; X86-NOBMI-NEXT:  .LBB47_2:
3457; X86-NOBMI-NEXT:    movl %edi, %eax
3458; X86-NOBMI-NEXT:    shrl %cl, %eax
3459; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3460; X86-NOBMI-NEXT:    testb $32, %cl
3461; X86-NOBMI-NEXT:    movl $0, %edx
3462; X86-NOBMI-NEXT:    jne .LBB47_4
3463; X86-NOBMI-NEXT:  # %bb.3:
3464; X86-NOBMI-NEXT:    movl %esi, %ebx
3465; X86-NOBMI-NEXT:    movl %eax, %edx
3466; X86-NOBMI-NEXT:  .LBB47_4:
3467; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3468; X86-NOBMI-NEXT:    testb $32, %cl
3469; X86-NOBMI-NEXT:    jne .LBB47_6
3470; X86-NOBMI-NEXT:  # %bb.5:
3471; X86-NOBMI-NEXT:    movl %ebx, %eax
3472; X86-NOBMI-NEXT:  .LBB47_6:
3473; X86-NOBMI-NEXT:    popl %esi
3474; X86-NOBMI-NEXT:    popl %edi
3475; X86-NOBMI-NEXT:    popl %ebx
3476; X86-NOBMI-NEXT:    retl
3477;
3478; X86-BMI1-LABEL: bzhi64_d0:
3479; X86-BMI1:       # %bb.0:
3480; X86-BMI1-NEXT:    pushl %ebx
3481; X86-BMI1-NEXT:    pushl %edi
3482; X86-BMI1-NEXT:    pushl %esi
3483; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
3484; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3485; X86-BMI1-NEXT:    movb $64, %cl
3486; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3487; X86-BMI1-NEXT:    movl %edx, %esi
3488; X86-BMI1-NEXT:    shll %cl, %esi
3489; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3490; X86-BMI1-NEXT:    testb $32, %cl
3491; X86-BMI1-NEXT:    movl %esi, %edi
3492; X86-BMI1-NEXT:    jne .LBB47_2
3493; X86-BMI1-NEXT:  # %bb.1:
3494; X86-BMI1-NEXT:    movl %eax, %edi
3495; X86-BMI1-NEXT:  .LBB47_2:
3496; X86-BMI1-NEXT:    movl %edi, %eax
3497; X86-BMI1-NEXT:    shrl %cl, %eax
3498; X86-BMI1-NEXT:    xorl %ebx, %ebx
3499; X86-BMI1-NEXT:    testb $32, %cl
3500; X86-BMI1-NEXT:    movl $0, %edx
3501; X86-BMI1-NEXT:    jne .LBB47_4
3502; X86-BMI1-NEXT:  # %bb.3:
3503; X86-BMI1-NEXT:    movl %esi, %ebx
3504; X86-BMI1-NEXT:    movl %eax, %edx
3505; X86-BMI1-NEXT:  .LBB47_4:
3506; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3507; X86-BMI1-NEXT:    testb $32, %cl
3508; X86-BMI1-NEXT:    jne .LBB47_6
3509; X86-BMI1-NEXT:  # %bb.5:
3510; X86-BMI1-NEXT:    movl %ebx, %eax
3511; X86-BMI1-NEXT:  .LBB47_6:
3512; X86-BMI1-NEXT:    popl %esi
3513; X86-BMI1-NEXT:    popl %edi
3514; X86-BMI1-NEXT:    popl %ebx
3515; X86-BMI1-NEXT:    retl
3516;
3517; X86-BMI2-LABEL: bzhi64_d0:
3518; X86-BMI2:       # %bb.0:
3519; X86-BMI2-NEXT:    pushl %edi
3520; X86-BMI2-NEXT:    pushl %esi
3521; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3522; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
3523; X86-BMI2-NEXT:    movb $64, %cl
3524; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3525; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
3526; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
3527; X86-BMI2-NEXT:    xorl %edx, %edx
3528; X86-BMI2-NEXT:    testb $32, %cl
3529; X86-BMI2-NEXT:    je .LBB47_2
3530; X86-BMI2-NEXT:  # %bb.1:
3531; X86-BMI2-NEXT:    movl %edi, %esi
3532; X86-BMI2-NEXT:    movl $0, %edi
3533; X86-BMI2-NEXT:  .LBB47_2:
3534; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3535; X86-BMI2-NEXT:    jne .LBB47_4
3536; X86-BMI2-NEXT:  # %bb.3:
3537; X86-BMI2-NEXT:    movl %eax, %edx
3538; X86-BMI2-NEXT:  .LBB47_4:
3539; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3540; X86-BMI2-NEXT:    testb $32, %cl
3541; X86-BMI2-NEXT:    jne .LBB47_6
3542; X86-BMI2-NEXT:  # %bb.5:
3543; X86-BMI2-NEXT:    movl %edi, %eax
3544; X86-BMI2-NEXT:  .LBB47_6:
3545; X86-BMI2-NEXT:    popl %esi
3546; X86-BMI2-NEXT:    popl %edi
3547; X86-BMI2-NEXT:    retl
3548;
3549; X64-NOBMI-LABEL: bzhi64_d0:
3550; X64-NOBMI:       # %bb.0:
3551; X64-NOBMI-NEXT:    movq %rsi, %rcx
3552; X64-NOBMI-NEXT:    movq %rdi, %rax
3553; X64-NOBMI-NEXT:    negb %cl
3554; X64-NOBMI-NEXT:    shlq %cl, %rax
3555; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3556; X64-NOBMI-NEXT:    shrq %cl, %rax
3557; X64-NOBMI-NEXT:    retq
3558;
3559; X64-BMI1-LABEL: bzhi64_d0:
3560; X64-BMI1:       # %bb.0:
3561; X64-BMI1-NEXT:    shll $8, %esi
3562; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
3563; X64-BMI1-NEXT:    retq
3564;
3565; X64-BMI2-LABEL: bzhi64_d0:
3566; X64-BMI2:       # %bb.0:
3567; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
3568; X64-BMI2-NEXT:    retq
3569  %numhighbits = sub i64 64, %numlowbits
3570  %highbitscleared = shl i64 %val, %numhighbits
3571  %masked = lshr i64 %highbitscleared, %numhighbits
3572  ret i64 %masked
3573}
3574
; Pattern d, 64-bit, with a narrow (i8) bit count. The subtraction
; 64 - %numlowbits is done in i8 and the result is zero-extended to i64 before
; being used as the amount for both the shl and the logical lshr of %val.
; Expected lowering per the checks below: the i686 runs use the same branchy
; 32-bit shldl/shrdl expansion as the i64-count variant; x86_64 without BMI
; emits negb + shlq + shrq; the BMI1 runs emit shll $8 + bextrq and the BMI2
; runs emit bzhiq, both reading the count through the full 64-bit %rsi.
3575define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
3576; X86-NOBMI-LABEL: bzhi64_d1_indexzext:
3577; X86-NOBMI:       # %bb.0:
3578; X86-NOBMI-NEXT:    pushl %ebx
3579; X86-NOBMI-NEXT:    pushl %edi
3580; X86-NOBMI-NEXT:    pushl %esi
3581; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
3582; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3583; X86-NOBMI-NEXT:    movb $64, %cl
3584; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3585; X86-NOBMI-NEXT:    movl %edx, %esi
3586; X86-NOBMI-NEXT:    shll %cl, %esi
3587; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3588; X86-NOBMI-NEXT:    testb $32, %cl
3589; X86-NOBMI-NEXT:    movl %esi, %edi
3590; X86-NOBMI-NEXT:    jne .LBB48_2
3591; X86-NOBMI-NEXT:  # %bb.1:
3592; X86-NOBMI-NEXT:    movl %eax, %edi
3593; X86-NOBMI-NEXT:  .LBB48_2:
3594; X86-NOBMI-NEXT:    movl %edi, %eax
3595; X86-NOBMI-NEXT:    shrl %cl, %eax
3596; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3597; X86-NOBMI-NEXT:    testb $32, %cl
3598; X86-NOBMI-NEXT:    movl $0, %edx
3599; X86-NOBMI-NEXT:    jne .LBB48_4
3600; X86-NOBMI-NEXT:  # %bb.3:
3601; X86-NOBMI-NEXT:    movl %esi, %ebx
3602; X86-NOBMI-NEXT:    movl %eax, %edx
3603; X86-NOBMI-NEXT:  .LBB48_4:
3604; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3605; X86-NOBMI-NEXT:    testb $32, %cl
3606; X86-NOBMI-NEXT:    jne .LBB48_6
3607; X86-NOBMI-NEXT:  # %bb.5:
3608; X86-NOBMI-NEXT:    movl %ebx, %eax
3609; X86-NOBMI-NEXT:  .LBB48_6:
3610; X86-NOBMI-NEXT:    popl %esi
3611; X86-NOBMI-NEXT:    popl %edi
3612; X86-NOBMI-NEXT:    popl %ebx
3613; X86-NOBMI-NEXT:    retl
3614;
3615; X86-BMI1-LABEL: bzhi64_d1_indexzext:
3616; X86-BMI1:       # %bb.0:
3617; X86-BMI1-NEXT:    pushl %ebx
3618; X86-BMI1-NEXT:    pushl %edi
3619; X86-BMI1-NEXT:    pushl %esi
3620; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
3621; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3622; X86-BMI1-NEXT:    movb $64, %cl
3623; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3624; X86-BMI1-NEXT:    movl %edx, %esi
3625; X86-BMI1-NEXT:    shll %cl, %esi
3626; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3627; X86-BMI1-NEXT:    testb $32, %cl
3628; X86-BMI1-NEXT:    movl %esi, %edi
3629; X86-BMI1-NEXT:    jne .LBB48_2
3630; X86-BMI1-NEXT:  # %bb.1:
3631; X86-BMI1-NEXT:    movl %eax, %edi
3632; X86-BMI1-NEXT:  .LBB48_2:
3633; X86-BMI1-NEXT:    movl %edi, %eax
3634; X86-BMI1-NEXT:    shrl %cl, %eax
3635; X86-BMI1-NEXT:    xorl %ebx, %ebx
3636; X86-BMI1-NEXT:    testb $32, %cl
3637; X86-BMI1-NEXT:    movl $0, %edx
3638; X86-BMI1-NEXT:    jne .LBB48_4
3639; X86-BMI1-NEXT:  # %bb.3:
3640; X86-BMI1-NEXT:    movl %esi, %ebx
3641; X86-BMI1-NEXT:    movl %eax, %edx
3642; X86-BMI1-NEXT:  .LBB48_4:
3643; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3644; X86-BMI1-NEXT:    testb $32, %cl
3645; X86-BMI1-NEXT:    jne .LBB48_6
3646; X86-BMI1-NEXT:  # %bb.5:
3647; X86-BMI1-NEXT:    movl %ebx, %eax
3648; X86-BMI1-NEXT:  .LBB48_6:
3649; X86-BMI1-NEXT:    popl %esi
3650; X86-BMI1-NEXT:    popl %edi
3651; X86-BMI1-NEXT:    popl %ebx
3652; X86-BMI1-NEXT:    retl
3653;
3654; X86-BMI2-LABEL: bzhi64_d1_indexzext:
3655; X86-BMI2:       # %bb.0:
3656; X86-BMI2-NEXT:    pushl %edi
3657; X86-BMI2-NEXT:    pushl %esi
3658; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3659; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
3660; X86-BMI2-NEXT:    movb $64, %cl
3661; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3662; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
3663; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
3664; X86-BMI2-NEXT:    xorl %edx, %edx
3665; X86-BMI2-NEXT:    testb $32, %cl
3666; X86-BMI2-NEXT:    je .LBB48_2
3667; X86-BMI2-NEXT:  # %bb.1:
3668; X86-BMI2-NEXT:    movl %edi, %esi
3669; X86-BMI2-NEXT:    movl $0, %edi
3670; X86-BMI2-NEXT:  .LBB48_2:
3671; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3672; X86-BMI2-NEXT:    jne .LBB48_4
3673; X86-BMI2-NEXT:  # %bb.3:
3674; X86-BMI2-NEXT:    movl %eax, %edx
3675; X86-BMI2-NEXT:  .LBB48_4:
3676; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3677; X86-BMI2-NEXT:    testb $32, %cl
3678; X86-BMI2-NEXT:    jne .LBB48_6
3679; X86-BMI2-NEXT:  # %bb.5:
3680; X86-BMI2-NEXT:    movl %edi, %eax
3681; X86-BMI2-NEXT:  .LBB48_6:
3682; X86-BMI2-NEXT:    popl %esi
3683; X86-BMI2-NEXT:    popl %edi
3684; X86-BMI2-NEXT:    retl
3685;
3686; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
3687; X64-NOBMI:       # %bb.0:
3688; X64-NOBMI-NEXT:    movl %esi, %ecx
3689; X64-NOBMI-NEXT:    movq %rdi, %rax
3690; X64-NOBMI-NEXT:    negb %cl
3691; X64-NOBMI-NEXT:    shlq %cl, %rax
3692; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3693; X64-NOBMI-NEXT:    shrq %cl, %rax
3694; X64-NOBMI-NEXT:    retq
3695;
3696; X64-BMI1-LABEL: bzhi64_d1_indexzext:
3697; X64-BMI1:       # %bb.0:
3698; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
3699; X64-BMI1-NEXT:    shll $8, %esi
3700; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
3701; X64-BMI1-NEXT:    retq
3702;
3703; X64-BMI2-LABEL: bzhi64_d1_indexzext:
3704; X64-BMI2:       # %bb.0:
3705; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
3706; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
3707; X64-BMI2-NEXT:    retq
3708  %numhighbits = sub i8 64, %numlowbits
3709  %sh_prom = zext i8 %numhighbits to i64
3710  %highbitscleared = shl i64 %val, %sh_prom
3711  %masked = lshr i64 %highbitscleared, %sh_prom
3712  ret i64 %masked
3713}
3714
; Pattern d, 64-bit, with the value loaded from memory. %val is read through
; the pointer %w, then shifted left and logically right by
; 64 - %numlowbits (i64 count).
; Expected lowering per the checks below: the i686 runs load the two 32-bit
; halves ((%eax) and 4(%eax)) and use the branchy shldl/shrdl expansion;
; x86_64 without BMI loads into %rax and emits negb + shlq + shrq; the BMI1
; and BMI2 runs fold the load into the memory operand of bextrq / bzhiq.
3715define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
3716; X86-NOBMI-LABEL: bzhi64_d2_load:
3717; X86-NOBMI:       # %bb.0:
3718; X86-NOBMI-NEXT:    pushl %ebx
3719; X86-NOBMI-NEXT:    pushl %edi
3720; X86-NOBMI-NEXT:    pushl %esi
3721; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3722; X86-NOBMI-NEXT:    movl (%eax), %edx
3723; X86-NOBMI-NEXT:    movl 4(%eax), %eax
3724; X86-NOBMI-NEXT:    movb $64, %cl
3725; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3726; X86-NOBMI-NEXT:    movl %edx, %esi
3727; X86-NOBMI-NEXT:    shll %cl, %esi
3728; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3729; X86-NOBMI-NEXT:    testb $32, %cl
3730; X86-NOBMI-NEXT:    movl %esi, %edi
3731; X86-NOBMI-NEXT:    jne .LBB49_2
3732; X86-NOBMI-NEXT:  # %bb.1:
3733; X86-NOBMI-NEXT:    movl %eax, %edi
3734; X86-NOBMI-NEXT:  .LBB49_2:
3735; X86-NOBMI-NEXT:    movl %edi, %eax
3736; X86-NOBMI-NEXT:    shrl %cl, %eax
3737; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3738; X86-NOBMI-NEXT:    testb $32, %cl
3739; X86-NOBMI-NEXT:    movl $0, %edx
3740; X86-NOBMI-NEXT:    jne .LBB49_4
3741; X86-NOBMI-NEXT:  # %bb.3:
3742; X86-NOBMI-NEXT:    movl %esi, %ebx
3743; X86-NOBMI-NEXT:    movl %eax, %edx
3744; X86-NOBMI-NEXT:  .LBB49_4:
3745; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3746; X86-NOBMI-NEXT:    testb $32, %cl
3747; X86-NOBMI-NEXT:    jne .LBB49_6
3748; X86-NOBMI-NEXT:  # %bb.5:
3749; X86-NOBMI-NEXT:    movl %ebx, %eax
3750; X86-NOBMI-NEXT:  .LBB49_6:
3751; X86-NOBMI-NEXT:    popl %esi
3752; X86-NOBMI-NEXT:    popl %edi
3753; X86-NOBMI-NEXT:    popl %ebx
3754; X86-NOBMI-NEXT:    retl
3755;
3756; X86-BMI1-LABEL: bzhi64_d2_load:
3757; X86-BMI1:       # %bb.0:
3758; X86-BMI1-NEXT:    pushl %ebx
3759; X86-BMI1-NEXT:    pushl %edi
3760; X86-BMI1-NEXT:    pushl %esi
3761; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3762; X86-BMI1-NEXT:    movl (%eax), %edx
3763; X86-BMI1-NEXT:    movl 4(%eax), %eax
3764; X86-BMI1-NEXT:    movb $64, %cl
3765; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3766; X86-BMI1-NEXT:    movl %edx, %esi
3767; X86-BMI1-NEXT:    shll %cl, %esi
3768; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3769; X86-BMI1-NEXT:    testb $32, %cl
3770; X86-BMI1-NEXT:    movl %esi, %edi
3771; X86-BMI1-NEXT:    jne .LBB49_2
3772; X86-BMI1-NEXT:  # %bb.1:
3773; X86-BMI1-NEXT:    movl %eax, %edi
3774; X86-BMI1-NEXT:  .LBB49_2:
3775; X86-BMI1-NEXT:    movl %edi, %eax
3776; X86-BMI1-NEXT:    shrl %cl, %eax
3777; X86-BMI1-NEXT:    xorl %ebx, %ebx
3778; X86-BMI1-NEXT:    testb $32, %cl
3779; X86-BMI1-NEXT:    movl $0, %edx
3780; X86-BMI1-NEXT:    jne .LBB49_4
3781; X86-BMI1-NEXT:  # %bb.3:
3782; X86-BMI1-NEXT:    movl %esi, %ebx
3783; X86-BMI1-NEXT:    movl %eax, %edx
3784; X86-BMI1-NEXT:  .LBB49_4:
3785; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3786; X86-BMI1-NEXT:    testb $32, %cl
3787; X86-BMI1-NEXT:    jne .LBB49_6
3788; X86-BMI1-NEXT:  # %bb.5:
3789; X86-BMI1-NEXT:    movl %ebx, %eax
3790; X86-BMI1-NEXT:  .LBB49_6:
3791; X86-BMI1-NEXT:    popl %esi
3792; X86-BMI1-NEXT:    popl %edi
3793; X86-BMI1-NEXT:    popl %ebx
3794; X86-BMI1-NEXT:    retl
3795;
3796; X86-BMI2-LABEL: bzhi64_d2_load:
3797; X86-BMI2:       # %bb.0:
3798; X86-BMI2-NEXT:    pushl %edi
3799; X86-BMI2-NEXT:    pushl %esi
3800; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3801; X86-BMI2-NEXT:    movl (%eax), %edx
3802; X86-BMI2-NEXT:    movl 4(%eax), %esi
3803; X86-BMI2-NEXT:    movb $64, %cl
3804; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3805; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
3806; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
3807; X86-BMI2-NEXT:    xorl %edx, %edx
3808; X86-BMI2-NEXT:    testb $32, %cl
3809; X86-BMI2-NEXT:    je .LBB49_2
3810; X86-BMI2-NEXT:  # %bb.1:
3811; X86-BMI2-NEXT:    movl %edi, %esi
3812; X86-BMI2-NEXT:    movl $0, %edi
3813; X86-BMI2-NEXT:  .LBB49_2:
3814; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3815; X86-BMI2-NEXT:    jne .LBB49_4
3816; X86-BMI2-NEXT:  # %bb.3:
3817; X86-BMI2-NEXT:    movl %eax, %edx
3818; X86-BMI2-NEXT:  .LBB49_4:
3819; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3820; X86-BMI2-NEXT:    testb $32, %cl
3821; X86-BMI2-NEXT:    jne .LBB49_6
3822; X86-BMI2-NEXT:  # %bb.5:
3823; X86-BMI2-NEXT:    movl %edi, %eax
3824; X86-BMI2-NEXT:  .LBB49_6:
3825; X86-BMI2-NEXT:    popl %esi
3826; X86-BMI2-NEXT:    popl %edi
3827; X86-BMI2-NEXT:    retl
3828;
3829; X64-NOBMI-LABEL: bzhi64_d2_load:
3830; X64-NOBMI:       # %bb.0:
3831; X64-NOBMI-NEXT:    movq %rsi, %rcx
3832; X64-NOBMI-NEXT:    movq (%rdi), %rax
3833; X64-NOBMI-NEXT:    negb %cl
3834; X64-NOBMI-NEXT:    shlq %cl, %rax
3835; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
3836; X64-NOBMI-NEXT:    shrq %cl, %rax
3837; X64-NOBMI-NEXT:    retq
3838;
3839; X64-BMI1-LABEL: bzhi64_d2_load:
3840; X64-BMI1:       # %bb.0:
3841; X64-BMI1-NEXT:    shll $8, %esi
3842; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
3843; X64-BMI1-NEXT:    retq
3844;
3845; X64-BMI2-LABEL: bzhi64_d2_load:
3846; X64-BMI2:       # %bb.0:
3847; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
3848; X64-BMI2-NEXT:    retq
3849  %val = load i64, ptr %w
3850  %numhighbits = sub i64 64, %numlowbits
3851  %highbitscleared = shl i64 %val, %numhighbits
3852  %masked = lshr i64 %highbitscleared, %numhighbits
3853  ret i64 %masked
3854}
3855
; Pattern d, 64-bit, combining a loaded value with a narrow (i8) bit count.
; %val is read through the pointer %w; 64 - %numlowbits is computed in i8 and
; zero-extended to i64 before being used as the amount for both the shl and
; the logical lshr.
; Expected lowering per the checks below: the i686 runs load both 32-bit
; halves and use the branchy shldl/shrdl expansion; x86_64 without BMI emits
; negb + shlq + shrq on the loaded value; the BMI1 and BMI2 runs fold the load
; into the memory operand of bextrq / bzhiq.
3856define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
3857; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext:
3858; X86-NOBMI:       # %bb.0:
3859; X86-NOBMI-NEXT:    pushl %ebx
3860; X86-NOBMI-NEXT:    pushl %edi
3861; X86-NOBMI-NEXT:    pushl %esi
3862; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
3863; X86-NOBMI-NEXT:    movl (%eax), %edx
3864; X86-NOBMI-NEXT:    movl 4(%eax), %eax
3865; X86-NOBMI-NEXT:    movb $64, %cl
3866; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
3867; X86-NOBMI-NEXT:    movl %edx, %esi
3868; X86-NOBMI-NEXT:    shll %cl, %esi
3869; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
3870; X86-NOBMI-NEXT:    testb $32, %cl
3871; X86-NOBMI-NEXT:    movl %esi, %edi
3872; X86-NOBMI-NEXT:    jne .LBB50_2
3873; X86-NOBMI-NEXT:  # %bb.1:
3874; X86-NOBMI-NEXT:    movl %eax, %edi
3875; X86-NOBMI-NEXT:  .LBB50_2:
3876; X86-NOBMI-NEXT:    movl %edi, %eax
3877; X86-NOBMI-NEXT:    shrl %cl, %eax
3878; X86-NOBMI-NEXT:    xorl %ebx, %ebx
3879; X86-NOBMI-NEXT:    testb $32, %cl
3880; X86-NOBMI-NEXT:    movl $0, %edx
3881; X86-NOBMI-NEXT:    jne .LBB50_4
3882; X86-NOBMI-NEXT:  # %bb.3:
3883; X86-NOBMI-NEXT:    movl %esi, %ebx
3884; X86-NOBMI-NEXT:    movl %eax, %edx
3885; X86-NOBMI-NEXT:  .LBB50_4:
3886; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
3887; X86-NOBMI-NEXT:    testb $32, %cl
3888; X86-NOBMI-NEXT:    jne .LBB50_6
3889; X86-NOBMI-NEXT:  # %bb.5:
3890; X86-NOBMI-NEXT:    movl %ebx, %eax
3891; X86-NOBMI-NEXT:  .LBB50_6:
3892; X86-NOBMI-NEXT:    popl %esi
3893; X86-NOBMI-NEXT:    popl %edi
3894; X86-NOBMI-NEXT:    popl %ebx
3895; X86-NOBMI-NEXT:    retl
3896;
3897; X86-BMI1-LABEL: bzhi64_d3_load_indexzext:
3898; X86-BMI1:       # %bb.0:
3899; X86-BMI1-NEXT:    pushl %ebx
3900; X86-BMI1-NEXT:    pushl %edi
3901; X86-BMI1-NEXT:    pushl %esi
3902; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
3903; X86-BMI1-NEXT:    movl (%eax), %edx
3904; X86-BMI1-NEXT:    movl 4(%eax), %eax
3905; X86-BMI1-NEXT:    movb $64, %cl
3906; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
3907; X86-BMI1-NEXT:    movl %edx, %esi
3908; X86-BMI1-NEXT:    shll %cl, %esi
3909; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
3910; X86-BMI1-NEXT:    testb $32, %cl
3911; X86-BMI1-NEXT:    movl %esi, %edi
3912; X86-BMI1-NEXT:    jne .LBB50_2
3913; X86-BMI1-NEXT:  # %bb.1:
3914; X86-BMI1-NEXT:    movl %eax, %edi
3915; X86-BMI1-NEXT:  .LBB50_2:
3916; X86-BMI1-NEXT:    movl %edi, %eax
3917; X86-BMI1-NEXT:    shrl %cl, %eax
3918; X86-BMI1-NEXT:    xorl %ebx, %ebx
3919; X86-BMI1-NEXT:    testb $32, %cl
3920; X86-BMI1-NEXT:    movl $0, %edx
3921; X86-BMI1-NEXT:    jne .LBB50_4
3922; X86-BMI1-NEXT:  # %bb.3:
3923; X86-BMI1-NEXT:    movl %esi, %ebx
3924; X86-BMI1-NEXT:    movl %eax, %edx
3925; X86-BMI1-NEXT:  .LBB50_4:
3926; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
3927; X86-BMI1-NEXT:    testb $32, %cl
3928; X86-BMI1-NEXT:    jne .LBB50_6
3929; X86-BMI1-NEXT:  # %bb.5:
3930; X86-BMI1-NEXT:    movl %ebx, %eax
3931; X86-BMI1-NEXT:  .LBB50_6:
3932; X86-BMI1-NEXT:    popl %esi
3933; X86-BMI1-NEXT:    popl %edi
3934; X86-BMI1-NEXT:    popl %ebx
3935; X86-BMI1-NEXT:    retl
3936;
3937; X86-BMI2-LABEL: bzhi64_d3_load_indexzext:
3938; X86-BMI2:       # %bb.0:
3939; X86-BMI2-NEXT:    pushl %edi
3940; X86-BMI2-NEXT:    pushl %esi
3941; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
3942; X86-BMI2-NEXT:    movl (%eax), %edx
3943; X86-BMI2-NEXT:    movl 4(%eax), %esi
3944; X86-BMI2-NEXT:    movb $64, %cl
3945; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
3946; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
3947; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
3948; X86-BMI2-NEXT:    xorl %edx, %edx
3949; X86-BMI2-NEXT:    testb $32, %cl
3950; X86-BMI2-NEXT:    je .LBB50_2
3951; X86-BMI2-NEXT:  # %bb.1:
3952; X86-BMI2-NEXT:    movl %edi, %esi
3953; X86-BMI2-NEXT:    movl $0, %edi
3954; X86-BMI2-NEXT:  .LBB50_2:
3955; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
3956; X86-BMI2-NEXT:    jne .LBB50_4
3957; X86-BMI2-NEXT:  # %bb.3:
3958; X86-BMI2-NEXT:    movl %eax, %edx
3959; X86-BMI2-NEXT:  .LBB50_4:
3960; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
3961; X86-BMI2-NEXT:    testb $32, %cl
3962; X86-BMI2-NEXT:    jne .LBB50_6
3963; X86-BMI2-NEXT:  # %bb.5:
3964; X86-BMI2-NEXT:    movl %edi, %eax
3965; X86-BMI2-NEXT:  .LBB50_6:
3966; X86-BMI2-NEXT:    popl %esi
3967; X86-BMI2-NEXT:    popl %edi
3968; X86-BMI2-NEXT:    retl
3969;
3970; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
3971; X64-NOBMI:       # %bb.0:
3972; X64-NOBMI-NEXT:    movl %esi, %ecx
3973; X64-NOBMI-NEXT:    movq (%rdi), %rax
3974; X64-NOBMI-NEXT:    negb %cl
3975; X64-NOBMI-NEXT:    shlq %cl, %rax
3976; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
3977; X64-NOBMI-NEXT:    shrq %cl, %rax
3978; X64-NOBMI-NEXT:    retq
3979;
3980; X64-BMI1-LABEL: bzhi64_d3_load_indexzext:
3981; X64-BMI1:       # %bb.0:
3982; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
3983; X64-BMI1-NEXT:    shll $8, %esi
3984; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
3985; X64-BMI1-NEXT:    retq
3986;
3987; X64-BMI2-LABEL: bzhi64_d3_load_indexzext:
3988; X64-BMI2:       # %bb.0:
3989; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
3990; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
3991; X64-BMI2-NEXT:    retq
3992  %val = load i64, ptr %w
3993  %numhighbits = sub i8 64, %numlowbits
3994  %sh_prom = zext i8 %numhighbits to i64
3995  %highbitscleared = shl i64 %val, %sh_prom
3996  %masked = lshr i64 %highbitscleared, %sh_prom
3997  ret i64 %masked
3998}
3999
4000; 64-bit, but with 32-bit output
4001
4002; Everything done in 64-bit, truncation happens last.
; Pattern d with an i64 value and i64 bit count, returning only the low 32
; bits: the shl / logical lshr pair by 64 - %numlowbits is done in i64 and the
; result is then truncated to i32.
; Expected lowering per the checks below: the i686 runs still expand the full
; 64-bit shifts (shldl/shrdl with testb $32 branches) but return a single
; 32-bit register; the x86_64 runs emit the same negb + shlq + shrq, bextrq or
; bzhiq sequences as the i64-returning variant and simply use %eax of the
; result.
4003define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
4004; X86-NOBMI-LABEL: bzhi64_32_d0:
4005; X86-NOBMI:       # %bb.0:
4006; X86-NOBMI-NEXT:    pushl %esi
4007; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
4008; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4009; X86-NOBMI-NEXT:    movb $64, %cl
4010; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4011; X86-NOBMI-NEXT:    movl %esi, %edx
4012; X86-NOBMI-NEXT:    shll %cl, %edx
4013; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
4014; X86-NOBMI-NEXT:    testb $32, %cl
4015; X86-NOBMI-NEXT:    je .LBB51_2
4016; X86-NOBMI-NEXT:  # %bb.1:
4017; X86-NOBMI-NEXT:    movl %edx, %eax
4018; X86-NOBMI-NEXT:    xorl %edx, %edx
4019; X86-NOBMI-NEXT:  .LBB51_2:
4020; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
4021; X86-NOBMI-NEXT:    shrl %cl, %eax
4022; X86-NOBMI-NEXT:    testb $32, %cl
4023; X86-NOBMI-NEXT:    jne .LBB51_4
4024; X86-NOBMI-NEXT:  # %bb.3:
4025; X86-NOBMI-NEXT:    movl %edx, %eax
4026; X86-NOBMI-NEXT:  .LBB51_4:
4027; X86-NOBMI-NEXT:    popl %esi
4028; X86-NOBMI-NEXT:    retl
4029;
4030; X86-BMI1-LABEL: bzhi64_32_d0:
4031; X86-BMI1:       # %bb.0:
4032; X86-BMI1-NEXT:    pushl %esi
4033; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
4034; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
4035; X86-BMI1-NEXT:    movb $64, %cl
4036; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
4037; X86-BMI1-NEXT:    movl %esi, %edx
4038; X86-BMI1-NEXT:    shll %cl, %edx
4039; X86-BMI1-NEXT:    shldl %cl, %esi, %eax
4040; X86-BMI1-NEXT:    testb $32, %cl
4041; X86-BMI1-NEXT:    je .LBB51_2
4042; X86-BMI1-NEXT:  # %bb.1:
4043; X86-BMI1-NEXT:    movl %edx, %eax
4044; X86-BMI1-NEXT:    xorl %edx, %edx
4045; X86-BMI1-NEXT:  .LBB51_2:
4046; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
4047; X86-BMI1-NEXT:    shrl %cl, %eax
4048; X86-BMI1-NEXT:    testb $32, %cl
4049; X86-BMI1-NEXT:    jne .LBB51_4
4050; X86-BMI1-NEXT:  # %bb.3:
4051; X86-BMI1-NEXT:    movl %edx, %eax
4052; X86-BMI1-NEXT:  .LBB51_4:
4053; X86-BMI1-NEXT:    popl %esi
4054; X86-BMI1-NEXT:    retl
4055;
4056; X86-BMI2-LABEL: bzhi64_32_d0:
4057; X86-BMI2:       # %bb.0:
4058; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
4059; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
4060; X86-BMI2-NEXT:    movb $64, %cl
4061; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
4062; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
4063; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
4064; X86-BMI2-NEXT:    testb $32, %cl
4065; X86-BMI2-NEXT:    je .LBB51_2
4066; X86-BMI2-NEXT:  # %bb.1:
4067; X86-BMI2-NEXT:    movl %eax, %edx
4068; X86-BMI2-NEXT:    xorl %eax, %eax
4069; X86-BMI2-NEXT:  .LBB51_2:
4070; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
4071; X86-BMI2-NEXT:    testb $32, %cl
4072; X86-BMI2-NEXT:    je .LBB51_4
4073; X86-BMI2-NEXT:  # %bb.3:
4074; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
4075; X86-BMI2-NEXT:  .LBB51_4:
4076; X86-BMI2-NEXT:    retl
4077;
4078; X64-NOBMI-LABEL: bzhi64_32_d0:
4079; X64-NOBMI:       # %bb.0:
4080; X64-NOBMI-NEXT:    movq %rsi, %rcx
4081; X64-NOBMI-NEXT:    movq %rdi, %rax
4082; X64-NOBMI-NEXT:    negb %cl
4083; X64-NOBMI-NEXT:    shlq %cl, %rax
4084; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
4085; X64-NOBMI-NEXT:    shrq %cl, %rax
4086; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
4087; X64-NOBMI-NEXT:    retq
4088;
4089; X64-BMI1-LABEL: bzhi64_32_d0:
4090; X64-BMI1:       # %bb.0:
4091; X64-BMI1-NEXT:    shll $8, %esi
4092; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
4093; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
4094; X64-BMI1-NEXT:    retq
4095;
4096; X64-BMI2-LABEL: bzhi64_32_d0:
4097; X64-BMI2:       # %bb.0:
4098; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
4099; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
4100; X64-BMI2-NEXT:    retq
4101  %numhighbits = sub i64 64, %numlowbits
4102  %highbitscleared = shl i64 %val, %numhighbits
4103  %masked = lshr i64 %highbitscleared, %numhighbits
4104  %res = trunc i64 %masked to i32
4105  ret i32 %res
4106}
4107
4108; Truncation happens first; both shifts are then done in 32-bit.
; The i64 %val is truncated to i32 up front, and the shl / logical lshr pair
; by 32 - %numlowbits (i32 count) operates on that truncated value, so no
; 64-bit shift is involved.
; Expected lowering per the checks below: without BMI both targets emit a
; negated byte count followed by shll + shrl; the BMI1 runs emit shll $8 +
; bextrl; the BMI2 runs emit bzhil. All of these are 32-bit operations on the
; low half of the input.
4109define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
4110; X86-NOBMI-LABEL: bzhi64_32_d1:
4111; X86-NOBMI:       # %bb.0:
4112; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
4113; X86-NOBMI-NEXT:    xorl %ecx, %ecx
4114; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
4115; X86-NOBMI-NEXT:    shll %cl, %eax
4116; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4117; X86-NOBMI-NEXT:    shrl %cl, %eax
4118; X86-NOBMI-NEXT:    retl
4119;
4120; X86-BMI1-LABEL: bzhi64_32_d1:
4121; X86-BMI1:       # %bb.0:
4122; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
4123; X86-BMI1-NEXT:    shll $8, %eax
4124; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
4125; X86-BMI1-NEXT:    retl
4126;
4127; X86-BMI2-LABEL: bzhi64_32_d1:
4128; X86-BMI2:       # %bb.0:
4129; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
4130; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
4131; X86-BMI2-NEXT:    retl
4132;
4133; X64-NOBMI-LABEL: bzhi64_32_d1:
4134; X64-NOBMI:       # %bb.0:
4135; X64-NOBMI-NEXT:    movl %esi, %ecx
4136; X64-NOBMI-NEXT:    movq %rdi, %rax
4137; X64-NOBMI-NEXT:    negb %cl
4138; X64-NOBMI-NEXT:    shll %cl, %eax
4139; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
4140; X64-NOBMI-NEXT:    shrl %cl, %eax
4141; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
4142; X64-NOBMI-NEXT:    retq
4143;
4144; X64-BMI1-LABEL: bzhi64_32_d1:
4145; X64-BMI1:       # %bb.0:
4146; X64-BMI1-NEXT:    shll $8, %esi
4147; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
4148; X64-BMI1-NEXT:    retq
4149;
4150; X64-BMI2-LABEL: bzhi64_32_d1:
4151; X64-BMI2:       # %bb.0:
4152; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
4153; X64-BMI2-NEXT:    retq
4154  %truncval = trunc i64 %val to i32
4155  %numhighbits = sub i32 32, %numlowbits
4156  %highbitscleared = shl i32 %truncval, %numhighbits
4157  %masked = lshr i32 %highbitscleared, %numhighbits
4158  ret i32 %masked
4159}
4160
4161; ---------------------------------------------------------------------------- ;
4162; Constant mask
4163; ---------------------------------------------------------------------------- ;
4164
4165; 32-bit
4166
; Constant mask on an i32 register value: keeps the low 31 bits of %val
; (and with 0x7FFFFFFF).
; The checks below are shared by every feature combination on each target and
; expect a plain andl with the immediate; no bextr/bzhi form is expected.
4167define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
4168; X86-LABEL: bzhi32_constant_mask32:
4169; X86:       # %bb.0:
4170; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4171; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4172; X86-NEXT:    retl
4173;
4174; X64-LABEL: bzhi32_constant_mask32:
4175; X64:       # %bb.0:
4176; X64-NEXT:    movl %edi, %eax
4177; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4178; X64-NEXT:    retq
4179  %masked = and i32 %val, 2147483647
4180  ret i32 %masked
4181}
4182
; Constant mask on an i32 loaded through %val: keeps the low 31 bits of the
; loaded value (and with 0x7FFFFFFF).
; The checks below are shared by every feature combination on each target and
; expect the immediate to be materialized in %eax and the load folded into the
; andl memory operand.
4183define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind {
4184; X86-LABEL: bzhi32_constant_mask32_load:
4185; X86:       # %bb.0:
4186; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4187; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4188; X86-NEXT:    andl (%ecx), %eax
4189; X86-NEXT:    retl
4190;
4191; X64-LABEL: bzhi32_constant_mask32_load:
4192; X64:       # %bb.0:
4193; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4194; X64-NEXT:    andl (%rdi), %eax
4195; X64-NEXT:    retq
4196  %val1 = load i32, ptr %val
4197  %masked = and i32 %val1, 2147483647
4198  ret i32 %masked
4199}
4200
; Constant mask on an i32 register value: keeps the low 15 bits of %val
; (and with 0x7FFF).
; The checks below are shared by every feature combination on each target and
; expect a plain andl with the immediate.
4201define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
4202; X86-LABEL: bzhi32_constant_mask16:
4203; X86:       # %bb.0:
4204; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4205; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4206; X86-NEXT:    retl
4207;
4208; X64-LABEL: bzhi32_constant_mask16:
4209; X64:       # %bb.0:
4210; X64-NEXT:    movl %edi, %eax
4211; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4212; X64-NEXT:    retq
4213  %masked = and i32 %val, 32767
4214  ret i32 %masked
4215}
4216
; Constant mask on an i32 loaded through %val: keeps the low 15 bits of the
; loaded value (and with 0x7FFF).
; The checks below are shared by every feature combination on each target and
; expect the immediate to be materialized in %eax and the load folded into the
; andl memory operand.
4217define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind {
4218; X86-LABEL: bzhi32_constant_mask16_load:
4219; X86:       # %bb.0:
4220; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4221; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4222; X86-NEXT:    andl (%ecx), %eax
4223; X86-NEXT:    retl
4224;
4225; X64-LABEL: bzhi32_constant_mask16_load:
4226; X64:       # %bb.0:
4227; X64-NEXT:    movl $32767, %eax # imm = 0x7FFF
4228; X64-NEXT:    andl (%rdi), %eax
4229; X64-NEXT:    retq
4230  %val1 = load i32, ptr %val
4231  %masked = and i32 %val1, 32767
4232  ret i32 %masked
4233}
4234
; Constant mask on an i32 register value: keeps the low 7 bits of %val
; (and with 127).
; The checks below are shared by every feature combination on each target and
; expect the value to be moved into %eax and masked with andl $127.
4235define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
4236; X86-LABEL: bzhi32_constant_mask8:
4237; X86:       # %bb.0:
4238; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4239; X86-NEXT:    andl $127, %eax
4240; X86-NEXT:    retl
4241;
4242; X64-LABEL: bzhi32_constant_mask8:
4243; X64:       # %bb.0:
4244; X64-NEXT:    movl %edi, %eax
4245; X64-NEXT:    andl $127, %eax
4246; X64-NEXT:    retq
4247  %masked = and i32 %val, 127
4248  ret i32 %masked
4249}
4250
; Constant mask on an i32 loaded through %val: keeps the low 7 bits of the
; loaded value (and with 127).
; The checks below are shared by every feature combination on each target and
; expect a separate movl load followed by andl $127 on the register, unlike
; the wider-mask load variants where the load is folded into the andl.
4251define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind {
4252; X86-LABEL: bzhi32_constant_mask8_load:
4253; X86:       # %bb.0:
4254; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4255; X86-NEXT:    movl (%eax), %eax
4256; X86-NEXT:    andl $127, %eax
4257; X86-NEXT:    retl
4258;
4259; X64-LABEL: bzhi32_constant_mask8_load:
4260; X64:       # %bb.0:
4261; X64-NEXT:    movl (%rdi), %eax
4262; X64-NEXT:    andl $127, %eax
4263; X64-NEXT:    retq
4264  %val1 = load i32, ptr %val
4265  %masked = and i32 %val1, 127
4266  ret i32 %masked
4267}
4268
4269; 64-bit
4270
; Constant mask on an i64 register value: keeps the low 62 bits of %val
; (and with 0x3FFFFFFFFFFFFFFF).
; Expected lowering per the checks below: the i686 runs pass the low half
; through unchanged and mask only the high half with 0x3FFFFFFF; x86_64
; without BMI materializes the 64-bit constant with movabsq and uses andq;
; BMI1 without TBM loads the control value 0x3E00 into a register for bextrq;
; the TBM runs (with or without BMI2) use bextrq with 0x3E00 as an immediate;
; BMI2 without TBM loads 62 into %al and uses bzhiq.
4271define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
4272; X86-LABEL: bzhi64_constant_mask64:
4273; X86:       # %bb.0:
4274; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4275; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
4276; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
4277; X86-NEXT:    retl
4278;
4279; X64-NOBMI-LABEL: bzhi64_constant_mask64:
4280; X64-NOBMI:       # %bb.0:
4281; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
4282; X64-NOBMI-NEXT:    andq %rdi, %rax
4283; X64-NOBMI-NEXT:    retq
4284;
4285; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64:
4286; X64-BMI1NOTBM:       # %bb.0:
4287; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
4288; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
4289; X64-BMI1NOTBM-NEXT:    retq
4290;
4291; X64-BMI1TBM-LABEL: bzhi64_constant_mask64:
4292; X64-BMI1TBM:       # %bb.0:
4293; X64-BMI1TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
4294; X64-BMI1TBM-NEXT:    retq
4295;
4296; X64-BMI2TBM-LABEL: bzhi64_constant_mask64:
4297; X64-BMI2TBM:       # %bb.0:
4298; X64-BMI2TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
4299; X64-BMI2TBM-NEXT:    retq
4300;
4301; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64:
4302; X64-BMI2NOTBM:       # %bb.0:
4303; X64-BMI2NOTBM-NEXT:    movb $62, %al
4304; X64-BMI2NOTBM-NEXT:    bzhiq %rax, %rdi, %rax
4305; X64-BMI2NOTBM-NEXT:    retq
4306  %masked = and i64 %val, 4611686018427387903
4307  ret i64 %masked
4308}
4309
; Load variant of the 62-bit constant mask: loads an i64 through %val and ANDs
; it with 4611686018427387903 (0x3FFFFFFFFFFFFFFF = 2^62 - 1).
; The expected code folds the load into the masking instruction where it can:
;   - i686 (all feature sets): the word at offset 0 goes to %eax unmasked; the
;     word at offset 4 is ANDed with 0x3FFFFFFF into %edx.
;   - x86_64, no BMI: movabsq of the mask, then andq with a memory operand.
;   - x86_64, BMI1 without TBM or BMI2: bextrq from memory with the control
;     word 0x3E00 in a register (start bit 0x00, length 0x3E = 62).
;   - x86_64, TBM (with or without BMI2): bextrq from memory with 0x3E00 as an
;     immediate.
;   - x86_64, BMI2 without TBM: bzhiq from memory with the bit index 62 in %al.
4310define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind {
4311; X86-LABEL: bzhi64_constant_mask64_load:
4312; X86:       # %bb.0:
4313; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4314; X86-NEXT:    movl (%ecx), %eax
4315; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
4316; X86-NEXT:    andl 4(%ecx), %edx
4317; X86-NEXT:    retl
4318;
4319; X64-NOBMI-LABEL: bzhi64_constant_mask64_load:
4320; X64-NOBMI:       # %bb.0:
4321; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
4322; X64-NOBMI-NEXT:    andq (%rdi), %rax
4323; X64-NOBMI-NEXT:    retq
4324;
4325; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64_load:
4326; X64-BMI1NOTBM:       # %bb.0:
4327; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
4328; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
4329; X64-BMI1NOTBM-NEXT:    retq
4330;
4331; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load:
4332; X64-BMI1TBM:       # %bb.0:
4333; X64-BMI1TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
4334; X64-BMI1TBM-NEXT:    retq
4335;
4336; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load:
4337; X64-BMI2TBM:       # %bb.0:
4338; X64-BMI2TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
4339; X64-BMI2TBM-NEXT:    retq
4340;
4341; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load:
4342; X64-BMI2NOTBM:       # %bb.0:
4343; X64-BMI2NOTBM-NEXT:    movb $62, %al
4344; X64-BMI2NOTBM-NEXT:    bzhiq %rax, (%rdi), %rax
4345; X64-BMI2NOTBM-NEXT:    retq
4346  %val1 = load i64, ptr %val
4347  %masked = and i64 %val1, 4611686018427387903 ; 2^62 - 1, the low 62 bits
4348  ret i64 %masked
4349}
4350
; Constant low-bit mask on an i64 argument: ANDs %val with 2147483647
; (0x7FFFFFFF = 2^31 - 1), keeping the low 31 bits.
; The mask fits in 32 bits, so only the common check prefixes are used and no
; BEXTR/BZHI is expected under any feature set:
;   - i686: the constant is ANDed with a stack word into %eax and %edx is
;     cleared with xorl.
;   - x86_64: the value is copied to %rax and masked with a 32-bit andl (a
;     32-bit register write zeroes the upper 32 bits of the 64-bit register).
4351define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
4352; X86-LABEL: bzhi64_constant_mask32:
4353; X86:       # %bb.0:
4354; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4355; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4356; X86-NEXT:    xorl %edx, %edx
4357; X86-NEXT:    retl
4358;
4359; X64-LABEL: bzhi64_constant_mask32:
4360; X64:       # %bb.0:
4361; X64-NEXT:    movq %rdi, %rax
4362; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4363; X64-NEXT:    retq
4364  %masked = and i64 %val, 2147483647 ; 2^31 - 1, the low 31 bits
4365  ret i64 %masked
4366}
4367
; Load variant of the 31-bit constant mask: loads an i64 through %val and ANDs
; it with 2147483647 (0x7FFFFFFF = 2^31 - 1).
; Only the common check prefixes are used, so the output is the same for every
; feature set:
;   - i686: the constant is ANDed with the word at offset 0 of the pointer into
;     %eax; %edx is cleared with xorl and the word at offset 4 is never read.
;   - x86_64: a full 64-bit load into %rax followed by a 32-bit andl.
4368define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind {
4369; X86-LABEL: bzhi64_constant_mask32_load:
4370; X86:       # %bb.0:
4371; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4372; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
4373; X86-NEXT:    andl (%ecx), %eax
4374; X86-NEXT:    xorl %edx, %edx
4375; X86-NEXT:    retl
4376;
4377; X64-LABEL: bzhi64_constant_mask32_load:
4378; X64:       # %bb.0:
4379; X64-NEXT:    movq (%rdi), %rax
4380; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
4381; X64-NEXT:    retq
4382  %val1 = load i64, ptr %val
4383  %masked = and i64 %val1, 2147483647 ; 2^31 - 1, the low 31 bits
4384  ret i64 %masked
4385}
4386
; Constant low-bit mask on an i64 argument: ANDs %val with 32767
; (0x7FFF = 2^15 - 1), keeping the low 15 bits.
; Only the common check prefixes are used, so the output is the same for every
; feature set:
;   - i686: the constant is ANDed with a stack word into %eax and %edx is
;     cleared with xorl.
;   - x86_64: the value is copied to %rax and masked with a 32-bit andl.
4387define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
4388; X86-LABEL: bzhi64_constant_mask16:
4389; X86:       # %bb.0:
4390; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4391; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
4392; X86-NEXT:    xorl %edx, %edx
4393; X86-NEXT:    retl
4394;
4395; X64-LABEL: bzhi64_constant_mask16:
4396; X64:       # %bb.0:
4397; X64-NEXT:    movq %rdi, %rax
4398; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4399; X64-NEXT:    retq
4400  %masked = and i64 %val, 32767 ; 2^15 - 1, the low 15 bits
4401  ret i64 %masked
4402}
4403
; Load variant of the 15-bit constant mask: loads an i64 through %val and ANDs
; it with 32767 (0x7FFF = 2^15 - 1).
; Only the common check prefixes are used, so the output is the same for every
; feature set:
;   - i686: the constant is ANDed with the word at offset 0 of the pointer into
;     %eax; %edx is cleared with xorl and the word at offset 4 is never read.
;   - x86_64: a full 64-bit load into %rax followed by a 32-bit andl.
4404define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind {
4405; X86-LABEL: bzhi64_constant_mask16_load:
4406; X86:       # %bb.0:
4407; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
4408; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
4409; X86-NEXT:    andl (%ecx), %eax
4410; X86-NEXT:    xorl %edx, %edx
4411; X86-NEXT:    retl
4412;
4413; X64-LABEL: bzhi64_constant_mask16_load:
4414; X64:       # %bb.0:
4415; X64-NEXT:    movq (%rdi), %rax
4416; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
4417; X64-NEXT:    retq
4418  %val1 = load i64, ptr %val
4419  %masked = and i64 %val1, 32767 ; 2^15 - 1, the low 15 bits
4420  ret i64 %masked
4421}
4422
; Constant low-bit mask on an i64 argument: ANDs %val with 127
; (0x7F = 2^7 - 1), keeping the low 7 bits.
; Only the common check prefixes are used, so the output is the same for every
; feature set:
;   - i686: a stack word is loaded into %eax and masked in place with
;     `andl $127`; %edx is cleared with xorl.
;   - x86_64: the value is copied to %rax and masked with a 32-bit andl.
4423define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
4424; X86-LABEL: bzhi64_constant_mask8:
4425; X86:       # %bb.0:
4426; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4427; X86-NEXT:    andl $127, %eax
4428; X86-NEXT:    xorl %edx, %edx
4429; X86-NEXT:    retl
4430;
4431; X64-LABEL: bzhi64_constant_mask8:
4432; X64:       # %bb.0:
4433; X64-NEXT:    movq %rdi, %rax
4434; X64-NEXT:    andl $127, %eax
4435; X64-NEXT:    retq
4436  %masked = and i64 %val, 127 ; 2^7 - 1, the low 7 bits
4437  ret i64 %masked
4438}
4439
; Load variant of the 7-bit constant mask: loads an i64 through %val and ANDs
; it with 127 (0x7F = 2^7 - 1).
; Only the common check prefixes are used, so the output is the same for every
; feature set:
;   - i686: the word at offset 0 of the pointer is loaded into %eax and masked
;     with `andl $127`; %edx is cleared with xorl and the word at offset 4 is
;     never read.
;   - x86_64: a full 64-bit load into %rax followed by a 32-bit andl.
4440define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind {
4441; X86-LABEL: bzhi64_constant_mask8_load:
4442; X86:       # %bb.0:
4443; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
4444; X86-NEXT:    movl (%eax), %eax
4445; X86-NEXT:    andl $127, %eax
4446; X86-NEXT:    xorl %edx, %edx
4447; X86-NEXT:    retl
4448;
4449; X64-LABEL: bzhi64_constant_mask8_load:
4450; X64:       # %bb.0:
4451; X64-NEXT:    movq (%rdi), %rax
4452; X64-NEXT:    andl $127, %eax
4453; X64-NEXT:    retq
4454  %val1 = load i64, ptr %val
4455  %masked = and i64 %val1, 127 ; 2^7 - 1, the low 7 bits
4456  ret i64 %masked
4457}
4458