1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR
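; This file exercises selection of the BMI andn, bextr, blsi, blsmsk and blsr patterns.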
8
9define i32 @andn32(i32 %x, i32 %y)   {
10; X86-LABEL: andn32:
11; X86:       # %bb.0:
12; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
13; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
14; X86-NEXT:    retl
15;
16; X64-LABEL: andn32:
17; X64:       # %bb.0:
18; X64-NEXT:    andnl %esi, %edi, %eax
19; X64-NEXT:    retq
20  %tmp1 = xor i32 %x, -1
21  %tmp2 = and i32 %y, %tmp1
22  ret i32 %tmp2
23}
24
25define i32 @andn32_load(i32 %x, ptr %y)   {
26; X86-LABEL: andn32_load:
27; X86:       # %bb.0:
28; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
29; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
30; X86-NEXT:    andnl (%eax), %ecx, %eax
31; X86-NEXT:    retl
32;
33; X64-LABEL: andn32_load:
34; X64:       # %bb.0:
35; X64-NEXT:    andnl (%rsi), %edi, %eax
36; X64-NEXT:    retq
37  %y1 = load i32, ptr %y
38  %tmp1 = xor i32 %x, -1
39  %tmp2 = and i32 %y1, %tmp1
40  ret i32 %tmp2
41}
42
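; On the i686 targets the i64 andn is split into two 32-bit andnl ops, one per half.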
43define i64 @andn64(i64 %x, i64 %y)   {
44; X86-LABEL: andn64:
45; X86:       # %bb.0:
46; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
47; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
48; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
49; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
50; X86-NEXT:    retl
51;
52; X64-LABEL: andn64:
53; X64:       # %bb.0:
54; X64-NEXT:    andnq %rsi, %rdi, %rax
55; X64-NEXT:    retq
56  %tmp1 = xor i64 %x, -1
57  %tmp2 = and i64 %tmp1, %y
58  ret i64 %tmp2
59}
60
61; Don't choose a 'test' if an 'andn' can be used.
62define i1 @andn_cmp(i32 %x, i32 %y) {
63; X86-LABEL: andn_cmp:
64; X86:       # %bb.0:
65; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
66; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
67; X86-NEXT:    sete %al
68; X86-NEXT:    retl
69;
70; X64-LABEL: andn_cmp:
71; X64:       # %bb.0:
72; X64-NEXT:    andnl %esi, %edi, %eax
73; X64-NEXT:    sete %al
74; X64-NEXT:    retq
75  %notx = xor i32 %x, -1
76  %and = and i32 %notx, %y
77  %cmp = icmp eq i32 %and, 0
78  ret i1 %cmp
79}
80
81; Recognize a disguised andn in the following 4 tests.
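; (x & y) == y is the same as (~x & y) == 0, so each of these folds to andn plus sete/setne.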
82define i1 @and_cmp1(i32 %x, i32 %y) {
83; X86-LABEL: and_cmp1:
84; X86:       # %bb.0:
85; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
86; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
87; X86-NEXT:    sete %al
88; X86-NEXT:    retl
89;
90; X64-LABEL: and_cmp1:
91; X64:       # %bb.0:
92; X64-NEXT:    andnl %esi, %edi, %eax
93; X64-NEXT:    sete %al
94; X64-NEXT:    retq
95  %and = and i32 %x, %y
96  %cmp = icmp eq i32 %and, %y
97  ret i1 %cmp
98}
99
100define i1 @and_cmp2(i32 %x, i32 %y) {
101; X86-LABEL: and_cmp2:
102; X86:       # %bb.0:
103; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
104; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
105; X86-NEXT:    setne %al
106; X86-NEXT:    retl
107;
108; X64-LABEL: and_cmp2:
109; X64:       # %bb.0:
110; X64-NEXT:    andnl %esi, %edi, %eax
111; X64-NEXT:    setne %al
112; X64-NEXT:    retq
113  %and = and i32 %y, %x
114  %cmp = icmp ne i32 %and, %y
115  ret i1 %cmp
116}
117
118define i1 @and_cmp3(i32 %x, i32 %y) {
119; X86-LABEL: and_cmp3:
120; X86:       # %bb.0:
121; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
122; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
123; X86-NEXT:    sete %al
124; X86-NEXT:    retl
125;
126; X64-LABEL: and_cmp3:
127; X64:       # %bb.0:
128; X64-NEXT:    andnl %esi, %edi, %eax
129; X64-NEXT:    sete %al
130; X64-NEXT:    retq
131  %and = and i32 %x, %y
132  %cmp = icmp eq i32 %y, %and
133  ret i1 %cmp
134}
135
136define i1 @and_cmp4(i32 %x, i32 %y) {
137; X86-LABEL: and_cmp4:
138; X86:       # %bb.0:
139; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
140; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
141; X86-NEXT:    setne %al
142; X86-NEXT:    retl
143;
144; X64-LABEL: and_cmp4:
145; X64:       # %bb.0:
146; X64-NEXT:    andnl %esi, %edi, %eax
147; X64-NEXT:    setne %al
148; X64-NEXT:    retq
149  %and = and i32 %y, %x
150  %cmp = icmp ne i32 %y, %and
151  ret i1 %cmp
152}
153
154; A mask and compare against a constant is OK for an 'andn' too
155; even though the BMI instruction doesn't have an immediate form.
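; Because the mask is an immediate, the (~x & 43) == 0 form below lowers to not+test instead.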
156define i1 @and_cmp_const(i32 %x) {
157; X86-LABEL: and_cmp_const:
158; X86:       # %bb.0:
159; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
160; X86-NEXT:    notl %eax
161; X86-NEXT:    testb $43, %al
162; X86-NEXT:    sete %al
163; X86-NEXT:    retl
164;
165; X64-LABEL: and_cmp_const:
166; X64:       # %bb.0:
167; X64-NEXT:    notl %edi
168; X64-NEXT:    testb $43, %dil
169; X64-NEXT:    sete %al
170; X64-NEXT:    retq
171  %and = and i32 %x, 43
172  %cmp = icmp eq i32 %and, 43
173  ret i1 %cmp
174}
175
176; But don't use 'andn' if the mask is a power-of-two.
177define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
178; X86-LABEL: and_cmp_const_power_of_two:
179; X86:       # %bb.0:
180; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
181; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
182; X86-NEXT:    btl %ecx, %eax
183; X86-NEXT:    setae %al
184; X86-NEXT:    retl
185;
186; X64-LABEL: and_cmp_const_power_of_two:
187; X64:       # %bb.0:
188; X64-NEXT:    btl %esi, %edi
189; X64-NEXT:    setae %al
190; X64-NEXT:    retq
191  %shl = shl i32 1, %y
192  %and = and i32 %x, %shl
193  %cmp = icmp ne i32 %and, %shl
194  ret i1 %cmp
195}
196
197; Don't transform to 'andn' if there's another use of the 'and'.
198define i32 @and_cmp_not_one_use(i32 %x) {
199; X86-LABEL: and_cmp_not_one_use:
200; X86:       # %bb.0:
201; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
202; X86-NEXT:    andl $37, %ecx
203; X86-NEXT:    xorl %eax, %eax
204; X86-NEXT:    cmpl $37, %ecx
205; X86-NEXT:    sete %al
206; X86-NEXT:    addl %ecx, %eax
207; X86-NEXT:    retl
208;
209; X64-LABEL: and_cmp_not_one_use:
210; X64:       # %bb.0:
211; X64-NEXT:    andl $37, %edi
212; X64-NEXT:    xorl %eax, %eax
213; X64-NEXT:    cmpl $37, %edi
214; X64-NEXT:    sete %al
215; X64-NEXT:    addl %edi, %eax
216; X64-NEXT:    retq
217  %and = and i32 %x, 37
218  %cmp = icmp eq i32 %and, 37
219  %ext = zext i1 %cmp to i32
220  %add = add i32 %and, %ext
221  ret i32 %add
222}
223
224; Verify that we're not transforming invalid comparison predicates.
225define i1 @not_an_andn1(i32 %x, i32 %y) {
226; X86-LABEL: not_an_andn1:
227; X86:       # %bb.0:
228; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
229; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
230; X86-NEXT:    andl %eax, %ecx
231; X86-NEXT:    cmpl %ecx, %eax
232; X86-NEXT:    setg %al
233; X86-NEXT:    retl
234;
235; X64-LABEL: not_an_andn1:
236; X64:       # %bb.0:
237; X64-NEXT:    andl %esi, %edi
238; X64-NEXT:    cmpl %edi, %esi
239; X64-NEXT:    setg %al
240; X64-NEXT:    retq
241  %and = and i32 %x, %y
242  %cmp = icmp sgt i32 %y, %and
243  ret i1 %cmp
244}
245
246define i1 @not_an_andn2(i32 %x, i32 %y) {
247; X86-LABEL: not_an_andn2:
248; X86:       # %bb.0:
249; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
250; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
251; X86-NEXT:    andl %eax, %ecx
252; X86-NEXT:    cmpl %ecx, %eax
253; X86-NEXT:    setbe %al
254; X86-NEXT:    retl
255;
256; X64-LABEL: not_an_andn2:
257; X64:       # %bb.0:
258; X64-NEXT:    andl %esi, %edi
259; X64-NEXT:    cmpl %edi, %esi
260; X64-NEXT:    setbe %al
261; X64-NEXT:    retq
262  %and = and i32 %y, %x
263  %cmp = icmp ule i32 %y, %and
264  ret i1 %cmp
265}
266
267; Don't choose a 'test' if an 'andn' can be used.
268define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
269; X86-LABEL: andn_cmp_swap_ops:
270; X86:       # %bb.0:
271; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
272; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
273; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %ecx
274; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
275; X86-NEXT:    orl %ecx, %eax
276; X86-NEXT:    sete %al
277; X86-NEXT:    retl
278;
279; X64-LABEL: andn_cmp_swap_ops:
280; X64:       # %bb.0:
281; X64-NEXT:    andnq %rsi, %rdi, %rax
282; X64-NEXT:    sete %al
283; X64-NEXT:    retq
284  %notx = xor i64 %x, -1
285  %and = and i64 %y, %notx
286  %cmp = icmp eq i64 %and, 0
287  ret i1 %cmp
288}
289
290; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
291define i1 @andn_cmp_i8(i8 %x, i8 %y) {
292; X86-LABEL: andn_cmp_i8:
293; X86:       # %bb.0:
294; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
295; X86-NEXT:    notb %al
296; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
297; X86-NEXT:    sete %al
298; X86-NEXT:    retl
299;
300; X64-LABEL: andn_cmp_i8:
301; X64:       # %bb.0:
302; X64-NEXT:    notb %sil
303; X64-NEXT:    testb %sil, %dil
304; X64-NEXT:    sete %al
305; X64-NEXT:    retq
306  %noty = xor i8 %y, -1
307  %and = and i8 %x, %noty
308  %cmp = icmp eq i8 %and, 0
309  ret i1 %cmp
310}
311
312; PR48768 - 'andn' clears the overflow flag, so we don't need a separate 'test'.
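; setle reads ZF, SF and OF; andn updates ZF/SF and clears OF, so the 'x <= 0' check can use its flags directly.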
313define i1 @andn_cmp_i32_overflow(i32 %x, i32 %y) {
314; X86-LABEL: andn_cmp_i32_overflow:
315; X86:       # %bb.0:
316; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
317; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
318; X86-NEXT:    setle %al
319; X86-NEXT:    retl
320;
321; X64-LABEL: andn_cmp_i32_overflow:
322; X64:       # %bb.0:
323; X64-NEXT:    andnl %edi, %esi, %eax
324; X64-NEXT:    setle %al
325; X64-NEXT:    retq
326  %noty = xor i32 %y, -1
327  %and = and i32 %x, %noty
328  %cmp = icmp slt i32 %and, 1
329  ret i1 %cmp
330}
331
332declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
333
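; The bextr control operand encodes the start bit in bits 7:0 and the field length in bits 15:8.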
334define i32 @bextr32(i32 %x, i32 %y)   {
335; X86-LABEL: bextr32:
336; X86:       # %bb.0:
337; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
338; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
339; X86-NEXT:    retl
340;
341; X64-LABEL: bextr32:
342; X64:       # %bb.0:
343; X64-NEXT:    bextrl %esi, %edi, %eax
344; X64-NEXT:    retq
345  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
346  ret i32 %tmp
347}
348
349define i32 @bextr32_load(ptr %x, i32 %y)   {
350; X86-LABEL: bextr32_load:
351; X86:       # %bb.0:
352; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
353; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
354; X86-NEXT:    bextrl %eax, (%ecx), %eax
355; X86-NEXT:    retl
356;
357; X64-LABEL: bextr32_load:
358; X64:       # %bb.0:
359; X64-NEXT:    bextrl %esi, (%rdi), %eax
360; X64-NEXT:    retq
361  %x1 = load i32, ptr %x
362  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
363  ret i32 %tmp
364}
365
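; 0xC04 encodes start = 4, length = 12, i.e. (x >> 4) & 0xFFF; this form is only chosen on +fast-bextr targets.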
366define i32 @bextr32b(i32 %x)  uwtable  ssp {
367; X86-SLOW-BEXTR-LABEL: bextr32b:
368; X86-SLOW-BEXTR:       # %bb.0:
369; X86-SLOW-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
370; X86-SLOW-BEXTR-NEXT:    shrl $4, %eax
371; X86-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
372; X86-SLOW-BEXTR-NEXT:    retl
373;
374; X64-SLOW-BEXTR-LABEL: bextr32b:
375; X64-SLOW-BEXTR:       # %bb.0:
376; X64-SLOW-BEXTR-NEXT:    movl %edi, %eax
377; X64-SLOW-BEXTR-NEXT:    shrl $4, %eax
378; X64-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
379; X64-SLOW-BEXTR-NEXT:    retq
380;
381; X86-FAST-BEXTR-LABEL: bextr32b:
382; X86-FAST-BEXTR:       # %bb.0:
383; X86-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
384; X86-FAST-BEXTR-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
385; X86-FAST-BEXTR-NEXT:    retl
386;
387; X64-FAST-BEXTR-LABEL: bextr32b:
388; X64-FAST-BEXTR:       # %bb.0:
389; X64-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
390; X64-FAST-BEXTR-NEXT:    bextrl %eax, %edi, %eax
391; X64-FAST-BEXTR-NEXT:    retq
392  %1 = lshr i32 %x, 4
393  %2 = and i32 %1, 4095
394  ret i32 %2
395}
396
397; Make sure we still use the AH subreg trick to extract bits 15:8.
398define i32 @bextr32_subreg(i32 %x)  uwtable  ssp {
399; X86-LABEL: bextr32_subreg:
400; X86:       # %bb.0:
401; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
402; X86-NEXT:    retl
403;
404; X64-LABEL: bextr32_subreg:
405; X64:       # %bb.0:
406; X64-NEXT:    movl %edi, %eax
407; X64-NEXT:    movzbl %ah, %eax
408; X64-NEXT:    retq
409  %1 = lshr i32 %x, 8
410  %2 = and i32 %1, 255
411  ret i32 %2
412}
413
414define i32 @bextr32b_load(ptr %x)  uwtable  ssp {
415; X86-SLOW-BEXTR-LABEL: bextr32b_load:
416; X86-SLOW-BEXTR:       # %bb.0:
417; X86-SLOW-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
418; X86-SLOW-BEXTR-NEXT:    movl (%eax), %eax
419; X86-SLOW-BEXTR-NEXT:    shrl $4, %eax
420; X86-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
421; X86-SLOW-BEXTR-NEXT:    retl
422;
423; X64-SLOW-BEXTR-LABEL: bextr32b_load:
424; X64-SLOW-BEXTR:       # %bb.0:
425; X64-SLOW-BEXTR-NEXT:    movl (%rdi), %eax
426; X64-SLOW-BEXTR-NEXT:    shrl $4, %eax
427; X64-SLOW-BEXTR-NEXT:    andl $4095, %eax # imm = 0xFFF
428; X64-SLOW-BEXTR-NEXT:    retq
429;
430; X86-FAST-BEXTR-LABEL: bextr32b_load:
431; X86-FAST-BEXTR:       # %bb.0:
432; X86-FAST-BEXTR-NEXT:    movl {{[0-9]+}}(%esp), %eax
433; X86-FAST-BEXTR-NEXT:    movl $3076, %ecx # imm = 0xC04
434; X86-FAST-BEXTR-NEXT:    bextrl %ecx, (%eax), %eax
435; X86-FAST-BEXTR-NEXT:    retl
436;
437; X64-FAST-BEXTR-LABEL: bextr32b_load:
438; X64-FAST-BEXTR:       # %bb.0:
439; X64-FAST-BEXTR-NEXT:    movl $3076, %eax # imm = 0xC04
440; X64-FAST-BEXTR-NEXT:    bextrl %eax, (%rdi), %eax
441; X64-FAST-BEXTR-NEXT:    retq
442  %1 = load i32, ptr %x
443  %2 = lshr i32 %1, 4
444  %3 = and i32 %2, 4095
445  ret i32 %3
446}
447
448; PR34042
449define i32 @bextr32c(i32 %x, i16 zeroext %y) {
450; X86-LABEL: bextr32c:
451; X86:       # %bb.0:
452; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
453; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
454; X86-NEXT:    retl
455;
456; X64-LABEL: bextr32c:
457; X64:       # %bb.0:
458; X64-NEXT:    bextrl %esi, %edi, %eax
459; X64-NEXT:    retq
460  %tmp0 = sext i16 %y to i32
461  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
462  ret i32 %tmp1
463}
464
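; 111 = 0b1101111 is not a contiguous low-bit mask, so this stays as shr+and.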
465define i32 @non_bextr32(i32 %x) {
466; X86-LABEL: non_bextr32:
467; X86:       # %bb.0: # %entry
468; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
469; X86-NEXT:    shrl $2, %eax
470; X86-NEXT:    andl $111, %eax
471; X86-NEXT:    retl
472;
473; X64-LABEL: non_bextr32:
474; X64:       # %bb.0: # %entry
475; X64-NEXT:    movl %edi, %eax
476; X64-NEXT:    shrl $2, %eax
477; X64-NEXT:    andl $111, %eax
478; X64-NEXT:    retq
479entry:
480  %shr = lshr i32 %x, 2
481  %and = and i32 %shr, 111
482  ret i32 %and
483}
484
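; blsi computes x & -x, isolating the lowest set bit, e.g. 0b101000 & -0b101000 = 0b001000.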
485define i32 @blsi32(i32 %x)   {
486; X86-LABEL: blsi32:
487; X86:       # %bb.0:
488; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
489; X86-NEXT:    retl
490;
491; X64-LABEL: blsi32:
492; X64:       # %bb.0:
493; X64-NEXT:    blsil %edi, %eax
494; X64-NEXT:    retq
495  %tmp = sub i32 0, %x
496  %tmp2 = and i32 %x, %tmp
497  ret i32 %tmp2
498}
499
500define i32 @blsi32_load(ptr %x)   {
501; X86-LABEL: blsi32_load:
502; X86:       # %bb.0:
503; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
504; X86-NEXT:    blsil (%eax), %eax
505; X86-NEXT:    retl
506;
507; X64-LABEL: blsi32_load:
508; X64:       # %bb.0:
509; X64-NEXT:    blsil (%rdi), %eax
510; X64-NEXT:    retq
511  %x1 = load i32, ptr %x
512  %tmp = sub i32 0, %x1
513  %tmp2 = and i32 %x1, %tmp
514  ret i32 %tmp2
515}
516
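; The _z, _z2 and _sle variants verify that the flags produced by blsi feed the cmov/branch with no separate test.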
517define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
518; X86-LABEL: blsi32_z:
519; X86:       # %bb.0:
520; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
521; X86-NEXT:    jne .LBB25_2
522; X86-NEXT:  # %bb.1:
523; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
524; X86-NEXT:  .LBB25_2:
525; X86-NEXT:    retl
526;
527; X64-LABEL: blsi32_z:
528; X64:       # %bb.0:
529; X64-NEXT:    blsil %edi, %eax
530; X64-NEXT:    cmovel %esi, %eax
531; X64-NEXT:    retq
532  %t0 = sub i32 0, %a
533  %t1 = and i32 %t0, %a
534  %t2 = icmp eq i32 %t1, 0
535  %t3 = select i1 %t2, i32 %b, i32 %t1
536  ret i32 %t3
537}
538
539define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
540; X86-LABEL: blsi32_z2:
541; X86:       # %bb.0:
542; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
543; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
544; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
545; X86-NEXT:    cmovel %eax, %ecx
546; X86-NEXT:    movl (%ecx), %eax
547; X86-NEXT:    retl
548;
549; X64-LABEL: blsi32_z2:
550; X64:       # %bb.0:
551; X64-NEXT:    movl %esi, %eax
552; X64-NEXT:    blsil %edi, %ecx
553; X64-NEXT:    cmovnel %edx, %eax
554; X64-NEXT:    retq
555  %t0 = sub i32 0, %a
556  %t1 = and i32 %t0, %a
557  %t2 = icmp eq i32 %t1, 0
558  %t3 = select i1 %t2, i32 %b, i32 %c
559  ret i32 %t3
560}
561
562; Inspired by PR48768, but using cmovcc instead of setcc. There should be
563; no test instruction.
564define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
565; X86-LABEL: blsi32_sle:
566; X86:       # %bb.0:
567; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
568; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
569; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
570; X86-NEXT:    cmovlel %eax, %ecx
571; X86-NEXT:    movl (%ecx), %eax
572; X86-NEXT:    retl
573;
574; X64-LABEL: blsi32_sle:
575; X64:       # %bb.0:
576; X64-NEXT:    movl %esi, %eax
577; X64-NEXT:    blsil %edi, %ecx
578; X64-NEXT:    cmovgl %edx, %eax
579; X64-NEXT:    retq
580  %t0 = sub i32 0, %a
581  %t1 = and i32 %t0, %a
582  %t2 = icmp sle i32 %t1, 0
583  %t3 = select i1 %t2, i32 %b, i32 %c
584  ret i32 %t3
585}
586
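; There is no 64-bit blsi on i686, so it is expanded with neg/sbb and a pair of ands.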
587define i64 @blsi64(i64 %x)   {
588; X86-LABEL: blsi64:
589; X86:       # %bb.0:
590; X86-NEXT:    pushl %esi
591; X86-NEXT:    .cfi_def_cfa_offset 8
592; X86-NEXT:    .cfi_offset %esi, -8
593; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
594; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
595; X86-NEXT:    xorl %edx, %edx
596; X86-NEXT:    movl %ecx, %eax
597; X86-NEXT:    negl %eax
598; X86-NEXT:    sbbl %esi, %edx
599; X86-NEXT:    andl %esi, %edx
600; X86-NEXT:    andl %ecx, %eax
601; X86-NEXT:    popl %esi
602; X86-NEXT:    .cfi_def_cfa_offset 4
603; X86-NEXT:    retl
604;
605; X64-LABEL: blsi64:
606; X64:       # %bb.0:
607; X64-NEXT:    blsiq %rdi, %rax
608; X64-NEXT:    retq
609  %tmp = sub i64 0, %x
610  %tmp2 = and i64 %tmp, %x
611  ret i64 %tmp2
612}
613
614define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
615; X86-LABEL: blsi64_z:
616; X86:       # %bb.0:
617; X86-NEXT:    pushl %esi
618; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
619; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
620; X86-NEXT:    xorl %edx, %edx
621; X86-NEXT:    movl %ecx, %eax
622; X86-NEXT:    negl %eax
623; X86-NEXT:    sbbl %esi, %edx
624; X86-NEXT:    andl %esi, %edx
625; X86-NEXT:    andl %ecx, %eax
626; X86-NEXT:    movl %eax, %ecx
627; X86-NEXT:    orl %edx, %ecx
628; X86-NEXT:    jne .LBB29_2
629; X86-NEXT:  # %bb.1:
630; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
631; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
632; X86-NEXT:  .LBB29_2:
633; X86-NEXT:    popl %esi
634; X86-NEXT:    retl
635;
636; X64-LABEL: blsi64_z:
637; X64:       # %bb.0:
638; X64-NEXT:    blsiq %rdi, %rax
639; X64-NEXT:    cmoveq %rsi, %rax
640; X64-NEXT:    retq
641  %t0 = sub i64 0, %a
642  %t1 = and i64 %t0, %a
643  %t2 = icmp eq i64 %t1, 0
644  %t3 = select i1 %t2, i64 %b, i64 %t1
645  ret i64 %t3
646}
647
648define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
649; X86-LABEL: blsi64_z2:
650; X86:       # %bb.0:
651; X86-NEXT:    pushl %esi
652; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
653; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
654; X86-NEXT:    xorl %edx, %edx
655; X86-NEXT:    movl %eax, %esi
656; X86-NEXT:    negl %esi
657; X86-NEXT:    sbbl %ecx, %edx
658; X86-NEXT:    andl %ecx, %edx
659; X86-NEXT:    andl %eax, %esi
660; X86-NEXT:    orl %edx, %esi
661; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
662; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
663; X86-NEXT:    cmovel %eax, %ecx
664; X86-NEXT:    movl (%ecx), %eax
665; X86-NEXT:    movl 4(%ecx), %edx
666; X86-NEXT:    popl %esi
667; X86-NEXT:    retl
668;
669; X64-LABEL: blsi64_z2:
670; X64:       # %bb.0:
671; X64-NEXT:    movq %rsi, %rax
672; X64-NEXT:    blsiq %rdi, %rcx
673; X64-NEXT:    cmovneq %rdx, %rax
674; X64-NEXT:    retq
675  %t0 = sub i64 0, %a
676  %t1 = and i64 %t0, %a
677  %t2 = icmp eq i64 %t1, 0
678  %t3 = select i1 %t2, i64 %b, i64 %c
679  ret i64 %t3
680}
681
682define i64 @blsi64_sle(i64 %a, i64 %b, i64 %c) nounwind {
683; X86-LABEL: blsi64_sle:
684; X86:       # %bb.0:
685; X86-NEXT:    pushl %esi
686; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
687; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
688; X86-NEXT:    xorl %edx, %edx
689; X86-NEXT:    movl %eax, %esi
690; X86-NEXT:    negl %esi
691; X86-NEXT:    sbbl %ecx, %edx
692; X86-NEXT:    andl %ecx, %edx
693; X86-NEXT:    andl %eax, %esi
694; X86-NEXT:    cmpl $1, %esi
695; X86-NEXT:    sbbl $0, %edx
696; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
697; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
698; X86-NEXT:    cmovll %eax, %ecx
699; X86-NEXT:    movl (%ecx), %eax
700; X86-NEXT:    movl 4(%ecx), %edx
701; X86-NEXT:    popl %esi
702; X86-NEXT:    retl
703;
704; X64-LABEL: blsi64_sle:
705; X64:       # %bb.0:
706; X64-NEXT:    movq %rsi, %rax
707; X64-NEXT:    blsiq %rdi, %rcx
708; X64-NEXT:    cmovgq %rdx, %rax
709; X64-NEXT:    retq
710  %t0 = sub i64 0, %a
711  %t1 = and i64 %t0, %a
712  %t2 = icmp sle i64 %t1, 0
713  %t3 = select i1 %t2, i64 %b, i64 %c
714  ret i64 %t3
715}
716
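; blsmsk computes x ^ (x - 1), a mask of the lowest set bit and everything below it, e.g. 0b101000 -> 0b001111.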
717define i32 @blsmsk32(i32 %x)   {
718; X86-LABEL: blsmsk32:
719; X86:       # %bb.0:
720; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
721; X86-NEXT:    retl
722;
723; X64-LABEL: blsmsk32:
724; X64:       # %bb.0:
725; X64-NEXT:    blsmskl %edi, %eax
726; X64-NEXT:    retq
727  %tmp = sub i32 %x, 1
728  %tmp2 = xor i32 %x, %tmp
729  ret i32 %tmp2
730}
731
732define i32 @blsmsk32_load(ptr %x)   {
733; X86-LABEL: blsmsk32_load:
734; X86:       # %bb.0:
735; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
736; X86-NEXT:    blsmskl (%eax), %eax
737; X86-NEXT:    retl
738;
739; X64-LABEL: blsmsk32_load:
740; X64:       # %bb.0:
741; X64-NEXT:    blsmskl (%rdi), %eax
742; X64-NEXT:    retq
743  %x1 = load i32, ptr %x
744  %tmp = sub i32 %x1, 1
745  %tmp2 = xor i32 %x1, %tmp
746  ret i32 %tmp2
747}
748
749define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
750; X86-LABEL: blsmsk32_z:
751; X86:       # %bb.0:
752; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
753; X86-NEXT:    jne .LBB34_2
754; X86-NEXT:  # %bb.1:
755; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
756; X86-NEXT:  .LBB34_2:
757; X86-NEXT:    retl
758;
759; X64-LABEL: blsmsk32_z:
760; X64:       # %bb.0:
761; X64-NEXT:    blsmskl %edi, %eax
762; X64-NEXT:    cmovel %esi, %eax
763; X64-NEXT:    retq
764  %t0 = sub i32 %a, 1
765  %t1 = xor i32 %t0, %a
766  %t2 = icmp eq i32 %t1, 0
767  %t3 = select i1 %t2, i32 %b, i32 %t1
768  ret i32 %t3
769}
770
771define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
772; X86-LABEL: blsmsk32_z2:
773; X86:       # %bb.0:
774; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
775; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
776; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
777; X86-NEXT:    cmovel %eax, %ecx
778; X86-NEXT:    movl (%ecx), %eax
779; X86-NEXT:    retl
780;
781; X64-LABEL: blsmsk32_z2:
782; X64:       # %bb.0:
783; X64-NEXT:    movl %esi, %eax
784; X64-NEXT:    blsmskl %edi, %ecx
785; X64-NEXT:    cmovnel %edx, %eax
786; X64-NEXT:    retq
787  %t0 = sub i32 %a, 1
788  %t1 = xor i32 %t0, %a
789  %t2 = icmp eq i32 %t1, 0
790  %t3 = select i1 %t2, i32 %b, i32 %c
791  ret i32 %t3
792}
793
794define i32 @blsmsk32_sle(i32 %a, i32 %b, i32 %c) nounwind {
795; X86-LABEL: blsmsk32_sle:
796; X86:       # %bb.0:
797; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
798; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
799; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
800; X86-NEXT:    cmovlel %eax, %ecx
801; X86-NEXT:    movl (%ecx), %eax
802; X86-NEXT:    retl
803;
804; X64-LABEL: blsmsk32_sle:
805; X64:       # %bb.0:
806; X64-NEXT:    movl %esi, %eax
807; X64-NEXT:    blsmskl %edi, %ecx
808; X64-NEXT:    cmovgl %edx, %eax
809; X64-NEXT:    retq
810  %t0 = sub i32 %a, 1
811  %t1 = xor i32 %t0, %a
812  %t2 = icmp sle i32 %t1, 0
813  %t3 = select i1 %t2, i32 %b, i32 %c
814  ret i32 %t3
815}
816
817define i64 @blsmsk64(i64 %x)   {
818; X86-LABEL: blsmsk64:
819; X86:       # %bb.0:
820; X86-NEXT:    pushl %esi
821; X86-NEXT:    .cfi_def_cfa_offset 8
822; X86-NEXT:    .cfi_offset %esi, -8
823; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
824; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
825; X86-NEXT:    movl %ecx, %eax
826; X86-NEXT:    addl $-1, %eax
827; X86-NEXT:    movl %esi, %edx
828; X86-NEXT:    adcl $-1, %edx
829; X86-NEXT:    xorl %ecx, %eax
830; X86-NEXT:    xorl %esi, %edx
831; X86-NEXT:    popl %esi
832; X86-NEXT:    .cfi_def_cfa_offset 4
833; X86-NEXT:    retl
834;
835; X64-LABEL: blsmsk64:
836; X64:       # %bb.0:
837; X64-NEXT:    blsmskq %rdi, %rax
838; X64-NEXT:    retq
839  %tmp = sub i64 %x, 1
840  %tmp2 = xor i64 %tmp, %x
841  ret i64 %tmp2
842}
843
844define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
845; X86-LABEL: blsmsk64_z:
846; X86:       # %bb.0:
847; X86-NEXT:    pushl %esi
848; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
849; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
850; X86-NEXT:    movl %ecx, %eax
851; X86-NEXT:    addl $-1, %eax
852; X86-NEXT:    movl %esi, %edx
853; X86-NEXT:    adcl $-1, %edx
854; X86-NEXT:    xorl %ecx, %eax
855; X86-NEXT:    xorl %esi, %edx
856; X86-NEXT:    movl %eax, %ecx
857; X86-NEXT:    orl %edx, %ecx
858; X86-NEXT:    jne .LBB38_2
859; X86-NEXT:  # %bb.1:
860; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
861; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
862; X86-NEXT:  .LBB38_2:
863; X86-NEXT:    popl %esi
864; X86-NEXT:    retl
865;
866; X64-LABEL: blsmsk64_z:
867; X64:       # %bb.0:
868; X64-NEXT:    blsmskq %rdi, %rax
869; X64-NEXT:    cmoveq %rsi, %rax
870; X64-NEXT:    retq
871  %t0 = sub i64 %a, 1
872  %t1 = xor i64 %t0, %a
873  %t2 = icmp eq i64 %t1, 0
874  %t3 = select i1 %t2, i64 %b, i64 %t1
875  ret i64 %t3
876}
877
878define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
879; X86-LABEL: blsmsk64_z2:
880; X86:       # %bb.0:
881; X86-NEXT:    pushl %esi
882; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
883; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
884; X86-NEXT:    movl %eax, %edx
885; X86-NEXT:    addl $-1, %edx
886; X86-NEXT:    movl %ecx, %esi
887; X86-NEXT:    adcl $-1, %esi
888; X86-NEXT:    xorl %eax, %edx
889; X86-NEXT:    xorl %ecx, %esi
890; X86-NEXT:    orl %edx, %esi
891; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
892; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
893; X86-NEXT:    cmovel %eax, %ecx
894; X86-NEXT:    movl (%ecx), %eax
895; X86-NEXT:    movl 4(%ecx), %edx
896; X86-NEXT:    popl %esi
897; X86-NEXT:    retl
898;
899; X64-LABEL: blsmsk64_z2:
900; X64:       # %bb.0:
901; X64-NEXT:    movq %rsi, %rax
902; X64-NEXT:    blsmskq %rdi, %rcx
903; X64-NEXT:    cmovneq %rdx, %rax
904; X64-NEXT:    retq
905  %t0 = sub i64 %a, 1
906  %t1 = xor i64 %t0, %a
907  %t2 = icmp eq i64 %t1, 0
908  %t3 = select i1 %t2, i64 %b, i64 %c
909  ret i64 %t3
910}
911
912define i64 @blsmsk64_sle(i64 %a, i64 %b, i64 %c) nounwind {
913; X86-LABEL: blsmsk64_sle:
914; X86:       # %bb.0:
915; X86-NEXT:    pushl %esi
916; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
917; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
918; X86-NEXT:    movl %eax, %edx
919; X86-NEXT:    addl $-1, %edx
920; X86-NEXT:    movl %ecx, %esi
921; X86-NEXT:    adcl $-1, %esi
922; X86-NEXT:    xorl %ecx, %esi
923; X86-NEXT:    xorl %eax, %edx
924; X86-NEXT:    cmpl $1, %edx
925; X86-NEXT:    sbbl $0, %esi
926; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
927; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
928; X86-NEXT:    cmovll %eax, %ecx
929; X86-NEXT:    movl (%ecx), %eax
930; X86-NEXT:    movl 4(%ecx), %edx
931; X86-NEXT:    popl %esi
932; X86-NEXT:    retl
933;
934; X64-LABEL: blsmsk64_sle:
935; X64:       # %bb.0:
936; X64-NEXT:    movq %rsi, %rax
937; X64-NEXT:    blsmskq %rdi, %rcx
938; X64-NEXT:    cmovgq %rdx, %rax
939; X64-NEXT:    retq
940  %t0 = sub i64 %a, 1
941  %t1 = xor i64 %t0, %a
942  %t2 = icmp sle i64 %t1, 0
943  %t3 = select i1 %t2, i64 %b, i64 %c
944  ret i64 %t3
945}
946
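; blsr computes x & (x - 1), clearing the lowest set bit, e.g. 0b101000 -> 0b100000.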
947define i32 @blsr32(i32 %x)   {
948; X86-LABEL: blsr32:
949; X86:       # %bb.0:
950; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
951; X86-NEXT:    retl
952;
953; X64-LABEL: blsr32:
954; X64:       # %bb.0:
955; X64-NEXT:    blsrl %edi, %eax
956; X64-NEXT:    retq
957  %tmp = sub i32 %x, 1
958  %tmp2 = and i32 %x, %tmp
959  ret i32 %tmp2
960}
961
962define i32 @blsr32_load(ptr %x)   {
963; X86-LABEL: blsr32_load:
964; X86:       # %bb.0:
965; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
966; X86-NEXT:    blsrl (%eax), %eax
967; X86-NEXT:    retl
968;
969; X64-LABEL: blsr32_load:
970; X64:       # %bb.0:
971; X64-NEXT:    blsrl (%rdi), %eax
972; X64-NEXT:    retq
973  %x1 = load i32, ptr %x
974  %tmp = sub i32 %x1, 1
975  %tmp2 = and i32 %x1, %tmp
976  ret i32 %tmp2
977}
978
979define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
980; X86-LABEL: blsr32_z:
981; X86:       # %bb.0:
982; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
983; X86-NEXT:    jne .LBB43_2
984; X86-NEXT:  # %bb.1:
985; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
986; X86-NEXT:  .LBB43_2:
987; X86-NEXT:    retl
988;
989; X64-LABEL: blsr32_z:
990; X64:       # %bb.0:
991; X64-NEXT:    blsrl %edi, %eax
992; X64-NEXT:    cmovel %esi, %eax
993; X64-NEXT:    retq
994  %t0 = sub i32 %a, 1
995  %t1 = and i32 %t0, %a
996  %t2 = icmp eq i32 %t1, 0
997  %t3 = select i1 %t2, i32 %b, i32 %t1
998  ret i32 %t3
999}
1000
1001define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
1002; X86-LABEL: blsr32_z2:
1003; X86:       # %bb.0:
1004; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
1005; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
1006; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1007; X86-NEXT:    cmovel %eax, %ecx
1008; X86-NEXT:    movl (%ecx), %eax
1009; X86-NEXT:    retl
1010;
1011; X64-LABEL: blsr32_z2:
1012; X64:       # %bb.0:
1013; X64-NEXT:    movl %esi, %eax
1014; X64-NEXT:    blsrl %edi, %ecx
1015; X64-NEXT:    cmovnel %edx, %eax
1016; X64-NEXT:    retq
1017  %t0 = sub i32 %a, 1
1018  %t1 = and i32 %t0, %a
1019  %t2 = icmp eq i32 %t1, 0
1020  %t3 = select i1 %t2, i32 %b, i32 %c
1021  ret i32 %t3
1022}
1023
1024define i32 @blsr32_sle(i32 %a, i32 %b, i32 %c) nounwind {
1025; X86-LABEL: blsr32_sle:
1026; X86:       # %bb.0:
1027; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
1028; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
1029; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1030; X86-NEXT:    cmovlel %eax, %ecx
1031; X86-NEXT:    movl (%ecx), %eax
1032; X86-NEXT:    retl
1033;
1034; X64-LABEL: blsr32_sle:
1035; X64:       # %bb.0:
1036; X64-NEXT:    movl %esi, %eax
1037; X64-NEXT:    blsrl %edi, %ecx
1038; X64-NEXT:    cmovgl %edx, %eax
1039; X64-NEXT:    retq
1040  %t0 = sub i32 %a, 1
1041  %t1 = and i32 %t0, %a
1042  %t2 = icmp sle i32 %t1, 0
1043  %t3 = select i1 %t2, i32 %b, i32 %c
1044  ret i32 %t3
1045}
1046
1047define i64 @blsr64(i64 %x)   {
1048; X86-LABEL: blsr64:
1049; X86:       # %bb.0:
1050; X86-NEXT:    pushl %esi
1051; X86-NEXT:    .cfi_def_cfa_offset 8
1052; X86-NEXT:    .cfi_offset %esi, -8
1053; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1054; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1055; X86-NEXT:    movl %ecx, %eax
1056; X86-NEXT:    addl $-1, %eax
1057; X86-NEXT:    movl %esi, %edx
1058; X86-NEXT:    adcl $-1, %edx
1059; X86-NEXT:    andl %ecx, %eax
1060; X86-NEXT:    andl %esi, %edx
1061; X86-NEXT:    popl %esi
1062; X86-NEXT:    .cfi_def_cfa_offset 4
1063; X86-NEXT:    retl
1064;
1065; X64-LABEL: blsr64:
1066; X64:       # %bb.0:
1067; X64-NEXT:    blsrq %rdi, %rax
1068; X64-NEXT:    retq
1069  %tmp = sub i64 %x, 1
1070  %tmp2 = and i64 %tmp, %x
1071  ret i64 %tmp2
1072}
1073
1074define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
1075; X86-LABEL: blsr64_z:
1076; X86:       # %bb.0:
1077; X86-NEXT:    pushl %esi
1078; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1079; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1080; X86-NEXT:    movl %ecx, %eax
1081; X86-NEXT:    addl $-1, %eax
1082; X86-NEXT:    movl %esi, %edx
1083; X86-NEXT:    adcl $-1, %edx
1084; X86-NEXT:    andl %ecx, %eax
1085; X86-NEXT:    andl %esi, %edx
1086; X86-NEXT:    movl %eax, %ecx
1087; X86-NEXT:    orl %edx, %ecx
1088; X86-NEXT:    jne .LBB47_2
1089; X86-NEXT:  # %bb.1:
1090; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1091; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1092; X86-NEXT:  .LBB47_2:
1093; X86-NEXT:    popl %esi
1094; X86-NEXT:    retl
1095;
1096; X64-LABEL: blsr64_z:
1097; X64:       # %bb.0:
1098; X64-NEXT:    blsrq %rdi, %rax
1099; X64-NEXT:    cmoveq %rsi, %rax
1100; X64-NEXT:    retq
1101  %t0 = sub i64 %a, 1
1102  %t1 = and i64 %t0, %a
1103  %t2 = icmp eq i64 %t1, 0
1104  %t3 = select i1 %t2, i64 %b, i64 %t1
1105  ret i64 %t3
1106}
1107
1108define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
1109; X86-LABEL: blsr64_z2:
1110; X86:       # %bb.0:
1111; X86-NEXT:    pushl %esi
1112; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1113; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1114; X86-NEXT:    movl %eax, %edx
1115; X86-NEXT:    addl $-1, %edx
1116; X86-NEXT:    movl %ecx, %esi
1117; X86-NEXT:    adcl $-1, %esi
1118; X86-NEXT:    andl %eax, %edx
1119; X86-NEXT:    andl %ecx, %esi
1120; X86-NEXT:    orl %edx, %esi
1121; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
1122; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1123; X86-NEXT:    cmovel %eax, %ecx
1124; X86-NEXT:    movl (%ecx), %eax
1125; X86-NEXT:    movl 4(%ecx), %edx
1126; X86-NEXT:    popl %esi
1127; X86-NEXT:    retl
1128;
1129; X64-LABEL: blsr64_z2:
1130; X64:       # %bb.0:
1131; X64-NEXT:    movq %rsi, %rax
1132; X64-NEXT:    blsrq %rdi, %rcx
1133; X64-NEXT:    cmovneq %rdx, %rax
1134; X64-NEXT:    retq
1135  %t0 = sub i64 %a, 1
1136  %t1 = and i64 %t0, %a
1137  %t2 = icmp eq i64 %t1, 0
1138  %t3 = select i1 %t2, i64 %b, i64 %c
1139  ret i64 %t3
1140}
1141
1142define i64 @blsr64_sle(i64 %a, i64 %b, i64 %c) nounwind {
1143; X86-LABEL: blsr64_sle:
1144; X86:       # %bb.0:
1145; X86-NEXT:    pushl %esi
1146; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1147; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1148; X86-NEXT:    movl %eax, %edx
1149; X86-NEXT:    addl $-1, %edx
1150; X86-NEXT:    movl %ecx, %esi
1151; X86-NEXT:    adcl $-1, %esi
1152; X86-NEXT:    andl %ecx, %esi
1153; X86-NEXT:    andl %eax, %edx
1154; X86-NEXT:    cmpl $1, %edx
1155; X86-NEXT:    sbbl $0, %esi
1156; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
1157; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
1158; X86-NEXT:    cmovll %eax, %ecx
1159; X86-NEXT:    movl (%ecx), %eax
1160; X86-NEXT:    movl 4(%ecx), %edx
1161; X86-NEXT:    popl %esi
1162; X86-NEXT:    retl
1163;
1164; X64-LABEL: blsr64_sle:
1165; X64:       # %bb.0:
1166; X64-NEXT:    movq %rsi, %rax
1167; X64-NEXT:    blsrq %rdi, %rcx
1168; X64-NEXT:    cmovgq %rdx, %rax
1169; X64-NEXT:    retq
1170  %t0 = sub i64 %a, 1
1171  %t1 = and i64 %t0, %a
1172  %t2 = icmp sle i64 %t1, 0
1173  %t3 = select i1 %t2, i64 %b, i64 %c
1174  ret i64 %t3
1175}
1176
1177; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792
1178
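; Adding 65535 is the same as subtracting 1 modulo 2^16, so under the 0xFFFF mask this is still x & (x - 1).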
1179define i64 @blsr_disguised_constant(i64 %x) {
1180; X86-LABEL: blsr_disguised_constant:
1181; X86:       # %bb.0:
1182; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
1183; X86-NEXT:    movzwl %ax, %eax
1184; X86-NEXT:    xorl %edx, %edx
1185; X86-NEXT:    retl
1186;
1187; X64-LABEL: blsr_disguised_constant:
1188; X64:       # %bb.0:
1189; X64-NEXT:    blsrl %edi, %eax
1190; X64-NEXT:    movzwl %ax, %eax
1191; X64-NEXT:    retq
1192  %a1 = and i64 %x, 65535
1193  %a2 = add i64 %x, 65535
1194  %r = and i64 %a1, %a2
1195  ret i64 %r
1196}
1197
1198; The 'add' here used to get shrunk, but the 'and' did not, which hid the blsr pattern.
1199; We now use the knowledge that the shift leaves the upper bits zero, so the 'and' result also has zero upper bits, allowing it to be reduced too.
1200define i64 @blsr_disguised_shrunk_add(i64 %x) {
1201; X86-LABEL: blsr_disguised_shrunk_add:
1202; X86:       # %bb.0:
1203; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1204; X86-NEXT:    shrl $16, %eax
1205; X86-NEXT:    blsrl %eax, %eax
1206; X86-NEXT:    xorl %edx, %edx
1207; X86-NEXT:    retl
1208;
1209; X64-LABEL: blsr_disguised_shrunk_add:
1210; X64:       # %bb.0:
1211; X64-NEXT:    shrq $48, %rdi
1212; X64-NEXT:    blsrl %edi, %eax
1213; X64-NEXT:    retq
1214  %a = lshr i64 %x, 48
1215  %b = add i64 %a, -1
1216  %c = and i64 %b, %a
1217  ret i64 %c
1218}
1219
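; The sign of the bextr result is branched on; bextr leaves SF undefined, so a testl is still needed.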
1220define void @pr40060(i32, i32) {
1221; X86-LABEL: pr40060:
1222; X86:       # %bb.0:
1223; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1224; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
1225; X86-NEXT:    testl %eax, %eax
1226; X86-NEXT:    js .LBB52_1
1227; X86-NEXT:  # %bb.2:
1228; X86-NEXT:    jmp bar # TAILCALL
1229; X86-NEXT:  .LBB52_1:
1230; X86-NEXT:    retl
1231;
1232; X64-LABEL: pr40060:
1233; X64:       # %bb.0:
1234; X64-NEXT:    bextrl %esi, %edi, %eax
1235; X64-NEXT:    testl %eax, %eax
1236; X64-NEXT:    js .LBB52_1
1237; X64-NEXT:  # %bb.2:
1238; X64-NEXT:    jmp bar # TAILCALL
1239; X64-NEXT:  .LBB52_1:
1240; X64-NEXT:    retq
1241  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
1242  %4 = icmp sgt i32 %3, -1
1243  br i1 %4, label %5, label %6
1244
1245  tail call void @bar()
1246  br label %6
1247
1248  ret void
1249}
1250
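; In the *_branch tests the ZF set by the BMI instruction drives the jne directly; no separate test is emitted.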
1251define i32 @blsr32_branch(i32 %x) {
1252; X86-LABEL: blsr32_branch:
1253; X86:       # %bb.0:
1254; X86-NEXT:    pushl %esi
1255; X86-NEXT:    .cfi_def_cfa_offset 8
1256; X86-NEXT:    .cfi_offset %esi, -8
1257; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %esi
1258; X86-NEXT:    jne .LBB53_2
1259; X86-NEXT:  # %bb.1:
1260; X86-NEXT:    calll bar
1261; X86-NEXT:  .LBB53_2:
1262; X86-NEXT:    movl %esi, %eax
1263; X86-NEXT:    popl %esi
1264; X86-NEXT:    .cfi_def_cfa_offset 4
1265; X86-NEXT:    retl
1266;
1267; X64-LABEL: blsr32_branch:
1268; X64:       # %bb.0:
1269; X64-NEXT:    pushq %rbx
1270; X64-NEXT:    .cfi_def_cfa_offset 16
1271; X64-NEXT:    .cfi_offset %rbx, -16
1272; X64-NEXT:    blsrl %edi, %ebx
1273; X64-NEXT:    jne .LBB53_2
1274; X64-NEXT:  # %bb.1:
1275; X64-NEXT:    callq bar
1276; X64-NEXT:  .LBB53_2:
1277; X64-NEXT:    movl %ebx, %eax
1278; X64-NEXT:    popq %rbx
1279; X64-NEXT:    .cfi_def_cfa_offset 8
1280; X64-NEXT:    retq
1281  %tmp = sub i32 %x, 1
1282  %tmp2 = and i32 %x, %tmp
1283  %cmp = icmp eq i32 %tmp2, 0
1284  br i1 %cmp, label %1, label %2
1285
1286  tail call void @bar()
1287  br label %2
1288  ret i32 %tmp2
1289}
1290
1291define i64 @blsr64_branch(i64 %x) {
1292; X86-LABEL: blsr64_branch:
1293; X86:       # %bb.0:
1294; X86-NEXT:    pushl %edi
1295; X86-NEXT:    .cfi_def_cfa_offset 8
1296; X86-NEXT:    pushl %esi
1297; X86-NEXT:    .cfi_def_cfa_offset 12
1298; X86-NEXT:    .cfi_offset %esi, -12
1299; X86-NEXT:    .cfi_offset %edi, -8
1300; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1301; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1302; X86-NEXT:    movl %eax, %esi
1303; X86-NEXT:    addl $-1, %esi
1304; X86-NEXT:    movl %ecx, %edi
1305; X86-NEXT:    adcl $-1, %edi
1306; X86-NEXT:    andl %eax, %esi
1307; X86-NEXT:    andl %ecx, %edi
1308; X86-NEXT:    movl %esi, %eax
1309; X86-NEXT:    orl %edi, %eax
1310; X86-NEXT:    jne .LBB54_2
1311; X86-NEXT:  # %bb.1:
1312; X86-NEXT:    calll bar
1313; X86-NEXT:  .LBB54_2:
1314; X86-NEXT:    movl %esi, %eax
1315; X86-NEXT:    movl %edi, %edx
1316; X86-NEXT:    popl %esi
1317; X86-NEXT:    .cfi_def_cfa_offset 8
1318; X86-NEXT:    popl %edi
1319; X86-NEXT:    .cfi_def_cfa_offset 4
1320; X86-NEXT:    retl
1321;
1322; X64-LABEL: blsr64_branch:
1323; X64:       # %bb.0:
1324; X64-NEXT:    pushq %rbx
1325; X64-NEXT:    .cfi_def_cfa_offset 16
1326; X64-NEXT:    .cfi_offset %rbx, -16
1327; X64-NEXT:    blsrq %rdi, %rbx
1328; X64-NEXT:    jne .LBB54_2
1329; X64-NEXT:  # %bb.1:
1330; X64-NEXT:    callq bar
1331; X64-NEXT:  .LBB54_2:
1332; X64-NEXT:    movq %rbx, %rax
1333; X64-NEXT:    popq %rbx
1334; X64-NEXT:    .cfi_def_cfa_offset 8
1335; X64-NEXT:    retq
1336  %tmp = sub i64 %x, 1
1337  %tmp2 = and i64 %x, %tmp
1338  %cmp = icmp eq i64 %tmp2, 0
1339  br i1 %cmp, label %1, label %2
1340
1341  tail call void @bar()
1342  br label %2
1343  ret i64 %tmp2
1344}
1345
1346define i32 @blsi32_branch(i32 %x) {
1347; X86-LABEL: blsi32_branch:
1348; X86:       # %bb.0:
1349; X86-NEXT:    pushl %esi
1350; X86-NEXT:    .cfi_def_cfa_offset 8
1351; X86-NEXT:    .cfi_offset %esi, -8
1352; X86-NEXT:    blsil {{[0-9]+}}(%esp), %esi
1353; X86-NEXT:    jne .LBB55_2
1354; X86-NEXT:  # %bb.1:
1355; X86-NEXT:    calll bar
1356; X86-NEXT:  .LBB55_2:
1357; X86-NEXT:    movl %esi, %eax
1358; X86-NEXT:    popl %esi
1359; X86-NEXT:    .cfi_def_cfa_offset 4
1360; X86-NEXT:    retl
1361;
1362; X64-LABEL: blsi32_branch:
1363; X64:       # %bb.0:
1364; X64-NEXT:    pushq %rbx
1365; X64-NEXT:    .cfi_def_cfa_offset 16
1366; X64-NEXT:    .cfi_offset %rbx, -16
1367; X64-NEXT:    blsil %edi, %ebx
1368; X64-NEXT:    jne .LBB55_2
1369; X64-NEXT:  # %bb.1:
1370; X64-NEXT:    callq bar
1371; X64-NEXT:  .LBB55_2:
1372; X64-NEXT:    movl %ebx, %eax
1373; X64-NEXT:    popq %rbx
1374; X64-NEXT:    .cfi_def_cfa_offset 8
1375; X64-NEXT:    retq
1376  %tmp = sub i32 0, %x
1377  %tmp2 = and i32 %x, %tmp
1378  %cmp = icmp eq i32 %tmp2, 0
1379  br i1 %cmp, label %1, label %2
1380
1381  tail call void @bar()
1382  br label %2
1383  ret i32 %tmp2
1384}
1385
1386define i64 @blsi64_branch(i64 %x) {
1387; X86-LABEL: blsi64_branch:
1388; X86:       # %bb.0:
1389; X86-NEXT:    pushl %edi
1390; X86-NEXT:    .cfi_def_cfa_offset 8
1391; X86-NEXT:    pushl %esi
1392; X86-NEXT:    .cfi_def_cfa_offset 12
1393; X86-NEXT:    .cfi_offset %esi, -12
1394; X86-NEXT:    .cfi_offset %edi, -8
1395; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1396; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1397; X86-NEXT:    xorl %esi, %esi
1398; X86-NEXT:    movl %eax, %edi
1399; X86-NEXT:    negl %edi
1400; X86-NEXT:    sbbl %ecx, %esi
1401; X86-NEXT:    andl %ecx, %esi
1402; X86-NEXT:    andl %eax, %edi
1403; X86-NEXT:    movl %edi, %eax
1404; X86-NEXT:    orl %esi, %eax
1405; X86-NEXT:    jne .LBB56_2
1406; X86-NEXT:  # %bb.1:
1407; X86-NEXT:    calll bar
1408; X86-NEXT:  .LBB56_2:
1409; X86-NEXT:    movl %edi, %eax
1410; X86-NEXT:    movl %esi, %edx
1411; X86-NEXT:    popl %esi
1412; X86-NEXT:    .cfi_def_cfa_offset 8
1413; X86-NEXT:    popl %edi
1414; X86-NEXT:    .cfi_def_cfa_offset 4
1415; X86-NEXT:    retl
1416;
1417; X64-LABEL: blsi64_branch:
1418; X64:       # %bb.0:
1419; X64-NEXT:    pushq %rbx
1420; X64-NEXT:    .cfi_def_cfa_offset 16
1421; X64-NEXT:    .cfi_offset %rbx, -16
1422; X64-NEXT:    blsiq %rdi, %rbx
1423; X64-NEXT:    jne .LBB56_2
1424; X64-NEXT:  # %bb.1:
1425; X64-NEXT:    callq bar
1426; X64-NEXT:  .LBB56_2:
1427; X64-NEXT:    movq %rbx, %rax
1428; X64-NEXT:    popq %rbx
1429; X64-NEXT:    .cfi_def_cfa_offset 8
1430; X64-NEXT:    retq
1431  %tmp = sub i64 0, %x
1432  %tmp2 = and i64 %x, %tmp
1433  %cmp = icmp eq i64 %tmp2, 0
1434  br i1 %cmp, label %1, label %2
1435
1436  tail call void @bar()
1437  br label %2
1438  ret i64 %tmp2
1439}
1440
1441declare dso_local void @bar()
1442
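; PR42118: (x & -x) == x is equivalent to (x & (x - 1)) == 0, so the test reduces to blsr's zero flag.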
1443define void @pr42118_i32(i32 %x) {
1444; X86-LABEL: pr42118_i32:
1445; X86:       # %bb.0:
1446; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
1447; X86-NEXT:    jne .LBB57_1
1448; X86-NEXT:  # %bb.2:
1449; X86-NEXT:    jmp bar # TAILCALL
1450; X86-NEXT:  .LBB57_1:
1451; X86-NEXT:    retl
1452;
1453; X64-LABEL: pr42118_i32:
1454; X64:       # %bb.0:
1455; X64-NEXT:    blsrl %edi, %eax
1456; X64-NEXT:    jne .LBB57_1
1457; X64-NEXT:  # %bb.2:
1458; X64-NEXT:    jmp bar # TAILCALL
1459; X64-NEXT:  .LBB57_1:
1460; X64-NEXT:    retq
1461  %tmp = sub i32 0, %x
1462  %tmp1 = and i32 %tmp, %x
1463  %cmp = icmp eq i32 %tmp1, %x
1464  br i1 %cmp, label %1, label %2
1465
1466  tail call void @bar()
1467  br label %2
1468
1469  ret void
1470}
1471
1472define void @pr42118_i64(i64 %x) {
1473; X86-LABEL: pr42118_i64:
1474; X86:       # %bb.0:
1475; X86-NEXT:    pushl %esi
1476; X86-NEXT:    .cfi_def_cfa_offset 8
1477; X86-NEXT:    .cfi_offset %esi, -8
1478; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1479; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1480; X86-NEXT:    movl %eax, %edx
1481; X86-NEXT:    addl $-1, %edx
1482; X86-NEXT:    movl %ecx, %esi
1483; X86-NEXT:    adcl $-1, %esi
1484; X86-NEXT:    andl %eax, %edx
1485; X86-NEXT:    andl %ecx, %esi
1486; X86-NEXT:    orl %edx, %esi
1487; X86-NEXT:    jne .LBB58_1
1488; X86-NEXT:  # %bb.2:
1489; X86-NEXT:    popl %esi
1490; X86-NEXT:    .cfi_def_cfa_offset 4
1491; X86-NEXT:    jmp bar # TAILCALL
1492; X86-NEXT:  .LBB58_1:
1493; X86-NEXT:    .cfi_def_cfa_offset 8
1494; X86-NEXT:    popl %esi
1495; X86-NEXT:    .cfi_def_cfa_offset 4
1496; X86-NEXT:    retl
1497;
1498; X64-LABEL: pr42118_i64:
1499; X64:       # %bb.0:
1500; X64-NEXT:    blsrq %rdi, %rax
1501; X64-NEXT:    jne .LBB58_1
1502; X64-NEXT:  # %bb.2:
1503; X64-NEXT:    jmp bar # TAILCALL
1504; X64-NEXT:  .LBB58_1:
1505; X64-NEXT:    retq
1506  %tmp = sub i64 0, %x
1507  %tmp1 = and i64 %tmp, %x
1508  %cmp = icmp eq i64 %tmp1, %x
1509  br i1 %cmp, label %1, label %2
1510
1511  tail call void @bar()
1512  br label %2
1513
1514  ret void
1515}
1516
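; blsi sets CF when its source is non-zero, so the x == 0 select can use cmovae.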
1517define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
1518; X86-LABEL: blsi_cflag_32:
1519; X86:       # %bb.0:
1520; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1521; X86-NEXT:    testl %eax, %eax
1522; X86-NEXT:    jne .LBB59_1
1523; X86-NEXT:  # %bb.2:
1524; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1525; X86-NEXT:    retl
1526; X86-NEXT:  .LBB59_1:
1527; X86-NEXT:    blsil %eax, %eax
1528; X86-NEXT:    retl
1529;
1530; X64-LABEL: blsi_cflag_32:
1531; X64:       # %bb.0:
1532; X64-NEXT:    blsil %edi, %eax
1533; X64-NEXT:    cmovael %esi, %eax
1534; X64-NEXT:    retq
1535  %tobool = icmp eq i32 %x, 0
1536  %sub = sub nsw i32 0, %x
1537  %and = and i32 %sub, %x
1538  %cond = select i1 %tobool, i32 %y, i32 %and
1539  ret i32 %cond
1540}
1541
1542define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
1543; X86-LABEL: blsi_cflag_64:
1544; X86:       # %bb.0:
1545; X86-NEXT:    pushl %edi
1546; X86-NEXT:    pushl %esi
1547; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1548; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
1549; X86-NEXT:    xorl %edx, %edx
1550; X86-NEXT:    movl %ecx, %eax
1551; X86-NEXT:    negl %eax
1552; X86-NEXT:    sbbl %esi, %edx
1553; X86-NEXT:    movl %ecx, %edi
1554; X86-NEXT:    orl %esi, %edi
1555; X86-NEXT:    jne .LBB60_1
1556; X86-NEXT:  # %bb.2:
1557; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
1558; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
1559; X86-NEXT:    jmp .LBB60_3
1560; X86-NEXT:  .LBB60_1:
1561; X86-NEXT:    andl %esi, %edx
1562; X86-NEXT:    andl %ecx, %eax
1563; X86-NEXT:  .LBB60_3:
1564; X86-NEXT:    popl %esi
1565; X86-NEXT:    popl %edi
1566; X86-NEXT:    retl
1567;
1568; X64-LABEL: blsi_cflag_64:
1569; X64:       # %bb.0:
1570; X64-NEXT:    blsiq %rdi, %rax
1571; X64-NEXT:    cmovaeq %rsi, %rax
1572; X64-NEXT:    retq
1573  %tobool = icmp eq i64 %x, 0
1574  %sub = sub nsw i64 0, %x
1575  %and = and i64 %sub, %x
1576  %cond = select i1 %tobool, i64 %y, i64 %and
1577  ret i64 %cond
1578}
1579