; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86,X86-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64,X64-SLOW-BEXTR
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+bmi,+fast-bextr | FileCheck %s --check-prefixes=X86,X86-FAST-BEXTR
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=X64,X64-FAST-BEXTR

define i32 @andn32(i32 %x, i32 %y) {
; X86-LABEL: andn32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: andn32:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: retq
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y, %tmp1
  ret i32 %tmp2
}

define i32 @andn32_load(i32 %x, ptr %y) {
; X86-LABEL: andn32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl (%eax), %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: andn32_load:
; X64: # %bb.0:
; X64-NEXT: andnl (%rsi), %edi, %eax
; X64-NEXT: retq
  %y1 = load i32, ptr %y
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y1, %tmp1
  ret i32 %tmp2
}

define i64 @andn64(i64 %x, i64 %y) {
; X86-LABEL: andn64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-NEXT: retl
;
; X64-LABEL: andn64:
; X64: # %bb.0:
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: retq
  %tmp1 = xor i64 %x, -1
  %tmp2 = and i64 %tmp1, %y
  ret i64 %tmp2
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: andn_cmp:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %notx = xor i32 %x, -1
  %and = and i32 %notx, %y
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
}

; Recognize a disguised andn in the following 4 tests.
define i1 @and_cmp1(i32 %x, i32 %y) {
; X86-LABEL: and_cmp1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp1:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp2(i32 %x, i32 %y) {
; X86-LABEL: and_cmp2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp2:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp3(i32 %x, i32 %y) {
; X86-LABEL: and_cmp3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp3:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %y, %and
  ret i1 %cmp
}

define i1 @and_cmp4(i32 %x, i32 %y) {
; X86-LABEL: and_cmp4:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp4:
; X64: # %bb.0:
; X64-NEXT: andnl %esi, %edi, %eax
; X64-NEXT: setne %al
; X64-NEXT: retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %y, %and
  ret i1 %cmp
}

; A mask and compare against constant is ok for an 'andn' too
; even though the BMI instruction doesn't have an immediate form.
define i1 @and_cmp_const(i32 %x) {
; X86-LABEL: and_cmp_const:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notl %eax
; X86-NEXT: testb $43, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp_const:
; X64: # %bb.0:
; X64-NEXT: notl %edi
; X64-NEXT: testb $43, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
  %and = and i32 %x, 43
  %cmp = icmp eq i32 %and, 43
  ret i1 %cmp
}

; But don't use 'andn' if the mask is a power-of-two.
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X86-LABEL: and_cmp_const_power_of_two:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: and_cmp_const_power_of_two:
; X64: # %bb.0:
; X64-NEXT: btl %esi, %edi
; X64-NEXT: setae %al
; X64-NEXT: retq
  %shl = shl i32 1, %y
  %and = and i32 %x, %shl
  %cmp = icmp ne i32 %and, %shl
  ret i1 %cmp
}

; Don't transform to 'andn' if there's another use of the 'and'.
define i32 @and_cmp_not_one_use(i32 %x) {
; X86-LABEL: and_cmp_not_one_use:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl $37, %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $37, %ecx
; X86-NEXT: sete %al
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_cmp_not_one_use:
; X64: # %bb.0:
; X64-NEXT: andl $37, %edi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $37, %edi
; X64-NEXT: sete %al
; X64-NEXT: addl %edi, %eax
; X64-NEXT: retq
  %and = and i32 %x, 37
  %cmp = icmp eq i32 %and, 37
  %ext = zext i1 %cmp to i32
  %add = add i32 %and, %ext
  ret i32 %add
}

; Verify that we're not transforming invalid comparison predicates.
define i1 @not_an_andn1(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: setg %al
; X86-NEXT: retl
;
; X64-LABEL: not_an_andn1:
; X64: # %bb.0:
; X64-NEXT: andl %esi, %edi
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setg %al
; X64-NEXT: retq
  %and = and i32 %x, %y
  %cmp = icmp sgt i32 %y, %and
  ret i1 %cmp
}

define i1 @not_an_andn2(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: setbe %al
; X86-NEXT: retl
;
; X64-LABEL: not_an_andn2:
; X64: # %bb.0:
; X64-NEXT: andl %esi, %edi
; X64-NEXT: cmpl %edi, %esi
; X64-NEXT: setbe %al
; X64-NEXT: retq
  %and = and i32 %y, %x
  %cmp = icmp ule i32 %y, %and
  ret i1 %cmp
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X86-LABEL: andn_cmp_swap_ops:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %ecx
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: andn_cmp_swap_ops:
; X64: # %bb.0:
; X64-NEXT: andnq %rsi, %rdi, %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
  %notx = xor i64 %x, -1
  %and = and i64 %y, %notx
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}

; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notb %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: andn_cmp_i8:
; X64: # %bb.0:
; X64-NEXT: notb %sil
; X64-NEXT: testb %sil, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
  %noty = xor i8 %y, -1
  %and = and i8 %x, %noty
  %cmp = icmp eq i8 %and, 0
  ret i1 %cmp
}

; PR48768 - 'andn' clears the overflow flag, so we don't need a separate 'test'.
define i1 @andn_cmp_i32_overflow(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp_i32_overflow:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT: setle %al
; X86-NEXT: retl
;
; X64-LABEL: andn_cmp_i32_overflow:
; X64: # %bb.0:
; X64-NEXT: andnl %edi, %esi, %eax
; X64-NEXT: setle %al
; X64-NEXT: retq
  %noty = xor i32 %y, -1
  %and = and i32 %x, %noty
  %cmp = icmp slt i32 %and, 1
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

define i32 @bextr32(i32 %x, i32 %y) {
; X86-LABEL: bextr32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32_load(ptr %x, i32 %y) {
; X86-LABEL: bextr32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: bextrl %eax, (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32_load:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, ptr %x
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32b(i32 %x) uwtable ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b:
; X86-SLOW-BEXTR: # %bb.0:
; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT: shrl $4, %eax
; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT: retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b:
; X64-SLOW-BEXTR: # %bb.0:
; X64-SLOW-BEXTR-NEXT: movl %edi, %eax
; X64-SLOW-BEXTR-NEXT: shrl $4, %eax
; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT: retq
;
; X86-FAST-BEXTR-LABEL: bextr32b:
; X86-FAST-BEXTR: # %bb.0:
; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT: retl
;
; X64-FAST-BEXTR-LABEL: bextr32b:
; X64-FAST-BEXTR: # %bb.0:
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax
; X64-FAST-BEXTR-NEXT: retq
  %1 = lshr i32 %x, 4
  %2 = and i32 %1, 4095
  ret i32 %2
}

; Make sure we still use AH subreg trick to extract 15:8
define i32 @bextr32_subreg(i32 %x) uwtable ssp {
; X86-LABEL: bextr32_subreg:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32_subreg:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movzbl %ah, %eax
; X64-NEXT: retq
  %1 = lshr i32 %x, 8
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @bextr32b_load(ptr %x) uwtable ssp {
; X86-SLOW-BEXTR-LABEL: bextr32b_load:
; X86-SLOW-BEXTR: # %bb.0:
; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax
; X86-SLOW-BEXTR-NEXT: shrl $4, %eax
; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X86-SLOW-BEXTR-NEXT: retl
;
; X64-SLOW-BEXTR-LABEL: bextr32b_load:
; X64-SLOW-BEXTR: # %bb.0:
; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax
; X64-SLOW-BEXTR-NEXT: shrl $4, %eax
; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF
; X64-SLOW-BEXTR-NEXT: retq
;
; X86-FAST-BEXTR-LABEL: bextr32b_load:
; X86-FAST-BEXTR: # %bb.0:
; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04
; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax
; X86-FAST-BEXTR-NEXT: retl
;
; X64-FAST-BEXTR-LABEL: bextr32b_load:
; X64-FAST-BEXTR: # %bb.0:
; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04
; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax
; X64-FAST-BEXTR-NEXT: retq
  %1 = load i32, ptr %x
  %2 = lshr i32 %1, 4
  %3 = and i32 %2, 4095
  ret i32 %3
}

; PR34042
define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X86-LABEL: bextr32c:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: bextr32c:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: retq
  %tmp0 = sext i16 %y to i32
  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
  ret i32 %tmp1
}

define i32 @non_bextr32(i32 %x) {
; X86-LABEL: non_bextr32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $111, %eax
; X86-NEXT: retl
;
; X64-LABEL: non_bextr32:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $111, %eax
; X64-NEXT: retq
entry:
  %shr = lshr i32 %x, 2
  %and = and i32 %shr, 111
  ret i32 %and
}

define i32 @blsi32(i32 %x) {
; X86-LABEL: blsi32:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi32:
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_load(ptr %x) {
; X86-LABEL: blsi32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsil (%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi32_load:
; X64: # %bb.0:
; X64-NEXT: blsil (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, ptr %x
  %tmp = sub i32 0, %x1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsi32_z:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB25_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB25_2:
; X86-NEXT: retl
;
; X64-LABEL: blsi32_z:
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_z2:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi32_z2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

; Inspired by PR48768, but using cmovcc instead of setcc. There should be
; no test instruction.
define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_sle:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi32_sle:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsil %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 0, %a
  %t1 = and i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsi64(i64 %x) {
; X86-LABEL: blsi64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsi64:
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsi64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsi64_z:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB29_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB29_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsi64_z:
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsi64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_z2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: andl %ecx, %edx
; X86-NEXT: andl %eax, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsi64_z2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i64 @blsi64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsi64_sle:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: andl %ecx, %edx
; X86-NEXT: andl %eax, %esi
; X86-NEXT: cmpl $1, %esi
; X86-NEXT: sbbl $0, %edx
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsi64_sle:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsiq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 0, %a
  %t1 = and i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i32 @blsmsk32(i32 %x) {
; X86-LABEL: blsmsk32:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32:
; X64: # %bb.0:
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: retq
  %tmp = sub i32 %x, 1
  %tmp2 = xor i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_load(ptr %x) {
; X86-LABEL: blsmsk32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsmskl (%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32_load:
; X64: # %bb.0:
; X64-NEXT: blsmskl (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, ptr %x
  %tmp = sub i32 %x1, 1
  %tmp2 = xor i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsmsk32_z:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB34_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB34_2:
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32_z:
; X64: # %bb.0:
; X64-NEXT: blsmskl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_z2:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32_z2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i32 @blsmsk32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_sle:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk32_sle:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = xor i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsmsk64(i64 %x) {
; X86-LABEL: blsmsk64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsmsk64:
; X64: # %bb.0:
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: retq
  %tmp = sub i64 %x, 1
  %tmp2 = xor i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsmsk64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsmsk64_z:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB38_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB38_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsmsk64_z:
; X64: # %bb.0:
; X64-NEXT: blsmskq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_z2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsmsk64_z2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i64 @blsmsk64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsmsk64_sle:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: cmpl $1, %edx
; X86-NEXT: sbbl $0, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsmsk64_sle:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = xor i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i32 @blsr32(i32 %x) {
; X86-LABEL: blsr32:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr32:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_load(ptr %x) {
; X86-LABEL: blsr32_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: blsrl (%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr32_load:
; X64: # %bb.0:
; X64-NEXT: blsrl (%rdi), %eax
; X64-NEXT: retq
  %x1 = load i32, ptr %x
  %tmp = sub i32 %x1, 1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_z(i32 %a, i32 %b) nounwind {
; X86-LABEL: blsr32_z:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB43_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .LBB43_2:
; X86-NEXT: retl
;
; X64-LABEL: blsr32_z:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %t1
  ret i32 %t3
}

define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_z2:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr32_z2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp eq i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i32 @blsr32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsr32_sle:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr32_sle:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
  %t0 = sub i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}

define i64 @blsr64(i64 %x) {
; X86-LABEL: blsr64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsr64:
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i64 @blsr64_z(i64 %a, i64 %b) nounwind {
; X86-LABEL: blsr64_z:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: jne .LBB47_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: .LBB47_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsr64_z:
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %t1
  ret i64 %t3
}

define i64 @blsr64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_z2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsr64_z2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp eq i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

define i64 @blsr64_sle(i64 %a, i64 %b, i64 %c) nounwind {
; X86-LABEL: blsr64_sle:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: cmpl $1, %edx
; X86-NEXT: sbbl $0, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: blsr64_sle:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsrq %rdi, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
  %t0 = sub i64 %a, 1
  %t1 = and i64 %t0, %a
  %t2 = icmp sle i64 %t1, 0
  %t3 = select i1 %t2, i64 %b, i64 %c
  ret i64 %t3
}

; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792

define i64 @blsr_disguised_constant(i64 %x) {
; X86-LABEL: blsr_disguised_constant:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: blsr_disguised_constant:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: retq
  %a1 = and i64 %x, 65535
  %a2 = add i64 %x, 65535
  %r = and i64 %a1, %a2
  ret i64 %r
}

; The add here used to get shrunk, but the and did not, which hid the blsr pattern.
; We now use the knowledge that the upper bits of the shift guarantee the and result has 0s in the upper bits, so it can be reduced too.
define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X86-LABEL: blsr_disguised_shrunk_add:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $16, %eax
; X86-NEXT: blsrl %eax, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: blsr_disguised_shrunk_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: retq
  %a = lshr i64 %x, 48
  %b = add i64 %a, -1
  %c = and i64 %b, %a
  ret i64 %c
}

define void @pr40060(i32, i32) {
; X86-LABEL: pr40060:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: js .LBB52_1
; X86-NEXT: # %bb.2:
; X86-NEXT: jmp bar # TAILCALL
; X86-NEXT: .LBB52_1:
; X86-NEXT: retl
;
; X64-LABEL: pr40060:
; X64: # %bb.0:
; X64-NEXT: bextrl %esi, %edi, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: js .LBB52_1
; X64-NEXT: # %bb.2:
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB52_1:
; X64-NEXT: retq
  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1)
  %4 = icmp sgt i32 %3, -1
  br i1 %4, label %5, label %6

  tail call void @bar()
  br label %6

  ret void
}

define i32 @blsr32_branch(i32 %x) {
; X86-LABEL: blsr32_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %esi
; X86-NEXT: jne .LBB53_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB53_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsr32_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsrl %edi, %ebx
; X64-NEXT: jne .LBB53_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB53_2:
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i32 %tmp2
}

define i64 @blsr64_branch(i64 %x) {
; X86-LABEL: blsr64_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %edi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl $-1, %esi
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: adcl $-1, %edi
; X86-NEXT: andl %eax, %esi
; X86-NEXT: andl %ecx, %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: orl %edi, %eax
; X86-NEXT: jne .LBB54_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB54_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsr64_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsrq %rdi, %rbx
; X64-NEXT: jne .LBB54_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB54_2:
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i64 %tmp2
}

define i32 @blsi32_branch(i32 %x) {
; X86-LABEL: blsi32_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: blsil {{[0-9]+}}(%esp), %esi
; X86-NEXT: jne .LBB55_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB55_2:
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsi32_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsil %edi, %ebx
; X64-NEXT: jne .LBB55_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB55_2:
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  %cmp = icmp eq i32 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i32 %tmp2
}

define i64 @blsi64_branch(i64 %x) {
; X86-LABEL: blsi64_branch:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %edi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: movl %eax, %edi
; X86-NEXT: negl %edi
; X86-NEXT: sbbl %ecx, %esi
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: andl %eax, %edi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: jne .LBB56_2
; X86-NEXT: # %bb.1:
; X86-NEXT: calll bar
; X86-NEXT: .LBB56_2:
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: blsi64_branch:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: blsiq %rdi, %rbx
; X64-NEXT: jne .LBB56_2
; X64-NEXT: # %bb.1:
; X64-NEXT: callq bar
; X64-NEXT: .LBB56_2:
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %x, %tmp
  %cmp = icmp eq i64 %tmp2, 0
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2
  ret i64 %tmp2
}

declare dso_local void @bar()

define void @pr42118_i32(i32 %x) {
; X86-LABEL: pr42118_i32:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jne .LBB57_1
; X86-NEXT: # %bb.2:
; X86-NEXT: jmp bar # TAILCALL
; X86-NEXT: .LBB57_1:
; X86-NEXT: retl
;
; X64-LABEL: pr42118_i32:
; X64: # %bb.0:
; X64-NEXT: blsrl %edi, %eax
; X64-NEXT: jne .LBB57_1
; X64-NEXT: # %bb.2:
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB57_1:
; X64-NEXT: retq
  %tmp = sub i32 0, %x
  %tmp1 = and i32 %tmp, %x
  %cmp = icmp eq i32 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

define void @pr42118_i64(i64 %x) {
; X86-LABEL: pr42118_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %eax, %edx
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: jne .LBB58_1
; X86-NEXT: # %bb.2:
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: jmp bar # TAILCALL
; X86-NEXT: .LBB58_1:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: pr42118_i64:
; X64: # %bb.0:
; X64-NEXT: blsrq %rdi, %rax
; X64-NEXT: jne .LBB58_1
; X64-NEXT: # %bb.2:
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB58_1:
; X64-NEXT: retq
  %tmp = sub i64 0, %x
  %tmp1 = and i64 %tmp, %x
  %cmp = icmp eq i64 %tmp1, %x
  br i1 %cmp, label %1, label %2

  tail call void @bar()
  br label %2

  ret void
}

define i32 @blsi_cflag_32(i32 %x, i32 %y) nounwind {
; X86-LABEL: blsi_cflag_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: jne .LBB59_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
; X86-NEXT: .LBB59_1:
; X86-NEXT: blsil %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi_cflag_32:
; X64: # %bb.0:
; X64-NEXT: blsil %edi, %eax
; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: retq
  %tobool = icmp eq i32 %x, 0
  %sub = sub nsw i32 0, %x
  %and = and i32 %sub, %x
  %cond = select i1 %tobool, i32 %y, i32 %and
  ret i32 %cond
}

define i64 @blsi_cflag_64(i64 %x, i64 %y) nounwind {
; X86-LABEL: blsi_cflag_64:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: orl %esi, %edi
; X86-NEXT: jne .LBB60_1
; X86-NEXT: # %bb.2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jmp .LBB60_3
; X86-NEXT: .LBB60_1:
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: .LBB60_3:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: blsi_cflag_64:
; X64: # %bb.0:
; X64-NEXT: blsiq %rdi, %rax
; X64-NEXT: cmovaeq %rsi, %rax
; X64-NEXT: retq
  %tobool = icmp eq i64 %x, 0
  %sub = sub nsw i64 0, %x
  %and = and i64 %sub, %x
  %cond = select i1 %tobool, i64 %y, i64 %and
  ret i64 %cond
}