1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI 3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI 8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM 9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM 10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM 11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM 12 13; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll 14 15; https://bugs.llvm.org/show_bug.cgi?id=36419 16; https://bugs.llvm.org/show_bug.cgi?id=37603 17; https://bugs.llvm.org/show_bug.cgi?id=37610 18 19; Patterns: 20; a) x & ((1 << nbits) - 1) 21; b) x & ~(-1 << nbits) 22; c) x & (-1 >> (32 - nbits)) 23; d) x << (32 - nbits) >> (32 - nbits) 24; are equivalent. 25 26; ---------------------------------------------------------------------------- ; 27; Pattern a. 
32-bit 28; ---------------------------------------------------------------------------- ; 29 30define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { 31; X86-NOBMI-LABEL: bzhi32_a0: 32; X86-NOBMI: # %bb.0: 33; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 34; X86-NOBMI-NEXT: movl $1, %eax 35; X86-NOBMI-NEXT: shll %cl, %eax 36; X86-NOBMI-NEXT: decl %eax 37; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 38; X86-NOBMI-NEXT: retl 39; 40; X86-BMI1-LABEL: bzhi32_a0: 41; X86-BMI1: # %bb.0: 42; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 43; X86-BMI1-NEXT: shll $8, %eax 44; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 45; X86-BMI1-NEXT: retl 46; 47; X86-BMI2-LABEL: bzhi32_a0: 48; X86-BMI2: # %bb.0: 49; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 50; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 51; X86-BMI2-NEXT: retl 52; 53; X64-NOBMI-LABEL: bzhi32_a0: 54; X64-NOBMI: # %bb.0: 55; X64-NOBMI-NEXT: movl %esi, %ecx 56; X64-NOBMI-NEXT: movl $1, %eax 57; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 58; X64-NOBMI-NEXT: shll %cl, %eax 59; X64-NOBMI-NEXT: decl %eax 60; X64-NOBMI-NEXT: andl %edi, %eax 61; X64-NOBMI-NEXT: retq 62; 63; X64-BMI1-LABEL: bzhi32_a0: 64; X64-BMI1: # %bb.0: 65; X64-BMI1-NEXT: shll $8, %esi 66; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 67; X64-BMI1-NEXT: retq 68; 69; X64-BMI2-LABEL: bzhi32_a0: 70; X64-BMI2: # %bb.0: 71; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 72; X64-BMI2-NEXT: retq 73 %onebit = shl i32 1, %numlowbits 74 %mask = add nsw i32 %onebit, -1 75 %masked = and i32 %mask, %val 76 ret i32 %masked 77} 78 79define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { 80; X86-NOBMI-LABEL: bzhi32_a1_indexzext: 81; X86-NOBMI: # %bb.0: 82; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 83; X86-NOBMI-NEXT: movl $1, %eax 84; X86-NOBMI-NEXT: shll %cl, %eax 85; X86-NOBMI-NEXT: decl %eax 86; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 87; X86-NOBMI-NEXT: retl 88; 89; X86-BMI1-LABEL: bzhi32_a1_indexzext: 90; X86-BMI1: # %bb.0: 
91; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 92; X86-BMI1-NEXT: shll $8, %eax 93; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 94; X86-BMI1-NEXT: retl 95; 96; X86-BMI2-LABEL: bzhi32_a1_indexzext: 97; X86-BMI2: # %bb.0: 98; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 99; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 100; X86-BMI2-NEXT: retl 101; 102; X64-NOBMI-LABEL: bzhi32_a1_indexzext: 103; X64-NOBMI: # %bb.0: 104; X64-NOBMI-NEXT: movl %esi, %ecx 105; X64-NOBMI-NEXT: movl $1, %eax 106; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 107; X64-NOBMI-NEXT: shll %cl, %eax 108; X64-NOBMI-NEXT: decl %eax 109; X64-NOBMI-NEXT: andl %edi, %eax 110; X64-NOBMI-NEXT: retq 111; 112; X64-BMI1-LABEL: bzhi32_a1_indexzext: 113; X64-BMI1: # %bb.0: 114; X64-BMI1-NEXT: shll $8, %esi 115; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 116; X64-BMI1-NEXT: retq 117; 118; X64-BMI2-LABEL: bzhi32_a1_indexzext: 119; X64-BMI2: # %bb.0: 120; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 121; X64-BMI2-NEXT: retq 122 %conv = zext i8 %numlowbits to i32 123 %onebit = shl i32 1, %conv 124 %mask = add nsw i32 %onebit, -1 125 %masked = and i32 %mask, %val 126 ret i32 %masked 127} 128 129define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind { 130; X86-NOBMI-LABEL: bzhi32_a2_load: 131; X86-NOBMI: # %bb.0: 132; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 133; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 134; X86-NOBMI-NEXT: movl $1, %eax 135; X86-NOBMI-NEXT: shll %cl, %eax 136; X86-NOBMI-NEXT: decl %eax 137; X86-NOBMI-NEXT: andl (%edx), %eax 138; X86-NOBMI-NEXT: retl 139; 140; X86-BMI1-LABEL: bzhi32_a2_load: 141; X86-BMI1: # %bb.0: 142; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 143; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 144; X86-BMI1-NEXT: shll $8, %ecx 145; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 146; X86-BMI1-NEXT: retl 147; 148; X86-BMI2-LABEL: bzhi32_a2_load: 149; X86-BMI2: # %bb.0: 150; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 151; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), 
%ecx 152; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 153; X86-BMI2-NEXT: retl 154; 155; X64-NOBMI-LABEL: bzhi32_a2_load: 156; X64-NOBMI: # %bb.0: 157; X64-NOBMI-NEXT: movl %esi, %ecx 158; X64-NOBMI-NEXT: movl $1, %eax 159; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 160; X64-NOBMI-NEXT: shll %cl, %eax 161; X64-NOBMI-NEXT: decl %eax 162; X64-NOBMI-NEXT: andl (%rdi), %eax 163; X64-NOBMI-NEXT: retq 164; 165; X64-BMI1-LABEL: bzhi32_a2_load: 166; X64-BMI1: # %bb.0: 167; X64-BMI1-NEXT: shll $8, %esi 168; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 169; X64-BMI1-NEXT: retq 170; 171; X64-BMI2-LABEL: bzhi32_a2_load: 172; X64-BMI2: # %bb.0: 173; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 174; X64-BMI2-NEXT: retq 175 %val = load i32, ptr %w 176 %onebit = shl i32 1, %numlowbits 177 %mask = add nsw i32 %onebit, -1 178 %masked = and i32 %mask, %val 179 ret i32 %masked 180} 181 182define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { 183; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext: 184; X86-NOBMI: # %bb.0: 185; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 186; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 187; X86-NOBMI-NEXT: movl $1, %eax 188; X86-NOBMI-NEXT: shll %cl, %eax 189; X86-NOBMI-NEXT: decl %eax 190; X86-NOBMI-NEXT: andl (%edx), %eax 191; X86-NOBMI-NEXT: retl 192; 193; X86-BMI1-LABEL: bzhi32_a3_load_indexzext: 194; X86-BMI1: # %bb.0: 195; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 196; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 197; X86-BMI1-NEXT: shll $8, %ecx 198; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 199; X86-BMI1-NEXT: retl 200; 201; X86-BMI2-LABEL: bzhi32_a3_load_indexzext: 202; X86-BMI2: # %bb.0: 203; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 204; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 205; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 206; X86-BMI2-NEXT: retl 207; 208; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext: 209; X64-NOBMI: # %bb.0: 210; X64-NOBMI-NEXT: movl %esi, %ecx 211; X64-NOBMI-NEXT: movl $1, %eax 212; X64-NOBMI-NEXT: # 
kill: def $cl killed $cl killed $ecx 213; X64-NOBMI-NEXT: shll %cl, %eax 214; X64-NOBMI-NEXT: decl %eax 215; X64-NOBMI-NEXT: andl (%rdi), %eax 216; X64-NOBMI-NEXT: retq 217; 218; X64-BMI1-LABEL: bzhi32_a3_load_indexzext: 219; X64-BMI1: # %bb.0: 220; X64-BMI1-NEXT: shll $8, %esi 221; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 222; X64-BMI1-NEXT: retq 223; 224; X64-BMI2-LABEL: bzhi32_a3_load_indexzext: 225; X64-BMI2: # %bb.0: 226; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 227; X64-BMI2-NEXT: retq 228 %val = load i32, ptr %w 229 %conv = zext i8 %numlowbits to i32 230 %onebit = shl i32 1, %conv 231 %mask = add nsw i32 %onebit, -1 232 %masked = and i32 %mask, %val 233 ret i32 %masked 234} 235 236define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { 237; X86-NOBMI-LABEL: bzhi32_a4_commutative: 238; X86-NOBMI: # %bb.0: 239; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 240; X86-NOBMI-NEXT: movl $1, %eax 241; X86-NOBMI-NEXT: shll %cl, %eax 242; X86-NOBMI-NEXT: decl %eax 243; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 244; X86-NOBMI-NEXT: retl 245; 246; X86-BMI1-LABEL: bzhi32_a4_commutative: 247; X86-BMI1: # %bb.0: 248; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 249; X86-BMI1-NEXT: shll $8, %eax 250; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 251; X86-BMI1-NEXT: retl 252; 253; X86-BMI2-LABEL: bzhi32_a4_commutative: 254; X86-BMI2: # %bb.0: 255; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 256; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 257; X86-BMI2-NEXT: retl 258; 259; X64-NOBMI-LABEL: bzhi32_a4_commutative: 260; X64-NOBMI: # %bb.0: 261; X64-NOBMI-NEXT: movl %esi, %ecx 262; X64-NOBMI-NEXT: movl $1, %eax 263; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 264; X64-NOBMI-NEXT: shll %cl, %eax 265; X64-NOBMI-NEXT: decl %eax 266; X64-NOBMI-NEXT: andl %edi, %eax 267; X64-NOBMI-NEXT: retq 268; 269; X64-BMI1-LABEL: bzhi32_a4_commutative: 270; X64-BMI1: # %bb.0: 271; X64-BMI1-NEXT: shll $8, %esi 272; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 273; 
X64-BMI1-NEXT: retq 274; 275; X64-BMI2-LABEL: bzhi32_a4_commutative: 276; X64-BMI2: # %bb.0: 277; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 278; X64-BMI2-NEXT: retq 279 %onebit = shl i32 1, %numlowbits 280 %mask = add nsw i32 %onebit, -1 281 %masked = and i32 %val, %mask ; swapped order 282 ret i32 %masked 283} 284 285; 64-bit 286 287define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { 288; X86-NOBMI-LABEL: bzhi64_a0: 289; X86-NOBMI: # %bb.0: 290; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 291; X86-NOBMI-NEXT: movl $1, %eax 292; X86-NOBMI-NEXT: xorl %edx, %edx 293; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 294; X86-NOBMI-NEXT: shll %cl, %eax 295; X86-NOBMI-NEXT: testb $32, %cl 296; X86-NOBMI-NEXT: je .LBB5_2 297; X86-NOBMI-NEXT: # %bb.1: 298; X86-NOBMI-NEXT: movl %eax, %edx 299; X86-NOBMI-NEXT: xorl %eax, %eax 300; X86-NOBMI-NEXT: .LBB5_2: 301; X86-NOBMI-NEXT: addl $-1, %eax 302; X86-NOBMI-NEXT: adcl $-1, %edx 303; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 304; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 305; X86-NOBMI-NEXT: retl 306; 307; X86-BMI1-LABEL: bzhi64_a0: 308; X86-BMI1: # %bb.0: 309; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 310; X86-BMI1-NEXT: movl $1, %eax 311; X86-BMI1-NEXT: xorl %edx, %edx 312; X86-BMI1-NEXT: shldl %cl, %eax, %edx 313; X86-BMI1-NEXT: shll %cl, %eax 314; X86-BMI1-NEXT: testb $32, %cl 315; X86-BMI1-NEXT: je .LBB5_2 316; X86-BMI1-NEXT: # %bb.1: 317; X86-BMI1-NEXT: movl %eax, %edx 318; X86-BMI1-NEXT: xorl %eax, %eax 319; X86-BMI1-NEXT: .LBB5_2: 320; X86-BMI1-NEXT: addl $-1, %eax 321; X86-BMI1-NEXT: adcl $-1, %edx 322; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 323; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 324; X86-BMI1-NEXT: retl 325; 326; X86-BMI2-LABEL: bzhi64_a0: 327; X86-BMI2: # %bb.0: 328; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 329; X86-BMI2-NEXT: movl $1, %eax 330; X86-BMI2-NEXT: xorl %edx, %edx 331; X86-BMI2-NEXT: shldl %cl, %eax, %edx 332; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 333; X86-BMI2-NEXT: testb $32, %cl 
334; X86-BMI2-NEXT: je .LBB5_2 335; X86-BMI2-NEXT: # %bb.1: 336; X86-BMI2-NEXT: movl %eax, %edx 337; X86-BMI2-NEXT: xorl %eax, %eax 338; X86-BMI2-NEXT: .LBB5_2: 339; X86-BMI2-NEXT: addl $-1, %eax 340; X86-BMI2-NEXT: adcl $-1, %edx 341; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 342; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 343; X86-BMI2-NEXT: retl 344; 345; X64-NOBMI-LABEL: bzhi64_a0: 346; X64-NOBMI: # %bb.0: 347; X64-NOBMI-NEXT: movq %rsi, %rcx 348; X64-NOBMI-NEXT: movl $1, %eax 349; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 350; X64-NOBMI-NEXT: shlq %cl, %rax 351; X64-NOBMI-NEXT: decq %rax 352; X64-NOBMI-NEXT: andq %rdi, %rax 353; X64-NOBMI-NEXT: retq 354; 355; X64-BMI1-LABEL: bzhi64_a0: 356; X64-BMI1: # %bb.0: 357; X64-BMI1-NEXT: shll $8, %esi 358; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 359; X64-BMI1-NEXT: retq 360; 361; X64-BMI2-LABEL: bzhi64_a0: 362; X64-BMI2: # %bb.0: 363; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 364; X64-BMI2-NEXT: retq 365 %onebit = shl i64 1, %numlowbits 366 %mask = add nsw i64 %onebit, -1 367 %masked = and i64 %mask, %val 368 ret i64 %masked 369} 370 371define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { 372; X86-NOBMI-LABEL: bzhi64_a1_indexzext: 373; X86-NOBMI: # %bb.0: 374; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 375; X86-NOBMI-NEXT: movl $1, %eax 376; X86-NOBMI-NEXT: xorl %edx, %edx 377; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 378; X86-NOBMI-NEXT: shll %cl, %eax 379; X86-NOBMI-NEXT: testb $32, %cl 380; X86-NOBMI-NEXT: je .LBB6_2 381; X86-NOBMI-NEXT: # %bb.1: 382; X86-NOBMI-NEXT: movl %eax, %edx 383; X86-NOBMI-NEXT: xorl %eax, %eax 384; X86-NOBMI-NEXT: .LBB6_2: 385; X86-NOBMI-NEXT: addl $-1, %eax 386; X86-NOBMI-NEXT: adcl $-1, %edx 387; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 388; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 389; X86-NOBMI-NEXT: retl 390; 391; X86-BMI1-LABEL: bzhi64_a1_indexzext: 392; X86-BMI1: # %bb.0: 393; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 394; X86-BMI1-NEXT: movl 
$1, %eax 395; X86-BMI1-NEXT: xorl %edx, %edx 396; X86-BMI1-NEXT: shldl %cl, %eax, %edx 397; X86-BMI1-NEXT: shll %cl, %eax 398; X86-BMI1-NEXT: testb $32, %cl 399; X86-BMI1-NEXT: je .LBB6_2 400; X86-BMI1-NEXT: # %bb.1: 401; X86-BMI1-NEXT: movl %eax, %edx 402; X86-BMI1-NEXT: xorl %eax, %eax 403; X86-BMI1-NEXT: .LBB6_2: 404; X86-BMI1-NEXT: addl $-1, %eax 405; X86-BMI1-NEXT: adcl $-1, %edx 406; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 407; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 408; X86-BMI1-NEXT: retl 409; 410; X86-BMI2-LABEL: bzhi64_a1_indexzext: 411; X86-BMI2: # %bb.0: 412; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 413; X86-BMI2-NEXT: movl $1, %eax 414; X86-BMI2-NEXT: xorl %edx, %edx 415; X86-BMI2-NEXT: shldl %cl, %eax, %edx 416; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 417; X86-BMI2-NEXT: testb $32, %cl 418; X86-BMI2-NEXT: je .LBB6_2 419; X86-BMI2-NEXT: # %bb.1: 420; X86-BMI2-NEXT: movl %eax, %edx 421; X86-BMI2-NEXT: xorl %eax, %eax 422; X86-BMI2-NEXT: .LBB6_2: 423; X86-BMI2-NEXT: addl $-1, %eax 424; X86-BMI2-NEXT: adcl $-1, %edx 425; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 426; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 427; X86-BMI2-NEXT: retl 428; 429; X64-NOBMI-LABEL: bzhi64_a1_indexzext: 430; X64-NOBMI: # %bb.0: 431; X64-NOBMI-NEXT: movl %esi, %ecx 432; X64-NOBMI-NEXT: movl $1, %eax 433; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 434; X64-NOBMI-NEXT: shlq %cl, %rax 435; X64-NOBMI-NEXT: decq %rax 436; X64-NOBMI-NEXT: andq %rdi, %rax 437; X64-NOBMI-NEXT: retq 438; 439; X64-BMI1-LABEL: bzhi64_a1_indexzext: 440; X64-BMI1: # %bb.0: 441; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 442; X64-BMI1-NEXT: shll $8, %esi 443; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 444; X64-BMI1-NEXT: retq 445; 446; X64-BMI2-LABEL: bzhi64_a1_indexzext: 447; X64-BMI2: # %bb.0: 448; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 449; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 450; X64-BMI2-NEXT: retq 451 %conv = zext i8 %numlowbits to i64 452 %onebit = shl i64 1, 
%conv 453 %mask = add nsw i64 %onebit, -1 454 %masked = and i64 %mask, %val 455 ret i64 %masked 456} 457 458define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind { 459; X86-NOBMI-LABEL: bzhi64_a2_load: 460; X86-NOBMI: # %bb.0: 461; X86-NOBMI-NEXT: pushl %esi 462; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 463; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 464; X86-NOBMI-NEXT: movl $1, %eax 465; X86-NOBMI-NEXT: xorl %edx, %edx 466; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 467; X86-NOBMI-NEXT: shll %cl, %eax 468; X86-NOBMI-NEXT: testb $32, %cl 469; X86-NOBMI-NEXT: je .LBB7_2 470; X86-NOBMI-NEXT: # %bb.1: 471; X86-NOBMI-NEXT: movl %eax, %edx 472; X86-NOBMI-NEXT: xorl %eax, %eax 473; X86-NOBMI-NEXT: .LBB7_2: 474; X86-NOBMI-NEXT: addl $-1, %eax 475; X86-NOBMI-NEXT: adcl $-1, %edx 476; X86-NOBMI-NEXT: andl 4(%esi), %edx 477; X86-NOBMI-NEXT: andl (%esi), %eax 478; X86-NOBMI-NEXT: popl %esi 479; X86-NOBMI-NEXT: retl 480; 481; X86-BMI1-LABEL: bzhi64_a2_load: 482; X86-BMI1: # %bb.0: 483; X86-BMI1-NEXT: pushl %esi 484; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 485; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 486; X86-BMI1-NEXT: movl $1, %eax 487; X86-BMI1-NEXT: xorl %edx, %edx 488; X86-BMI1-NEXT: shldl %cl, %eax, %edx 489; X86-BMI1-NEXT: shll %cl, %eax 490; X86-BMI1-NEXT: testb $32, %cl 491; X86-BMI1-NEXT: je .LBB7_2 492; X86-BMI1-NEXT: # %bb.1: 493; X86-BMI1-NEXT: movl %eax, %edx 494; X86-BMI1-NEXT: xorl %eax, %eax 495; X86-BMI1-NEXT: .LBB7_2: 496; X86-BMI1-NEXT: addl $-1, %eax 497; X86-BMI1-NEXT: adcl $-1, %edx 498; X86-BMI1-NEXT: andl 4(%esi), %edx 499; X86-BMI1-NEXT: andl (%esi), %eax 500; X86-BMI1-NEXT: popl %esi 501; X86-BMI1-NEXT: retl 502; 503; X86-BMI2-LABEL: bzhi64_a2_load: 504; X86-BMI2: # %bb.0: 505; X86-BMI2-NEXT: pushl %esi 506; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 507; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 508; X86-BMI2-NEXT: movl $1, %eax 509; X86-BMI2-NEXT: xorl %edx, %edx 510; X86-BMI2-NEXT: shldl %cl, %eax, %edx 511; X86-BMI2-NEXT: shlxl 
%ecx, %eax, %eax 512; X86-BMI2-NEXT: testb $32, %cl 513; X86-BMI2-NEXT: je .LBB7_2 514; X86-BMI2-NEXT: # %bb.1: 515; X86-BMI2-NEXT: movl %eax, %edx 516; X86-BMI2-NEXT: xorl %eax, %eax 517; X86-BMI2-NEXT: .LBB7_2: 518; X86-BMI2-NEXT: addl $-1, %eax 519; X86-BMI2-NEXT: adcl $-1, %edx 520; X86-BMI2-NEXT: andl 4(%esi), %edx 521; X86-BMI2-NEXT: andl (%esi), %eax 522; X86-BMI2-NEXT: popl %esi 523; X86-BMI2-NEXT: retl 524; 525; X64-NOBMI-LABEL: bzhi64_a2_load: 526; X64-NOBMI: # %bb.0: 527; X64-NOBMI-NEXT: movq %rsi, %rcx 528; X64-NOBMI-NEXT: movl $1, %eax 529; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 530; X64-NOBMI-NEXT: shlq %cl, %rax 531; X64-NOBMI-NEXT: decq %rax 532; X64-NOBMI-NEXT: andq (%rdi), %rax 533; X64-NOBMI-NEXT: retq 534; 535; X64-BMI1-LABEL: bzhi64_a2_load: 536; X64-BMI1: # %bb.0: 537; X64-BMI1-NEXT: shll $8, %esi 538; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 539; X64-BMI1-NEXT: retq 540; 541; X64-BMI2-LABEL: bzhi64_a2_load: 542; X64-BMI2: # %bb.0: 543; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 544; X64-BMI2-NEXT: retq 545 %val = load i64, ptr %w 546 %onebit = shl i64 1, %numlowbits 547 %mask = add nsw i64 %onebit, -1 548 %masked = and i64 %mask, %val 549 ret i64 %masked 550} 551 552define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { 553; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext: 554; X86-NOBMI: # %bb.0: 555; X86-NOBMI-NEXT: pushl %esi 556; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 557; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 558; X86-NOBMI-NEXT: movl $1, %eax 559; X86-NOBMI-NEXT: xorl %edx, %edx 560; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 561; X86-NOBMI-NEXT: shll %cl, %eax 562; X86-NOBMI-NEXT: testb $32, %cl 563; X86-NOBMI-NEXT: je .LBB8_2 564; X86-NOBMI-NEXT: # %bb.1: 565; X86-NOBMI-NEXT: movl %eax, %edx 566; X86-NOBMI-NEXT: xorl %eax, %eax 567; X86-NOBMI-NEXT: .LBB8_2: 568; X86-NOBMI-NEXT: addl $-1, %eax 569; X86-NOBMI-NEXT: adcl $-1, %edx 570; X86-NOBMI-NEXT: andl 4(%esi), %edx 571; X86-NOBMI-NEXT: andl 
(%esi), %eax 572; X86-NOBMI-NEXT: popl %esi 573; X86-NOBMI-NEXT: retl 574; 575; X86-BMI1-LABEL: bzhi64_a3_load_indexzext: 576; X86-BMI1: # %bb.0: 577; X86-BMI1-NEXT: pushl %esi 578; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 579; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 580; X86-BMI1-NEXT: movl $1, %eax 581; X86-BMI1-NEXT: xorl %edx, %edx 582; X86-BMI1-NEXT: shldl %cl, %eax, %edx 583; X86-BMI1-NEXT: shll %cl, %eax 584; X86-BMI1-NEXT: testb $32, %cl 585; X86-BMI1-NEXT: je .LBB8_2 586; X86-BMI1-NEXT: # %bb.1: 587; X86-BMI1-NEXT: movl %eax, %edx 588; X86-BMI1-NEXT: xorl %eax, %eax 589; X86-BMI1-NEXT: .LBB8_2: 590; X86-BMI1-NEXT: addl $-1, %eax 591; X86-BMI1-NEXT: adcl $-1, %edx 592; X86-BMI1-NEXT: andl 4(%esi), %edx 593; X86-BMI1-NEXT: andl (%esi), %eax 594; X86-BMI1-NEXT: popl %esi 595; X86-BMI1-NEXT: retl 596; 597; X86-BMI2-LABEL: bzhi64_a3_load_indexzext: 598; X86-BMI2: # %bb.0: 599; X86-BMI2-NEXT: pushl %esi 600; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 601; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 602; X86-BMI2-NEXT: movl $1, %eax 603; X86-BMI2-NEXT: xorl %edx, %edx 604; X86-BMI2-NEXT: shldl %cl, %eax, %edx 605; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 606; X86-BMI2-NEXT: testb $32, %cl 607; X86-BMI2-NEXT: je .LBB8_2 608; X86-BMI2-NEXT: # %bb.1: 609; X86-BMI2-NEXT: movl %eax, %edx 610; X86-BMI2-NEXT: xorl %eax, %eax 611; X86-BMI2-NEXT: .LBB8_2: 612; X86-BMI2-NEXT: addl $-1, %eax 613; X86-BMI2-NEXT: adcl $-1, %edx 614; X86-BMI2-NEXT: andl 4(%esi), %edx 615; X86-BMI2-NEXT: andl (%esi), %eax 616; X86-BMI2-NEXT: popl %esi 617; X86-BMI2-NEXT: retl 618; 619; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext: 620; X64-NOBMI: # %bb.0: 621; X64-NOBMI-NEXT: movl %esi, %ecx 622; X64-NOBMI-NEXT: movl $1, %eax 623; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 624; X64-NOBMI-NEXT: shlq %cl, %rax 625; X64-NOBMI-NEXT: decq %rax 626; X64-NOBMI-NEXT: andq (%rdi), %rax 627; X64-NOBMI-NEXT: retq 628; 629; X64-BMI1-LABEL: bzhi64_a3_load_indexzext: 630; X64-BMI1: # %bb.0: 
631; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 632; X64-BMI1-NEXT: shll $8, %esi 633; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 634; X64-BMI1-NEXT: retq 635; 636; X64-BMI2-LABEL: bzhi64_a3_load_indexzext: 637; X64-BMI2: # %bb.0: 638; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 639; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 640; X64-BMI2-NEXT: retq 641 %val = load i64, ptr %w 642 %conv = zext i8 %numlowbits to i64 643 %onebit = shl i64 1, %conv 644 %mask = add nsw i64 %onebit, -1 645 %masked = and i64 %mask, %val 646 ret i64 %masked 647} 648 649define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { 650; X86-NOBMI-LABEL: bzhi64_a4_commutative: 651; X86-NOBMI: # %bb.0: 652; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 653; X86-NOBMI-NEXT: movl $1, %eax 654; X86-NOBMI-NEXT: xorl %edx, %edx 655; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 656; X86-NOBMI-NEXT: shll %cl, %eax 657; X86-NOBMI-NEXT: testb $32, %cl 658; X86-NOBMI-NEXT: je .LBB9_2 659; X86-NOBMI-NEXT: # %bb.1: 660; X86-NOBMI-NEXT: movl %eax, %edx 661; X86-NOBMI-NEXT: xorl %eax, %eax 662; X86-NOBMI-NEXT: .LBB9_2: 663; X86-NOBMI-NEXT: addl $-1, %eax 664; X86-NOBMI-NEXT: adcl $-1, %edx 665; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 666; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 667; X86-NOBMI-NEXT: retl 668; 669; X86-BMI1-LABEL: bzhi64_a4_commutative: 670; X86-BMI1: # %bb.0: 671; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 672; X86-BMI1-NEXT: movl $1, %eax 673; X86-BMI1-NEXT: xorl %edx, %edx 674; X86-BMI1-NEXT: shldl %cl, %eax, %edx 675; X86-BMI1-NEXT: shll %cl, %eax 676; X86-BMI1-NEXT: testb $32, %cl 677; X86-BMI1-NEXT: je .LBB9_2 678; X86-BMI1-NEXT: # %bb.1: 679; X86-BMI1-NEXT: movl %eax, %edx 680; X86-BMI1-NEXT: xorl %eax, %eax 681; X86-BMI1-NEXT: .LBB9_2: 682; X86-BMI1-NEXT: addl $-1, %eax 683; X86-BMI1-NEXT: adcl $-1, %edx 684; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 685; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 686; X86-BMI1-NEXT: retl 687; 688; X86-BMI2-LABEL: 
bzhi64_a4_commutative: 689; X86-BMI2: # %bb.0: 690; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 691; X86-BMI2-NEXT: movl $1, %eax 692; X86-BMI2-NEXT: xorl %edx, %edx 693; X86-BMI2-NEXT: shldl %cl, %eax, %edx 694; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 695; X86-BMI2-NEXT: testb $32, %cl 696; X86-BMI2-NEXT: je .LBB9_2 697; X86-BMI2-NEXT: # %bb.1: 698; X86-BMI2-NEXT: movl %eax, %edx 699; X86-BMI2-NEXT: xorl %eax, %eax 700; X86-BMI2-NEXT: .LBB9_2: 701; X86-BMI2-NEXT: addl $-1, %eax 702; X86-BMI2-NEXT: adcl $-1, %edx 703; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 704; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 705; X86-BMI2-NEXT: retl 706; 707; X64-NOBMI-LABEL: bzhi64_a4_commutative: 708; X64-NOBMI: # %bb.0: 709; X64-NOBMI-NEXT: movq %rsi, %rcx 710; X64-NOBMI-NEXT: movl $1, %eax 711; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 712; X64-NOBMI-NEXT: shlq %cl, %rax 713; X64-NOBMI-NEXT: decq %rax 714; X64-NOBMI-NEXT: andq %rdi, %rax 715; X64-NOBMI-NEXT: retq 716; 717; X64-BMI1-LABEL: bzhi64_a4_commutative: 718; X64-BMI1: # %bb.0: 719; X64-BMI1-NEXT: shll $8, %esi 720; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 721; X64-BMI1-NEXT: retq 722; 723; X64-BMI2-LABEL: bzhi64_a4_commutative: 724; X64-BMI2: # %bb.0: 725; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 726; X64-BMI2-NEXT: retq 727 %onebit = shl i64 1, %numlowbits 728 %mask = add nsw i64 %onebit, -1 729 %masked = and i64 %val, %mask ; swapped order 730 ret i64 %masked 731} 732 733; 64-bit, but with 32-bit output 734 735; Everything done in 64-bit, truncation happens last. 
736define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { 737; X86-NOBMI-LABEL: bzhi64_32_a0: 738; X86-NOBMI: # %bb.0: 739; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 740; X86-NOBMI-NEXT: movl $1, %edx 741; X86-NOBMI-NEXT: shll %cl, %edx 742; X86-NOBMI-NEXT: xorl %eax, %eax 743; X86-NOBMI-NEXT: testb $32, %cl 744; X86-NOBMI-NEXT: jne .LBB10_2 745; X86-NOBMI-NEXT: # %bb.1: 746; X86-NOBMI-NEXT: movl %edx, %eax 747; X86-NOBMI-NEXT: .LBB10_2: 748; X86-NOBMI-NEXT: decl %eax 749; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 750; X86-NOBMI-NEXT: retl 751; 752; X86-BMI1-LABEL: bzhi64_32_a0: 753; X86-BMI1: # %bb.0: 754; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 755; X86-BMI1-NEXT: movl $1, %edx 756; X86-BMI1-NEXT: shll %cl, %edx 757; X86-BMI1-NEXT: xorl %eax, %eax 758; X86-BMI1-NEXT: testb $32, %cl 759; X86-BMI1-NEXT: jne .LBB10_2 760; X86-BMI1-NEXT: # %bb.1: 761; X86-BMI1-NEXT: movl %edx, %eax 762; X86-BMI1-NEXT: .LBB10_2: 763; X86-BMI1-NEXT: decl %eax 764; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 765; X86-BMI1-NEXT: retl 766; 767; X86-BMI2-LABEL: bzhi64_32_a0: 768; X86-BMI2: # %bb.0: 769; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 770; X86-BMI2-NEXT: xorl %eax, %eax 771; X86-BMI2-NEXT: testb $32, %cl 772; X86-BMI2-NEXT: jne .LBB10_2 773; X86-BMI2-NEXT: # %bb.1: 774; X86-BMI2-NEXT: movl $1, %eax 775; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 776; X86-BMI2-NEXT: .LBB10_2: 777; X86-BMI2-NEXT: decl %eax 778; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 779; X86-BMI2-NEXT: retl 780; 781; X64-NOBMI-LABEL: bzhi64_32_a0: 782; X64-NOBMI: # %bb.0: 783; X64-NOBMI-NEXT: movq %rsi, %rcx 784; X64-NOBMI-NEXT: movl $1, %eax 785; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 786; X64-NOBMI-NEXT: shlq %cl, %rax 787; X64-NOBMI-NEXT: decl %eax 788; X64-NOBMI-NEXT: andl %edi, %eax 789; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 790; X64-NOBMI-NEXT: retq 791; 792; X64-BMI1-LABEL: bzhi64_32_a0: 793; X64-BMI1: # %bb.0: 794; X64-BMI1-NEXT: shll $8, %esi 795; 
X64-BMI1-NEXT: bextrl %esi, %edi, %eax 796; X64-BMI1-NEXT: retq 797; 798; X64-BMI2-LABEL: bzhi64_32_a0: 799; X64-BMI2: # %bb.0: 800; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 801; X64-BMI2-NEXT: retq 802 %onebit = shl i64 1, %numlowbits 803 %mask = add nsw i64 %onebit, -1 804 %masked = and i64 %mask, %val 805 %res = trunc i64 %masked to i32 806 ret i32 %res 807} 808 809; Shifting happens in 64-bit, then truncation. Masking is 32-bit. 810define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { 811; X86-NOBMI-LABEL: bzhi64_32_a1: 812; X86-NOBMI: # %bb.0: 813; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 814; X86-NOBMI-NEXT: movl $1, %eax 815; X86-NOBMI-NEXT: shll %cl, %eax 816; X86-NOBMI-NEXT: decl %eax 817; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 818; X86-NOBMI-NEXT: retl 819; 820; X86-BMI1-LABEL: bzhi64_32_a1: 821; X86-BMI1: # %bb.0: 822; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 823; X86-BMI1-NEXT: shll $8, %eax 824; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 825; X86-BMI1-NEXT: retl 826; 827; X86-BMI2-LABEL: bzhi64_32_a1: 828; X86-BMI2: # %bb.0: 829; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 830; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 831; X86-BMI2-NEXT: retl 832; 833; X64-NOBMI-LABEL: bzhi64_32_a1: 834; X64-NOBMI: # %bb.0: 835; X64-NOBMI-NEXT: movl %esi, %ecx 836; X64-NOBMI-NEXT: movl $1, %eax 837; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 838; X64-NOBMI-NEXT: shll %cl, %eax 839; X64-NOBMI-NEXT: decl %eax 840; X64-NOBMI-NEXT: andl %edi, %eax 841; X64-NOBMI-NEXT: retq 842; 843; X64-BMI1-LABEL: bzhi64_32_a1: 844; X64-BMI1: # %bb.0: 845; X64-BMI1-NEXT: shll $8, %esi 846; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 847; X64-BMI1-NEXT: retq 848; 849; X64-BMI2-LABEL: bzhi64_32_a1: 850; X64-BMI2: # %bb.0: 851; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 852; X64-BMI2-NEXT: retq 853 %truncval = trunc i64 %val to i32 854 %onebit = shl i32 1, %numlowbits 855 %mask = add nsw i32 %onebit, -1 856 %masked = and i32 %mask, %truncval 857 ret 
i32 %masked 858} 859 860; Shifting happens in 64-bit, then truncation (with extra use). 861; Masking is 32-bit. 862define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) nounwind { 863; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause: 864; X86-NOBMI: # %bb.0: 865; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 866; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 867; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 868; X86-NOBMI-NEXT: movl %edx, (%eax) 869; X86-NOBMI-NEXT: movl $1, %eax 870; X86-NOBMI-NEXT: shll %cl, %eax 871; X86-NOBMI-NEXT: decl %eax 872; X86-NOBMI-NEXT: andl %edx, %eax 873; X86-NOBMI-NEXT: retl 874; 875; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause: 876; X86-BMI1: # %bb.0: 877; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 878; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx 879; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 880; X86-BMI1-NEXT: movl %ecx, (%edx) 881; X86-BMI1-NEXT: shll $8, %eax 882; X86-BMI1-NEXT: bextrl %eax, %ecx, %eax 883; X86-BMI1-NEXT: retl 884; 885; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause: 886; X86-BMI2: # %bb.0: 887; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 888; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 889; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx 890; X86-BMI2-NEXT: movl %ecx, (%edx) 891; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax 892; X86-BMI2-NEXT: retl 893; 894; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause: 895; X64-NOBMI: # %bb.0: 896; X64-NOBMI-NEXT: movl %esi, %ecx 897; X64-NOBMI-NEXT: movl %edi, (%rdx) 898; X64-NOBMI-NEXT: movl $1, %eax 899; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 900; X64-NOBMI-NEXT: shll %cl, %eax 901; X64-NOBMI-NEXT: decl %eax 902; X64-NOBMI-NEXT: andl %edi, %eax 903; X64-NOBMI-NEXT: retq 904; 905; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause: 906; X64-BMI1: # %bb.0: 907; X64-BMI1-NEXT: movl %edi, (%rdx) 908; X64-BMI1-NEXT: shll $8, %esi 909; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 910; X64-BMI1-NEXT: retq 911; 912; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause: 913; 
X64-BMI2: # %bb.0: 914; X64-BMI2-NEXT: movl %edi, (%rdx) 915; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 916; X64-BMI2-NEXT: retq 917 %truncval = trunc i64 %val to i32 918 store i32 %truncval, ptr %escape 919 %onebit = shl i32 1, %numlowbits 920 %mask = add nsw i32 %onebit, -1 921 %masked = and i32 %mask, %truncval 922 ret i32 %masked 923} 924 925; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit. 926; Masking is 64-bit. Then truncation. 927define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { 928; X86-NOBMI-LABEL: bzhi64_32_a2: 929; X86-NOBMI: # %bb.0: 930; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 931; X86-NOBMI-NEXT: movl $1, %eax 932; X86-NOBMI-NEXT: shll %cl, %eax 933; X86-NOBMI-NEXT: decl %eax 934; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 935; X86-NOBMI-NEXT: retl 936; 937; X86-BMI1-LABEL: bzhi64_32_a2: 938; X86-BMI1: # %bb.0: 939; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 940; X86-BMI1-NEXT: shll $8, %eax 941; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 942; X86-BMI1-NEXT: retl 943; 944; X86-BMI2-LABEL: bzhi64_32_a2: 945; X86-BMI2: # %bb.0: 946; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 947; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 948; X86-BMI2-NEXT: retl 949; 950; X64-NOBMI-LABEL: bzhi64_32_a2: 951; X64-NOBMI: # %bb.0: 952; X64-NOBMI-NEXT: movl %esi, %ecx 953; X64-NOBMI-NEXT: movl $1, %eax 954; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 955; X64-NOBMI-NEXT: shll %cl, %eax 956; X64-NOBMI-NEXT: decl %eax 957; X64-NOBMI-NEXT: andl %edi, %eax 958; X64-NOBMI-NEXT: retq 959; 960; X64-BMI1-LABEL: bzhi64_32_a2: 961; X64-BMI1: # %bb.0: 962; X64-BMI1-NEXT: shll $8, %esi 963; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 964; X64-BMI1-NEXT: retq 965; 966; X64-BMI2-LABEL: bzhi64_32_a2: 967; X64-BMI2: # %bb.0: 968; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 969; X64-BMI2-NEXT: retq 970 %onebit = shl i32 1, %numlowbits 971 %mask = add nsw i32 %onebit, -1 972 %zextmask = zext i32 %mask to i64 973 %masked = and i64 
%zextmask, %val 974 %truncmasked = trunc i64 %masked to i32 975 ret i32 %truncmasked 976} 977 978; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit. 979; Masking is 64-bit. Then truncation. 980define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { 981; X86-NOBMI-LABEL: bzhi64_32_a3: 982; X86-NOBMI: # %bb.0: 983; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 984; X86-NOBMI-NEXT: movl $1, %edx 985; X86-NOBMI-NEXT: shll %cl, %edx 986; X86-NOBMI-NEXT: xorl %eax, %eax 987; X86-NOBMI-NEXT: testb $32, %cl 988; X86-NOBMI-NEXT: jne .LBB14_2 989; X86-NOBMI-NEXT: # %bb.1: 990; X86-NOBMI-NEXT: movl %edx, %eax 991; X86-NOBMI-NEXT: .LBB14_2: 992; X86-NOBMI-NEXT: decl %eax 993; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 994; X86-NOBMI-NEXT: retl 995; 996; X86-BMI1-LABEL: bzhi64_32_a3: 997; X86-BMI1: # %bb.0: 998; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 999; X86-BMI1-NEXT: movl $1, %edx 1000; X86-BMI1-NEXT: shll %cl, %edx 1001; X86-BMI1-NEXT: xorl %eax, %eax 1002; X86-BMI1-NEXT: testb $32, %cl 1003; X86-BMI1-NEXT: jne .LBB14_2 1004; X86-BMI1-NEXT: # %bb.1: 1005; X86-BMI1-NEXT: movl %edx, %eax 1006; X86-BMI1-NEXT: .LBB14_2: 1007; X86-BMI1-NEXT: decl %eax 1008; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 1009; X86-BMI1-NEXT: retl 1010; 1011; X86-BMI2-LABEL: bzhi64_32_a3: 1012; X86-BMI2: # %bb.0: 1013; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1014; X86-BMI2-NEXT: xorl %eax, %eax 1015; X86-BMI2-NEXT: testb $32, %cl 1016; X86-BMI2-NEXT: jne .LBB14_2 1017; X86-BMI2-NEXT: # %bb.1: 1018; X86-BMI2-NEXT: movl $1, %eax 1019; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 1020; X86-BMI2-NEXT: .LBB14_2: 1021; X86-BMI2-NEXT: decl %eax 1022; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 1023; X86-BMI2-NEXT: retl 1024; 1025; X64-NOBMI-LABEL: bzhi64_32_a3: 1026; X64-NOBMI: # %bb.0: 1027; X64-NOBMI-NEXT: movq %rsi, %rcx 1028; X64-NOBMI-NEXT: movl $1, %eax 1029; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1030; X64-NOBMI-NEXT: shlq %cl, %rax 1031; X64-NOBMI-NEXT: 
decl %eax 1032; X64-NOBMI-NEXT: andl %edi, %eax 1033; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 1034; X64-NOBMI-NEXT: retq 1035; 1036; X64-BMI1-LABEL: bzhi64_32_a3: 1037; X64-BMI1: # %bb.0: 1038; X64-BMI1-NEXT: shll $8, %esi 1039; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 1040; X64-BMI1-NEXT: retq 1041; 1042; X64-BMI2-LABEL: bzhi64_32_a3: 1043; X64-BMI2: # %bb.0: 1044; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 1045; X64-BMI2-NEXT: retq 1046 %onebit = shl i64 1, %numlowbits 1047 %mask = add nsw i64 %onebit, 4294967295 1048 %masked = and i64 %mask, %val 1049 %truncmasked = trunc i64 %masked to i32 1050 ret i32 %truncmasked 1051} 1052 1053; ---------------------------------------------------------------------------- ; 1054; Pattern b. 32-bit 1055; ---------------------------------------------------------------------------- ; 1056 1057define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { 1058; X86-NOBMI-LABEL: bzhi32_b0: 1059; X86-NOBMI: # %bb.0: 1060; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1061; X86-NOBMI-NEXT: movl $-1, %eax 1062; X86-NOBMI-NEXT: shll %cl, %eax 1063; X86-NOBMI-NEXT: notl %eax 1064; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1065; X86-NOBMI-NEXT: retl 1066; 1067; X86-BMI1-LABEL: bzhi32_b0: 1068; X86-BMI1: # %bb.0: 1069; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1070; X86-BMI1-NEXT: shll $8, %eax 1071; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1072; X86-BMI1-NEXT: retl 1073; 1074; X86-BMI2-LABEL: bzhi32_b0: 1075; X86-BMI2: # %bb.0: 1076; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1077; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1078; X86-BMI2-NEXT: retl 1079; 1080; X64-NOBMI-LABEL: bzhi32_b0: 1081; X64-NOBMI: # %bb.0: 1082; X64-NOBMI-NEXT: movl %esi, %ecx 1083; X64-NOBMI-NEXT: movl $-1, %eax 1084; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1085; X64-NOBMI-NEXT: shll %cl, %eax 1086; X64-NOBMI-NEXT: notl %eax 1087; X64-NOBMI-NEXT: andl %edi, %eax 1088; X64-NOBMI-NEXT: retq 1089; 1090; X64-BMI1-LABEL: 
bzhi32_b0: 1091; X64-BMI1: # %bb.0: 1092; X64-BMI1-NEXT: shll $8, %esi 1093; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 1094; X64-BMI1-NEXT: retq 1095; 1096; X64-BMI2-LABEL: bzhi32_b0: 1097; X64-BMI2: # %bb.0: 1098; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 1099; X64-BMI2-NEXT: retq 1100 %notmask = shl i32 -1, %numlowbits 1101 %mask = xor i32 %notmask, -1 1102 %masked = and i32 %mask, %val 1103 ret i32 %masked 1104} 1105 1106define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { 1107; X86-NOBMI-LABEL: bzhi32_b1_indexzext: 1108; X86-NOBMI: # %bb.0: 1109; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1110; X86-NOBMI-NEXT: movl $-1, %eax 1111; X86-NOBMI-NEXT: shll %cl, %eax 1112; X86-NOBMI-NEXT: notl %eax 1113; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1114; X86-NOBMI-NEXT: retl 1115; 1116; X86-BMI1-LABEL: bzhi32_b1_indexzext: 1117; X86-BMI1: # %bb.0: 1118; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1119; X86-BMI1-NEXT: shll $8, %eax 1120; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1121; X86-BMI1-NEXT: retl 1122; 1123; X86-BMI2-LABEL: bzhi32_b1_indexzext: 1124; X86-BMI2: # %bb.0: 1125; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1126; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1127; X86-BMI2-NEXT: retl 1128; 1129; X64-NOBMI-LABEL: bzhi32_b1_indexzext: 1130; X64-NOBMI: # %bb.0: 1131; X64-NOBMI-NEXT: movl %esi, %ecx 1132; X64-NOBMI-NEXT: movl $-1, %eax 1133; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1134; X64-NOBMI-NEXT: shll %cl, %eax 1135; X64-NOBMI-NEXT: notl %eax 1136; X64-NOBMI-NEXT: andl %edi, %eax 1137; X64-NOBMI-NEXT: retq 1138; 1139; X64-BMI1-LABEL: bzhi32_b1_indexzext: 1140; X64-BMI1: # %bb.0: 1141; X64-BMI1-NEXT: shll $8, %esi 1142; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 1143; X64-BMI1-NEXT: retq 1144; 1145; X64-BMI2-LABEL: bzhi32_b1_indexzext: 1146; X64-BMI2: # %bb.0: 1147; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 1148; X64-BMI2-NEXT: retq 1149 %conv = zext i8 %numlowbits to i32 1150 %notmask = shl i32 -1, %conv 
1151 %mask = xor i32 %notmask, -1 1152 %masked = and i32 %mask, %val 1153 ret i32 %masked 1154} 1155 1156define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind { 1157; X86-NOBMI-LABEL: bzhi32_b2_load: 1158; X86-NOBMI: # %bb.0: 1159; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 1160; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1161; X86-NOBMI-NEXT: movl $-1, %eax 1162; X86-NOBMI-NEXT: shll %cl, %eax 1163; X86-NOBMI-NEXT: notl %eax 1164; X86-NOBMI-NEXT: andl (%edx), %eax 1165; X86-NOBMI-NEXT: retl 1166; 1167; X86-BMI1-LABEL: bzhi32_b2_load: 1168; X86-BMI1: # %bb.0: 1169; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 1170; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1171; X86-BMI1-NEXT: shll $8, %ecx 1172; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 1173; X86-BMI1-NEXT: retl 1174; 1175; X86-BMI2-LABEL: bzhi32_b2_load: 1176; X86-BMI2: # %bb.0: 1177; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1178; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1179; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 1180; X86-BMI2-NEXT: retl 1181; 1182; X64-NOBMI-LABEL: bzhi32_b2_load: 1183; X64-NOBMI: # %bb.0: 1184; X64-NOBMI-NEXT: movl %esi, %ecx 1185; X64-NOBMI-NEXT: movl $-1, %eax 1186; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1187; X64-NOBMI-NEXT: shll %cl, %eax 1188; X64-NOBMI-NEXT: notl %eax 1189; X64-NOBMI-NEXT: andl (%rdi), %eax 1190; X64-NOBMI-NEXT: retq 1191; 1192; X64-BMI1-LABEL: bzhi32_b2_load: 1193; X64-BMI1: # %bb.0: 1194; X64-BMI1-NEXT: shll $8, %esi 1195; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 1196; X64-BMI1-NEXT: retq 1197; 1198; X64-BMI2-LABEL: bzhi32_b2_load: 1199; X64-BMI2: # %bb.0: 1200; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 1201; X64-BMI2-NEXT: retq 1202 %val = load i32, ptr %w 1203 %notmask = shl i32 -1, %numlowbits 1204 %mask = xor i32 %notmask, -1 1205 %masked = and i32 %mask, %val 1206 ret i32 %masked 1207} 1208 1209define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { 1210; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext: 1211; 
X86-NOBMI: # %bb.0: 1212; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 1213; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1214; X86-NOBMI-NEXT: movl $-1, %eax 1215; X86-NOBMI-NEXT: shll %cl, %eax 1216; X86-NOBMI-NEXT: notl %eax 1217; X86-NOBMI-NEXT: andl (%edx), %eax 1218; X86-NOBMI-NEXT: retl 1219; 1220; X86-BMI1-LABEL: bzhi32_b3_load_indexzext: 1221; X86-BMI1: # %bb.0: 1222; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 1223; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1224; X86-BMI1-NEXT: shll $8, %ecx 1225; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 1226; X86-BMI1-NEXT: retl 1227; 1228; X86-BMI2-LABEL: bzhi32_b3_load_indexzext: 1229; X86-BMI2: # %bb.0: 1230; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 1231; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1232; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 1233; X86-BMI2-NEXT: retl 1234; 1235; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext: 1236; X64-NOBMI: # %bb.0: 1237; X64-NOBMI-NEXT: movl %esi, %ecx 1238; X64-NOBMI-NEXT: movl $-1, %eax 1239; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1240; X64-NOBMI-NEXT: shll %cl, %eax 1241; X64-NOBMI-NEXT: notl %eax 1242; X64-NOBMI-NEXT: andl (%rdi), %eax 1243; X64-NOBMI-NEXT: retq 1244; 1245; X64-BMI1-LABEL: bzhi32_b3_load_indexzext: 1246; X64-BMI1: # %bb.0: 1247; X64-BMI1-NEXT: shll $8, %esi 1248; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 1249; X64-BMI1-NEXT: retq 1250; 1251; X64-BMI2-LABEL: bzhi32_b3_load_indexzext: 1252; X64-BMI2: # %bb.0: 1253; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 1254; X64-BMI2-NEXT: retq 1255 %val = load i32, ptr %w 1256 %conv = zext i8 %numlowbits to i32 1257 %notmask = shl i32 -1, %conv 1258 %mask = xor i32 %notmask, -1 1259 %masked = and i32 %mask, %val 1260 ret i32 %masked 1261} 1262 1263define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { 1264; X86-NOBMI-LABEL: bzhi32_b4_commutative: 1265; X86-NOBMI: # %bb.0: 1266; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1267; X86-NOBMI-NEXT: movl $-1, %eax 1268; X86-NOBMI-NEXT: shll %cl, 
%eax 1269; X86-NOBMI-NEXT: notl %eax 1270; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1271; X86-NOBMI-NEXT: retl 1272; 1273; X86-BMI1-LABEL: bzhi32_b4_commutative: 1274; X86-BMI1: # %bb.0: 1275; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1276; X86-BMI1-NEXT: shll $8, %eax 1277; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 1278; X86-BMI1-NEXT: retl 1279; 1280; X86-BMI2-LABEL: bzhi32_b4_commutative: 1281; X86-BMI2: # %bb.0: 1282; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1283; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 1284; X86-BMI2-NEXT: retl 1285; 1286; X64-NOBMI-LABEL: bzhi32_b4_commutative: 1287; X64-NOBMI: # %bb.0: 1288; X64-NOBMI-NEXT: movl %esi, %ecx 1289; X64-NOBMI-NEXT: movl $-1, %eax 1290; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1291; X64-NOBMI-NEXT: shll %cl, %eax 1292; X64-NOBMI-NEXT: notl %eax 1293; X64-NOBMI-NEXT: andl %edi, %eax 1294; X64-NOBMI-NEXT: retq 1295; 1296; X64-BMI1-LABEL: bzhi32_b4_commutative: 1297; X64-BMI1: # %bb.0: 1298; X64-BMI1-NEXT: shll $8, %esi 1299; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 1300; X64-BMI1-NEXT: retq 1301; 1302; X64-BMI2-LABEL: bzhi32_b4_commutative: 1303; X64-BMI2: # %bb.0: 1304; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 1305; X64-BMI2-NEXT: retq 1306 %notmask = shl i32 -1, %numlowbits 1307 %mask = xor i32 %notmask, -1 1308 %masked = and i32 %val, %mask ; swapped order 1309 ret i32 %masked 1310} 1311 1312; 64-bit 1313 1314define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { 1315; X86-NOBMI-LABEL: bzhi64_b0: 1316; X86-NOBMI: # %bb.0: 1317; X86-NOBMI-NEXT: pushl %esi 1318; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1319; X86-NOBMI-NEXT: movl $-1, %edx 1320; X86-NOBMI-NEXT: movl $-1, %esi 1321; X86-NOBMI-NEXT: shll %cl, %esi 1322; X86-NOBMI-NEXT: xorl %eax, %eax 1323; X86-NOBMI-NEXT: testb $32, %cl 1324; X86-NOBMI-NEXT: jne .LBB20_1 1325; X86-NOBMI-NEXT: # %bb.2: 1326; X86-NOBMI-NEXT: movl %esi, %eax 1327; X86-NOBMI-NEXT: jmp .LBB20_3 1328; X86-NOBMI-NEXT: .LBB20_1: 1329; 
X86-NOBMI-NEXT: movl %esi, %edx 1330; X86-NOBMI-NEXT: .LBB20_3: 1331; X86-NOBMI-NEXT: notl %edx 1332; X86-NOBMI-NEXT: notl %eax 1333; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1334; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1335; X86-NOBMI-NEXT: popl %esi 1336; X86-NOBMI-NEXT: retl 1337; 1338; X86-BMI1-LABEL: bzhi64_b0: 1339; X86-BMI1: # %bb.0: 1340; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1341; X86-BMI1-NEXT: movl $-1, %edx 1342; X86-BMI1-NEXT: movl $-1, %eax 1343; X86-BMI1-NEXT: shll %cl, %eax 1344; X86-BMI1-NEXT: testb $32, %cl 1345; X86-BMI1-NEXT: je .LBB20_2 1346; X86-BMI1-NEXT: # %bb.1: 1347; X86-BMI1-NEXT: movl %eax, %edx 1348; X86-BMI1-NEXT: xorl %eax, %eax 1349; X86-BMI1-NEXT: .LBB20_2: 1350; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1351; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx 1352; X86-BMI1-NEXT: retl 1353; 1354; X86-BMI2-LABEL: bzhi64_b0: 1355; X86-BMI2: # %bb.0: 1356; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 1357; X86-BMI2-NEXT: movl $-1, %ecx 1358; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax 1359; X86-BMI2-NEXT: testb $32, %dl 1360; X86-BMI2-NEXT: je .LBB20_2 1361; X86-BMI2-NEXT: # %bb.1: 1362; X86-BMI2-NEXT: movl %eax, %ecx 1363; X86-BMI2-NEXT: xorl %eax, %eax 1364; X86-BMI2-NEXT: .LBB20_2: 1365; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1366; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx 1367; X86-BMI2-NEXT: retl 1368; 1369; X64-NOBMI-LABEL: bzhi64_b0: 1370; X64-NOBMI: # %bb.0: 1371; X64-NOBMI-NEXT: movq %rsi, %rcx 1372; X64-NOBMI-NEXT: movq $-1, %rax 1373; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1374; X64-NOBMI-NEXT: shlq %cl, %rax 1375; X64-NOBMI-NEXT: notq %rax 1376; X64-NOBMI-NEXT: andq %rdi, %rax 1377; X64-NOBMI-NEXT: retq 1378; 1379; X64-BMI1-LABEL: bzhi64_b0: 1380; X64-BMI1: # %bb.0: 1381; X64-BMI1-NEXT: shll $8, %esi 1382; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 1383; X64-BMI1-NEXT: retq 1384; 1385; X64-BMI2-LABEL: bzhi64_b0: 1386; X64-BMI2: # %bb.0: 1387; X64-BMI2-NEXT: bzhiq %rsi, %rdi, 
%rax 1388; X64-BMI2-NEXT: retq 1389 %notmask = shl i64 -1, %numlowbits 1390 %mask = xor i64 %notmask, -1 1391 %masked = and i64 %mask, %val 1392 ret i64 %masked 1393} 1394 1395define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { 1396; X86-NOBMI-LABEL: bzhi64_b1_indexzext: 1397; X86-NOBMI: # %bb.0: 1398; X86-NOBMI-NEXT: pushl %esi 1399; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1400; X86-NOBMI-NEXT: movl $-1, %edx 1401; X86-NOBMI-NEXT: movl $-1, %esi 1402; X86-NOBMI-NEXT: shll %cl, %esi 1403; X86-NOBMI-NEXT: xorl %eax, %eax 1404; X86-NOBMI-NEXT: testb $32, %cl 1405; X86-NOBMI-NEXT: jne .LBB21_1 1406; X86-NOBMI-NEXT: # %bb.2: 1407; X86-NOBMI-NEXT: movl %esi, %eax 1408; X86-NOBMI-NEXT: jmp .LBB21_3 1409; X86-NOBMI-NEXT: .LBB21_1: 1410; X86-NOBMI-NEXT: movl %esi, %edx 1411; X86-NOBMI-NEXT: .LBB21_3: 1412; X86-NOBMI-NEXT: notl %edx 1413; X86-NOBMI-NEXT: notl %eax 1414; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1415; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1416; X86-NOBMI-NEXT: popl %esi 1417; X86-NOBMI-NEXT: retl 1418; 1419; X86-BMI1-LABEL: bzhi64_b1_indexzext: 1420; X86-BMI1: # %bb.0: 1421; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1422; X86-BMI1-NEXT: movl $-1, %edx 1423; X86-BMI1-NEXT: movl $-1, %eax 1424; X86-BMI1-NEXT: shll %cl, %eax 1425; X86-BMI1-NEXT: testb $32, %cl 1426; X86-BMI1-NEXT: je .LBB21_2 1427; X86-BMI1-NEXT: # %bb.1: 1428; X86-BMI1-NEXT: movl %eax, %edx 1429; X86-BMI1-NEXT: xorl %eax, %eax 1430; X86-BMI1-NEXT: .LBB21_2: 1431; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1432; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx 1433; X86-BMI1-NEXT: retl 1434; 1435; X86-BMI2-LABEL: bzhi64_b1_indexzext: 1436; X86-BMI2: # %bb.0: 1437; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 1438; X86-BMI2-NEXT: movl $-1, %ecx 1439; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax 1440; X86-BMI2-NEXT: testb $32, %dl 1441; X86-BMI2-NEXT: je .LBB21_2 1442; X86-BMI2-NEXT: # %bb.1: 1443; X86-BMI2-NEXT: movl %eax, %ecx 1444; X86-BMI2-NEXT: xorl 
%eax, %eax 1445; X86-BMI2-NEXT: .LBB21_2: 1446; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1447; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx 1448; X86-BMI2-NEXT: retl 1449; 1450; X64-NOBMI-LABEL: bzhi64_b1_indexzext: 1451; X64-NOBMI: # %bb.0: 1452; X64-NOBMI-NEXT: movl %esi, %ecx 1453; X64-NOBMI-NEXT: movq $-1, %rax 1454; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1455; X64-NOBMI-NEXT: shlq %cl, %rax 1456; X64-NOBMI-NEXT: notq %rax 1457; X64-NOBMI-NEXT: andq %rdi, %rax 1458; X64-NOBMI-NEXT: retq 1459; 1460; X64-BMI1-LABEL: bzhi64_b1_indexzext: 1461; X64-BMI1: # %bb.0: 1462; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 1463; X64-BMI1-NEXT: shll $8, %esi 1464; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 1465; X64-BMI1-NEXT: retq 1466; 1467; X64-BMI2-LABEL: bzhi64_b1_indexzext: 1468; X64-BMI2: # %bb.0: 1469; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 1470; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1471; X64-BMI2-NEXT: retq 1472 %conv = zext i8 %numlowbits to i64 1473 %notmask = shl i64 -1, %conv 1474 %mask = xor i64 %notmask, -1 1475 %masked = and i64 %mask, %val 1476 ret i64 %masked 1477} 1478 1479define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind { 1480; X86-NOBMI-LABEL: bzhi64_b2_load: 1481; X86-NOBMI: # %bb.0: 1482; X86-NOBMI-NEXT: pushl %edi 1483; X86-NOBMI-NEXT: pushl %esi 1484; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 1485; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1486; X86-NOBMI-NEXT: movl $-1, %edx 1487; X86-NOBMI-NEXT: movl $-1, %edi 1488; X86-NOBMI-NEXT: shll %cl, %edi 1489; X86-NOBMI-NEXT: xorl %eax, %eax 1490; X86-NOBMI-NEXT: testb $32, %cl 1491; X86-NOBMI-NEXT: jne .LBB22_1 1492; X86-NOBMI-NEXT: # %bb.2: 1493; X86-NOBMI-NEXT: movl %edi, %eax 1494; X86-NOBMI-NEXT: jmp .LBB22_3 1495; X86-NOBMI-NEXT: .LBB22_1: 1496; X86-NOBMI-NEXT: movl %edi, %edx 1497; X86-NOBMI-NEXT: .LBB22_3: 1498; X86-NOBMI-NEXT: notl %edx 1499; X86-NOBMI-NEXT: notl %eax 1500; X86-NOBMI-NEXT: andl (%esi), %eax 1501; X86-NOBMI-NEXT: 
andl 4(%esi), %edx 1502; X86-NOBMI-NEXT: popl %esi 1503; X86-NOBMI-NEXT: popl %edi 1504; X86-NOBMI-NEXT: retl 1505; 1506; X86-BMI1-LABEL: bzhi64_b2_load: 1507; X86-BMI1: # %bb.0: 1508; X86-BMI1-NEXT: pushl %esi 1509; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 1510; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1511; X86-BMI1-NEXT: movl $-1, %esi 1512; X86-BMI1-NEXT: movl $-1, %eax 1513; X86-BMI1-NEXT: shll %cl, %eax 1514; X86-BMI1-NEXT: testb $32, %cl 1515; X86-BMI1-NEXT: je .LBB22_2 1516; X86-BMI1-NEXT: # %bb.1: 1517; X86-BMI1-NEXT: movl %eax, %esi 1518; X86-BMI1-NEXT: xorl %eax, %eax 1519; X86-BMI1-NEXT: .LBB22_2: 1520; X86-BMI1-NEXT: andnl (%edx), %eax, %eax 1521; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx 1522; X86-BMI1-NEXT: popl %esi 1523; X86-BMI1-NEXT: retl 1524; 1525; X86-BMI2-LABEL: bzhi64_b2_load: 1526; X86-BMI2: # %bb.0: 1527; X86-BMI2-NEXT: pushl %ebx 1528; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 1529; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx 1530; X86-BMI2-NEXT: movl $-1, %edx 1531; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax 1532; X86-BMI2-NEXT: testb $32, %bl 1533; X86-BMI2-NEXT: je .LBB22_2 1534; X86-BMI2-NEXT: # %bb.1: 1535; X86-BMI2-NEXT: movl %eax, %edx 1536; X86-BMI2-NEXT: xorl %eax, %eax 1537; X86-BMI2-NEXT: .LBB22_2: 1538; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax 1539; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx 1540; X86-BMI2-NEXT: popl %ebx 1541; X86-BMI2-NEXT: retl 1542; 1543; X64-NOBMI-LABEL: bzhi64_b2_load: 1544; X64-NOBMI: # %bb.0: 1545; X64-NOBMI-NEXT: movq %rsi, %rcx 1546; X64-NOBMI-NEXT: movq $-1, %rax 1547; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1548; X64-NOBMI-NEXT: shlq %cl, %rax 1549; X64-NOBMI-NEXT: notq %rax 1550; X64-NOBMI-NEXT: andq (%rdi), %rax 1551; X64-NOBMI-NEXT: retq 1552; 1553; X64-BMI1-LABEL: bzhi64_b2_load: 1554; X64-BMI1: # %bb.0: 1555; X64-BMI1-NEXT: shll $8, %esi 1556; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 1557; X64-BMI1-NEXT: retq 1558; 1559; X64-BMI2-LABEL: bzhi64_b2_load: 1560; X64-BMI2: # %bb.0: 
1561; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 1562; X64-BMI2-NEXT: retq 1563 %val = load i64, ptr %w 1564 %notmask = shl i64 -1, %numlowbits 1565 %mask = xor i64 %notmask, -1 1566 %masked = and i64 %mask, %val 1567 ret i64 %masked 1568} 1569 1570define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { 1571; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext: 1572; X86-NOBMI: # %bb.0: 1573; X86-NOBMI-NEXT: pushl %edi 1574; X86-NOBMI-NEXT: pushl %esi 1575; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 1576; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1577; X86-NOBMI-NEXT: movl $-1, %edx 1578; X86-NOBMI-NEXT: movl $-1, %edi 1579; X86-NOBMI-NEXT: shll %cl, %edi 1580; X86-NOBMI-NEXT: xorl %eax, %eax 1581; X86-NOBMI-NEXT: testb $32, %cl 1582; X86-NOBMI-NEXT: jne .LBB23_1 1583; X86-NOBMI-NEXT: # %bb.2: 1584; X86-NOBMI-NEXT: movl %edi, %eax 1585; X86-NOBMI-NEXT: jmp .LBB23_3 1586; X86-NOBMI-NEXT: .LBB23_1: 1587; X86-NOBMI-NEXT: movl %edi, %edx 1588; X86-NOBMI-NEXT: .LBB23_3: 1589; X86-NOBMI-NEXT: notl %edx 1590; X86-NOBMI-NEXT: notl %eax 1591; X86-NOBMI-NEXT: andl (%esi), %eax 1592; X86-NOBMI-NEXT: andl 4(%esi), %edx 1593; X86-NOBMI-NEXT: popl %esi 1594; X86-NOBMI-NEXT: popl %edi 1595; X86-NOBMI-NEXT: retl 1596; 1597; X86-BMI1-LABEL: bzhi64_b3_load_indexzext: 1598; X86-BMI1: # %bb.0: 1599; X86-BMI1-NEXT: pushl %esi 1600; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 1601; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1602; X86-BMI1-NEXT: movl $-1, %esi 1603; X86-BMI1-NEXT: movl $-1, %eax 1604; X86-BMI1-NEXT: shll %cl, %eax 1605; X86-BMI1-NEXT: testb $32, %cl 1606; X86-BMI1-NEXT: je .LBB23_2 1607; X86-BMI1-NEXT: # %bb.1: 1608; X86-BMI1-NEXT: movl %eax, %esi 1609; X86-BMI1-NEXT: xorl %eax, %eax 1610; X86-BMI1-NEXT: .LBB23_2: 1611; X86-BMI1-NEXT: andnl (%edx), %eax, %eax 1612; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx 1613; X86-BMI1-NEXT: popl %esi 1614; X86-BMI1-NEXT: retl 1615; 1616; X86-BMI2-LABEL: bzhi64_b3_load_indexzext: 1617; X86-BMI2: # %bb.0: 1618; 
X86-BMI2-NEXT: pushl %ebx 1619; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 1620; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx 1621; X86-BMI2-NEXT: movl $-1, %edx 1622; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax 1623; X86-BMI2-NEXT: testb $32, %bl 1624; X86-BMI2-NEXT: je .LBB23_2 1625; X86-BMI2-NEXT: # %bb.1: 1626; X86-BMI2-NEXT: movl %eax, %edx 1627; X86-BMI2-NEXT: xorl %eax, %eax 1628; X86-BMI2-NEXT: .LBB23_2: 1629; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax 1630; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx 1631; X86-BMI2-NEXT: popl %ebx 1632; X86-BMI2-NEXT: retl 1633; 1634; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext: 1635; X64-NOBMI: # %bb.0: 1636; X64-NOBMI-NEXT: movl %esi, %ecx 1637; X64-NOBMI-NEXT: movq $-1, %rax 1638; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1639; X64-NOBMI-NEXT: shlq %cl, %rax 1640; X64-NOBMI-NEXT: notq %rax 1641; X64-NOBMI-NEXT: andq (%rdi), %rax 1642; X64-NOBMI-NEXT: retq 1643; 1644; X64-BMI1-LABEL: bzhi64_b3_load_indexzext: 1645; X64-BMI1: # %bb.0: 1646; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 1647; X64-BMI1-NEXT: shll $8, %esi 1648; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 1649; X64-BMI1-NEXT: retq 1650; 1651; X64-BMI2-LABEL: bzhi64_b3_load_indexzext: 1652; X64-BMI2: # %bb.0: 1653; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 1654; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 1655; X64-BMI2-NEXT: retq 1656 %val = load i64, ptr %w 1657 %conv = zext i8 %numlowbits to i64 1658 %notmask = shl i64 -1, %conv 1659 %mask = xor i64 %notmask, -1 1660 %masked = and i64 %mask, %val 1661 ret i64 %masked 1662} 1663 1664define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { 1665; X86-NOBMI-LABEL: bzhi64_b4_commutative: 1666; X86-NOBMI: # %bb.0: 1667; X86-NOBMI-NEXT: pushl %esi 1668; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1669; X86-NOBMI-NEXT: movl $-1, %edx 1670; X86-NOBMI-NEXT: movl $-1, %esi 1671; X86-NOBMI-NEXT: shll %cl, %esi 1672; X86-NOBMI-NEXT: xorl %eax, %eax 1673; X86-NOBMI-NEXT: testb $32, %cl 1674; 
X86-NOBMI-NEXT: jne .LBB24_1 1675; X86-NOBMI-NEXT: # %bb.2: 1676; X86-NOBMI-NEXT: movl %esi, %eax 1677; X86-NOBMI-NEXT: jmp .LBB24_3 1678; X86-NOBMI-NEXT: .LBB24_1: 1679; X86-NOBMI-NEXT: movl %esi, %edx 1680; X86-NOBMI-NEXT: .LBB24_3: 1681; X86-NOBMI-NEXT: notl %edx 1682; X86-NOBMI-NEXT: notl %eax 1683; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1684; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 1685; X86-NOBMI-NEXT: popl %esi 1686; X86-NOBMI-NEXT: retl 1687; 1688; X86-BMI1-LABEL: bzhi64_b4_commutative: 1689; X86-BMI1: # %bb.0: 1690; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1691; X86-BMI1-NEXT: movl $-1, %edx 1692; X86-BMI1-NEXT: movl $-1, %eax 1693; X86-BMI1-NEXT: shll %cl, %eax 1694; X86-BMI1-NEXT: testb $32, %cl 1695; X86-BMI1-NEXT: je .LBB24_2 1696; X86-BMI1-NEXT: # %bb.1: 1697; X86-BMI1-NEXT: movl %eax, %edx 1698; X86-BMI1-NEXT: xorl %eax, %eax 1699; X86-BMI1-NEXT: .LBB24_2: 1700; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1701; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx 1702; X86-BMI1-NEXT: retl 1703; 1704; X86-BMI2-LABEL: bzhi64_b4_commutative: 1705; X86-BMI2: # %bb.0: 1706; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 1707; X86-BMI2-NEXT: movl $-1, %ecx 1708; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax 1709; X86-BMI2-NEXT: testb $32, %dl 1710; X86-BMI2-NEXT: je .LBB24_2 1711; X86-BMI2-NEXT: # %bb.1: 1712; X86-BMI2-NEXT: movl %eax, %ecx 1713; X86-BMI2-NEXT: xorl %eax, %eax 1714; X86-BMI2-NEXT: .LBB24_2: 1715; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax 1716; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx 1717; X86-BMI2-NEXT: retl 1718; 1719; X64-NOBMI-LABEL: bzhi64_b4_commutative: 1720; X64-NOBMI: # %bb.0: 1721; X64-NOBMI-NEXT: movq %rsi, %rcx 1722; X64-NOBMI-NEXT: movq $-1, %rax 1723; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 1724; X64-NOBMI-NEXT: shlq %cl, %rax 1725; X64-NOBMI-NEXT: notq %rax 1726; X64-NOBMI-NEXT: andq %rdi, %rax 1727; X64-NOBMI-NEXT: retq 1728; 1729; X64-BMI1-LABEL: bzhi64_b4_commutative: 1730; X64-BMI1: 
# %bb.0: 1731; X64-BMI1-NEXT: shll $8, %esi 1732; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 1733; X64-BMI1-NEXT: retq 1734; 1735; X64-BMI2-LABEL: bzhi64_b4_commutative: 1736; X64-BMI2: # %bb.0: 1737; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 1738; X64-BMI2-NEXT: retq 1739 %notmask = shl i64 -1, %numlowbits 1740 %mask = xor i64 %notmask, -1 1741 %masked = and i64 %val, %mask ; swapped order 1742 ret i64 %masked 1743} 1744 1745; 64-bit, but with 32-bit output 1746 1747; Everything done in 64-bit, truncation happens last. 1748define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { 1749; X86-NOBMI-LABEL: bzhi64_32_b0: 1750; X86-NOBMI: # %bb.0: 1751; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1752; X86-NOBMI-NEXT: movl $-1, %edx 1753; X86-NOBMI-NEXT: shll %cl, %edx 1754; X86-NOBMI-NEXT: xorl %eax, %eax 1755; X86-NOBMI-NEXT: testb $32, %cl 1756; X86-NOBMI-NEXT: jne .LBB25_2 1757; X86-NOBMI-NEXT: # %bb.1: 1758; X86-NOBMI-NEXT: movl %edx, %eax 1759; X86-NOBMI-NEXT: .LBB25_2: 1760; X86-NOBMI-NEXT: notl %eax 1761; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1762; X86-NOBMI-NEXT: retl 1763; 1764; X86-BMI1-LABEL: bzhi64_32_b0: 1765; X86-BMI1: # %bb.0: 1766; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1767; X86-BMI1-NEXT: movl $-1, %eax 1768; X86-BMI1-NEXT: shll %cl, %eax 1769; X86-BMI1-NEXT: xorl %edx, %edx 1770; X86-BMI1-NEXT: testb $32, %cl 1771; X86-BMI1-NEXT: jne .LBB25_2 1772; X86-BMI1-NEXT: # %bb.1: 1773; X86-BMI1-NEXT: movl %eax, %edx 1774; X86-BMI1-NEXT: .LBB25_2: 1775; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax 1776; X86-BMI1-NEXT: retl 1777; 1778; X86-BMI2-LABEL: bzhi64_32_b0: 1779; X86-BMI2: # %bb.0: 1780; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1781; X86-BMI2-NEXT: xorl %ecx, %ecx 1782; X86-BMI2-NEXT: testb $32, %al 1783; X86-BMI2-NEXT: jne .LBB25_2 1784; X86-BMI2-NEXT: # %bb.1: 1785; X86-BMI2-NEXT: movl $-1, %ecx 1786; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx 1787; X86-BMI2-NEXT: .LBB25_2: 1788; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax 1789; 
X86-BMI2-NEXT: retl 1790; 1791; X64-NOBMI-LABEL: bzhi64_32_b0: 1792; X64-NOBMI: # %bb.0: 1793; X64-NOBMI-NEXT: movl %esi, %ecx 1794; X64-NOBMI-NEXT: movq $-1, %rax 1795; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1796; X64-NOBMI-NEXT: shlq %cl, %rax 1797; X64-NOBMI-NEXT: notl %eax 1798; X64-NOBMI-NEXT: andl %edi, %eax 1799; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 1800; X64-NOBMI-NEXT: retq 1801; 1802; X64-BMI1-LABEL: bzhi64_32_b0: 1803; X64-BMI1: # %bb.0: 1804; X64-BMI1-NEXT: shll $8, %esi 1805; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 1806; X64-BMI1-NEXT: retq 1807; 1808; X64-BMI2-LABEL: bzhi64_32_b0: 1809; X64-BMI2: # %bb.0: 1810; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 1811; X64-BMI2-NEXT: retq 1812 %widenumlowbits = zext i8 %numlowbits to i64 1813 %notmask = shl nsw i64 -1, %widenumlowbits 1814 %mask = xor i64 %notmask, -1 1815 %wideres = and i64 %val, %mask 1816 %res = trunc i64 %wideres to i32 1817 ret i32 %res 1818} 1819 1820; Shifting happens in 64-bit, then truncation. Masking is 32-bit. 
; NOTE: In this variant %val is truncated to i32 up front; the i8 bit count is
; zero-extended to i32 and the mask ~(-1 << %numlowbits) is built and applied
; entirely in i32. The assertions below contain only 32-bit instructions in
; every tested configuration; with BMI1 / BMI2 the shift-not-and sequence is
; replaced by bextrl / bzhil respectively.
define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b1:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_b1:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_b1:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_b1:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_b1:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_b1:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  ; Only the low 32 bits of %val take part in the result.
  %truncval = trunc i64 %val to i32
  ; Widen the i8 bit count so it can be used as an i32 shift amount.
  %widenumlowbits = zext i8 %numlowbits to i32
  ; -1 << n has zeros in the low n bits and ones above them ...
  %notmask = shl nsw i32 -1, %widenumlowbits
  ; ... so inverting it leaves ones in exactly the low n bits.
  %mask = xor i32 %notmask, -1
  ; Keep the low n bits of the truncated value.
  %res = and i32 %truncval, %mask
  ret i32 %res
}

; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
; Masking is 64-bit. Then truncation.
; NOTE: In this variant the mask ~(-1 << %numlowbits) is computed in i32 and
; then zero-extended to i64; the 'and' is performed in i64 against the full
; %val, and only its result is truncated to i32. The assertions below contain
; only 32-bit instructions in every tested configuration; with BMI1 / BMI2 the
; shift-not-and sequence is replaced by bextrl / bzhil respectively.
define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b2:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_b2:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_b2:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_b2:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_b2:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_b2:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  ; Widen the i8 bit count so it can be used as an i32 shift amount.
  %widenumlowbits = zext i8 %numlowbits to i32
  ; -1 << n has zeros in the low n bits and ones above them ...
  %notmask = shl nsw i32 -1, %widenumlowbits
  ; ... so inverting it leaves ones in exactly the low n bits.
  %mask = xor i32 %notmask, -1
  ; Zero-extension keeps bits 32..63 of the mask clear.
  %zextmask = zext i32 %mask to i64
  ; 64-bit masking of the untruncated value.
  %wideres = and i64 %val, %zextmask
  ; Only the low 32 bits of the masked value are returned.
  %res = trunc i64 %wideres to i32
  ret i32 %res
}

; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
; Masking is 64-bit. Then truncation.
; bzhi64_32_b3 - pattern b computed entirely in i64 on a 32-bit-wide mask.
; The i8 bit count is zero-extended to i64; the constant 4294967295
; (0xFFFFFFFF) is shifted left by it and then XORed with the same constant
; to form the mask; %val is ANDed in i64 and only the final result is
; truncated to i32. Per the autogenerated checks, the i686 runs keep an
; explicit shift / invert / and sequence guarded by `testb $32` (using andnl
; when BMI is available), the x86_64 run without BMI keeps a 64-bit shlq
; followed by xorl/andl, and the x86_64 BMI1 / BMI2 runs are expected to
; fold to shll $8 + bextrl and to bzhil respectively.
1928define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { 1929; X86-NOBMI-LABEL: bzhi64_32_b3: 1930; X86-NOBMI: # %bb.0: 1931; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1932; X86-NOBMI-NEXT: movl $-1, %edx 1933; X86-NOBMI-NEXT: shll %cl, %edx 1934; X86-NOBMI-NEXT: xorl %eax, %eax 1935; X86-NOBMI-NEXT: testb $32, %cl 1936; X86-NOBMI-NEXT: jne .LBB28_2 1937; X86-NOBMI-NEXT: # %bb.1: 1938; X86-NOBMI-NEXT: movl %edx, %eax 1939; X86-NOBMI-NEXT: .LBB28_2: 1940; X86-NOBMI-NEXT: notl %eax 1941; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 1942; X86-NOBMI-NEXT: retl 1943; 1944; X86-BMI1-LABEL: bzhi64_32_b3: 1945; X86-BMI1: # %bb.0: 1946; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1947; X86-BMI1-NEXT: movl $-1, %eax 1948; X86-BMI1-NEXT: shll %cl, %eax 1949; X86-BMI1-NEXT: xorl %edx, %edx 1950; X86-BMI1-NEXT: testb $32, %cl 1951; X86-BMI1-NEXT: jne .LBB28_2 1952; X86-BMI1-NEXT: # %bb.1: 1953; X86-BMI1-NEXT: movl %eax, %edx 1954; X86-BMI1-NEXT: .LBB28_2: 1955; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax 1956; X86-BMI1-NEXT: retl 1957; 1958; X86-BMI2-LABEL: bzhi64_32_b3: 1959; X86-BMI2: # %bb.0: 1960; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1961; X86-BMI2-NEXT: xorl %ecx, %ecx 1962; X86-BMI2-NEXT: testb $32, %al 1963; X86-BMI2-NEXT: jne .LBB28_2 1964; X86-BMI2-NEXT: # %bb.1: 1965; X86-BMI2-NEXT: movl $-1, %ecx 1966; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx 1967; X86-BMI2-NEXT: .LBB28_2: 1968; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax 1969; X86-BMI2-NEXT: retl 1970; 1971; X64-NOBMI-LABEL: bzhi64_32_b3: 1972; X64-NOBMI: # %bb.0: 1973; X64-NOBMI-NEXT: movl %esi, %ecx 1974; X64-NOBMI-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 1975; X64-NOBMI-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF 1976; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 1977; X64-NOBMI-NEXT: shlq %cl, %rdx 1978; X64-NOBMI-NEXT: xorl %edx, %eax 1979; X64-NOBMI-NEXT: andl %edi, %eax 1980; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 1981; X64-NOBMI-NEXT: retq 
1982; 1983; X64-BMI1-LABEL: bzhi64_32_b3: 1984; X64-BMI1: # %bb.0: 1985; X64-BMI1-NEXT: shll $8, %esi 1986; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 1987; X64-BMI1-NEXT: retq 1988; 1989; X64-BMI2-LABEL: bzhi64_32_b3: 1990; X64-BMI2: # %bb.0: 1991; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 1992; X64-BMI2-NEXT: retq 1993 %widenumlowbits = zext i8 %numlowbits to i64 1994 %notmask = shl nsw i64 4294967295, %widenumlowbits 1995 %mask = xor i64 %notmask, 4294967295 1996 %wideres = and i64 %val, %mask 1997 %res = trunc i64 %wideres to i32 1998 ret i32 %res 1999} 2000 2001; ---------------------------------------------------------------------------- ; 2002; Pattern c. 32-bit 2003; ---------------------------------------------------------------------------- ; 2004 2005define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind { 2006; X86-NOBMI-LABEL: bzhi32_c0: 2007; X86-NOBMI: # %bb.0: 2008; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2009; X86-NOBMI-NEXT: xorl %ecx, %ecx 2010; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2011; X86-NOBMI-NEXT: movl $-1, %eax 2012; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2013; X86-NOBMI-NEXT: shrl %cl, %eax 2014; X86-NOBMI-NEXT: movl %eax, (%edx) 2015; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2016; X86-NOBMI-NEXT: retl 2017; 2018; X86-BMI1-LABEL: bzhi32_c0: 2019; X86-BMI1: # %bb.0: 2020; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2021; X86-BMI1-NEXT: xorl %ecx, %ecx 2022; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2023; X86-BMI1-NEXT: movl $-1, %eax 2024; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2025; X86-BMI1-NEXT: shrl %cl, %eax 2026; X86-BMI1-NEXT: movl %eax, (%edx) 2027; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2028; X86-BMI1-NEXT: retl 2029; 2030; X86-BMI2-LABEL: bzhi32_c0: 2031; X86-BMI2: # %bb.0: 2032; X86-BMI2-NEXT: pushl %esi 2033; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2034; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 2035; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax 2036; 
X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx 2037; X86-BMI2-NEXT: negb %dl 2038; X86-BMI2-NEXT: movl $-1, %esi 2039; X86-BMI2-NEXT: shrxl %edx, %esi, %edx 2040; X86-BMI2-NEXT: movl %edx, (%ecx) 2041; X86-BMI2-NEXT: popl %esi 2042; X86-BMI2-NEXT: retl 2043; 2044; X64-NOBMI-LABEL: bzhi32_c0: 2045; X64-NOBMI: # %bb.0: 2046; X64-NOBMI-NEXT: movl %esi, %ecx 2047; X64-NOBMI-NEXT: negb %cl 2048; X64-NOBMI-NEXT: movl $-1, %eax 2049; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2050; X64-NOBMI-NEXT: shrl %cl, %eax 2051; X64-NOBMI-NEXT: movl %eax, (%rdx) 2052; X64-NOBMI-NEXT: andl %edi, %eax 2053; X64-NOBMI-NEXT: retq 2054; 2055; X64-BMI1-LABEL: bzhi32_c0: 2056; X64-BMI1: # %bb.0: 2057; X64-BMI1-NEXT: movl %esi, %ecx 2058; X64-BMI1-NEXT: negb %cl 2059; X64-BMI1-NEXT: movl $-1, %eax 2060; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2061; X64-BMI1-NEXT: shrl %cl, %eax 2062; X64-BMI1-NEXT: movl %eax, (%rdx) 2063; X64-BMI1-NEXT: andl %edi, %eax 2064; X64-BMI1-NEXT: retq 2065; 2066; X64-BMI2-LABEL: bzhi32_c0: 2067; X64-BMI2: # %bb.0: 2068; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 2069; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi 2070; X64-BMI2-NEXT: negb %sil 2071; X64-BMI2-NEXT: movl $-1, %ecx 2072; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx 2073; X64-BMI2-NEXT: movl %ecx, (%rdx) 2074; X64-BMI2-NEXT: retq 2075 %numhighbits = sub i32 32, %numlowbits 2076 %mask = lshr i32 -1, %numhighbits 2077 store i32 %mask, ptr %escape 2078 %masked = and i32 %mask, %val 2079 ret i32 %masked 2080} 2081 2082define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind { 2083; X86-NOBMI-LABEL: bzhi32_c1_indexzext: 2084; X86-NOBMI: # %bb.0: 2085; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2086; X86-NOBMI-NEXT: xorl %ecx, %ecx 2087; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2088; X86-NOBMI-NEXT: movl $-1, %eax 2089; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2090; X86-NOBMI-NEXT: shrl %cl, %eax 2091; X86-NOBMI-NEXT: 
movl %eax, (%edx) 2092; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2093; X86-NOBMI-NEXT: retl 2094; 2095; X86-BMI1-LABEL: bzhi32_c1_indexzext: 2096; X86-BMI1: # %bb.0: 2097; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2098; X86-BMI1-NEXT: xorl %ecx, %ecx 2099; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2100; X86-BMI1-NEXT: movl $-1, %eax 2101; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2102; X86-BMI1-NEXT: shrl %cl, %eax 2103; X86-BMI1-NEXT: movl %eax, (%edx) 2104; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2105; X86-BMI1-NEXT: retl 2106; 2107; X86-BMI2-LABEL: bzhi32_c1_indexzext: 2108; X86-BMI2: # %bb.0: 2109; X86-BMI2-NEXT: pushl %esi 2110; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2111; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 2112; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax 2113; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx 2114; X86-BMI2-NEXT: negb %dl 2115; X86-BMI2-NEXT: movl $-1, %esi 2116; X86-BMI2-NEXT: shrxl %edx, %esi, %edx 2117; X86-BMI2-NEXT: movl %edx, (%ecx) 2118; X86-BMI2-NEXT: popl %esi 2119; X86-BMI2-NEXT: retl 2120; 2121; X64-NOBMI-LABEL: bzhi32_c1_indexzext: 2122; X64-NOBMI: # %bb.0: 2123; X64-NOBMI-NEXT: movl %esi, %ecx 2124; X64-NOBMI-NEXT: negb %cl 2125; X64-NOBMI-NEXT: movl $-1, %eax 2126; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2127; X64-NOBMI-NEXT: shrl %cl, %eax 2128; X64-NOBMI-NEXT: movl %eax, (%rdx) 2129; X64-NOBMI-NEXT: andl %edi, %eax 2130; X64-NOBMI-NEXT: retq 2131; 2132; X64-BMI1-LABEL: bzhi32_c1_indexzext: 2133; X64-BMI1: # %bb.0: 2134; X64-BMI1-NEXT: movl %esi, %ecx 2135; X64-BMI1-NEXT: negb %cl 2136; X64-BMI1-NEXT: movl $-1, %eax 2137; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2138; X64-BMI1-NEXT: shrl %cl, %eax 2139; X64-BMI1-NEXT: movl %eax, (%rdx) 2140; X64-BMI1-NEXT: andl %edi, %eax 2141; X64-BMI1-NEXT: retq 2142; 2143; X64-BMI2-LABEL: bzhi32_c1_indexzext: 2144; X64-BMI2: # %bb.0: 2145; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 2146; X64-BMI2-NEXT: # kill: def 
$sil killed $sil killed $esi def $esi 2147; X64-BMI2-NEXT: negb %sil 2148; X64-BMI2-NEXT: movl $-1, %ecx 2149; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx 2150; X64-BMI2-NEXT: movl %ecx, (%rdx) 2151; X64-BMI2-NEXT: retq 2152 %numhighbits = sub i8 32, %numlowbits 2153 %sh_prom = zext i8 %numhighbits to i32 2154 %mask = lshr i32 -1, %sh_prom 2155 store i32 %mask, ptr %escape 2156 %masked = and i32 %mask, %val 2157 ret i32 %masked 2158} 2159 2160define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind { 2161; X86-NOBMI-LABEL: bzhi32_c2_load: 2162; X86-NOBMI: # %bb.0: 2163; X86-NOBMI-NEXT: pushl %esi 2164; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2165; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2166; X86-NOBMI-NEXT: xorl %ecx, %ecx 2167; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2168; X86-NOBMI-NEXT: movl $-1, %esi 2169; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2170; X86-NOBMI-NEXT: shrl %cl, %esi 2171; X86-NOBMI-NEXT: movl (%eax), %eax 2172; X86-NOBMI-NEXT: andl %esi, %eax 2173; X86-NOBMI-NEXT: movl %esi, (%edx) 2174; X86-NOBMI-NEXT: popl %esi 2175; X86-NOBMI-NEXT: retl 2176; 2177; X86-BMI1-LABEL: bzhi32_c2_load: 2178; X86-BMI1: # %bb.0: 2179; X86-BMI1-NEXT: pushl %esi 2180; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2181; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2182; X86-BMI1-NEXT: xorl %ecx, %ecx 2183; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2184; X86-BMI1-NEXT: movl $-1, %esi 2185; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2186; X86-BMI1-NEXT: shrl %cl, %esi 2187; X86-BMI1-NEXT: movl (%eax), %eax 2188; X86-BMI1-NEXT: andl %esi, %eax 2189; X86-BMI1-NEXT: movl %esi, (%edx) 2190; X86-BMI1-NEXT: popl %esi 2191; X86-BMI1-NEXT: retl 2192; 2193; X86-BMI2-LABEL: bzhi32_c2_load: 2194; X86-BMI2: # %bb.0: 2195; X86-BMI2-NEXT: pushl %esi 2196; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2197; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2198; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 2199; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax 
2200; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx 2201; X86-BMI2-NEXT: negb %dl 2202; X86-BMI2-NEXT: movl $-1, %esi 2203; X86-BMI2-NEXT: shrxl %edx, %esi, %edx 2204; X86-BMI2-NEXT: movl %edx, (%ecx) 2205; X86-BMI2-NEXT: popl %esi 2206; X86-BMI2-NEXT: retl 2207; 2208; X64-NOBMI-LABEL: bzhi32_c2_load: 2209; X64-NOBMI: # %bb.0: 2210; X64-NOBMI-NEXT: movl %esi, %ecx 2211; X64-NOBMI-NEXT: negb %cl 2212; X64-NOBMI-NEXT: movl $-1, %esi 2213; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2214; X64-NOBMI-NEXT: shrl %cl, %esi 2215; X64-NOBMI-NEXT: movl (%rdi), %eax 2216; X64-NOBMI-NEXT: andl %esi, %eax 2217; X64-NOBMI-NEXT: movl %esi, (%rdx) 2218; X64-NOBMI-NEXT: retq 2219; 2220; X64-BMI1-LABEL: bzhi32_c2_load: 2221; X64-BMI1: # %bb.0: 2222; X64-BMI1-NEXT: movl %esi, %ecx 2223; X64-BMI1-NEXT: negb %cl 2224; X64-BMI1-NEXT: movl $-1, %esi 2225; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2226; X64-BMI1-NEXT: shrl %cl, %esi 2227; X64-BMI1-NEXT: movl (%rdi), %eax 2228; X64-BMI1-NEXT: andl %esi, %eax 2229; X64-BMI1-NEXT: movl %esi, (%rdx) 2230; X64-BMI1-NEXT: retq 2231; 2232; X64-BMI2-LABEL: bzhi32_c2_load: 2233; X64-BMI2: # %bb.0: 2234; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 2235; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi 2236; X64-BMI2-NEXT: negb %sil 2237; X64-BMI2-NEXT: movl $-1, %ecx 2238; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx 2239; X64-BMI2-NEXT: movl %ecx, (%rdx) 2240; X64-BMI2-NEXT: retq 2241 %val = load i32, ptr %w 2242 %numhighbits = sub i32 32, %numlowbits 2243 %mask = lshr i32 -1, %numhighbits 2244 store i32 %mask, ptr %escape 2245 %masked = and i32 %mask, %val 2246 ret i32 %masked 2247} 2248 2249define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind { 2250; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext: 2251; X86-NOBMI: # %bb.0: 2252; X86-NOBMI-NEXT: pushl %esi 2253; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2254; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2255; X86-NOBMI-NEXT: 
xorl %ecx, %ecx 2256; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2257; X86-NOBMI-NEXT: movl $-1, %esi 2258; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2259; X86-NOBMI-NEXT: shrl %cl, %esi 2260; X86-NOBMI-NEXT: movl (%eax), %eax 2261; X86-NOBMI-NEXT: andl %esi, %eax 2262; X86-NOBMI-NEXT: movl %esi, (%edx) 2263; X86-NOBMI-NEXT: popl %esi 2264; X86-NOBMI-NEXT: retl 2265; 2266; X86-BMI1-LABEL: bzhi32_c3_load_indexzext: 2267; X86-BMI1: # %bb.0: 2268; X86-BMI1-NEXT: pushl %esi 2269; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2270; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2271; X86-BMI1-NEXT: xorl %ecx, %ecx 2272; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2273; X86-BMI1-NEXT: movl $-1, %esi 2274; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2275; X86-BMI1-NEXT: shrl %cl, %esi 2276; X86-BMI1-NEXT: movl (%eax), %eax 2277; X86-BMI1-NEXT: andl %esi, %eax 2278; X86-BMI1-NEXT: movl %esi, (%edx) 2279; X86-BMI1-NEXT: popl %esi 2280; X86-BMI1-NEXT: retl 2281; 2282; X86-BMI2-LABEL: bzhi32_c3_load_indexzext: 2283; X86-BMI2: # %bb.0: 2284; X86-BMI2-NEXT: pushl %esi 2285; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2286; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2287; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 2288; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax 2289; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx 2290; X86-BMI2-NEXT: negb %dl 2291; X86-BMI2-NEXT: movl $-1, %esi 2292; X86-BMI2-NEXT: shrxl %edx, %esi, %edx 2293; X86-BMI2-NEXT: movl %edx, (%ecx) 2294; X86-BMI2-NEXT: popl %esi 2295; X86-BMI2-NEXT: retl 2296; 2297; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: 2298; X64-NOBMI: # %bb.0: 2299; X64-NOBMI-NEXT: movl %esi, %ecx 2300; X64-NOBMI-NEXT: negb %cl 2301; X64-NOBMI-NEXT: movl $-1, %esi 2302; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2303; X64-NOBMI-NEXT: shrl %cl, %esi 2304; X64-NOBMI-NEXT: movl (%rdi), %eax 2305; X64-NOBMI-NEXT: andl %esi, %eax 2306; X64-NOBMI-NEXT: movl %esi, (%rdx) 2307; X64-NOBMI-NEXT: retq 2308; 2309; 
X64-BMI1-LABEL: bzhi32_c3_load_indexzext: 2310; X64-BMI1: # %bb.0: 2311; X64-BMI1-NEXT: movl %esi, %ecx 2312; X64-BMI1-NEXT: negb %cl 2313; X64-BMI1-NEXT: movl $-1, %esi 2314; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2315; X64-BMI1-NEXT: shrl %cl, %esi 2316; X64-BMI1-NEXT: movl (%rdi), %eax 2317; X64-BMI1-NEXT: andl %esi, %eax 2318; X64-BMI1-NEXT: movl %esi, (%rdx) 2319; X64-BMI1-NEXT: retq 2320; 2321; X64-BMI2-LABEL: bzhi32_c3_load_indexzext: 2322; X64-BMI2: # %bb.0: 2323; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 2324; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi 2325; X64-BMI2-NEXT: negb %sil 2326; X64-BMI2-NEXT: movl $-1, %ecx 2327; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx 2328; X64-BMI2-NEXT: movl %ecx, (%rdx) 2329; X64-BMI2-NEXT: retq 2330 %val = load i32, ptr %w 2331 %numhighbits = sub i8 32, %numlowbits 2332 %sh_prom = zext i8 %numhighbits to i32 2333 %mask = lshr i32 -1, %sh_prom 2334 store i32 %mask, ptr %escape 2335 %masked = and i32 %mask, %val 2336 ret i32 %masked 2337} 2338 2339define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwind { 2340; X86-NOBMI-LABEL: bzhi32_c4_commutative: 2341; X86-NOBMI: # %bb.0: 2342; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2343; X86-NOBMI-NEXT: xorl %ecx, %ecx 2344; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2345; X86-NOBMI-NEXT: movl $-1, %eax 2346; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2347; X86-NOBMI-NEXT: shrl %cl, %eax 2348; X86-NOBMI-NEXT: movl %eax, (%edx) 2349; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2350; X86-NOBMI-NEXT: retl 2351; 2352; X86-BMI1-LABEL: bzhi32_c4_commutative: 2353; X86-BMI1: # %bb.0: 2354; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2355; X86-BMI1-NEXT: xorl %ecx, %ecx 2356; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2357; X86-BMI1-NEXT: movl $-1, %eax 2358; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2359; X86-BMI1-NEXT: shrl %cl, %eax 2360; X86-BMI1-NEXT: movl %eax, (%edx) 2361; X86-BMI1-NEXT: andl 
{{[0-9]+}}(%esp), %eax 2362; X86-BMI1-NEXT: retl 2363; 2364; X86-BMI2-LABEL: bzhi32_c4_commutative: 2365; X86-BMI2: # %bb.0: 2366; X86-BMI2-NEXT: pushl %esi 2367; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2368; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 2369; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax 2370; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx 2371; X86-BMI2-NEXT: negb %dl 2372; X86-BMI2-NEXT: movl $-1, %esi 2373; X86-BMI2-NEXT: shrxl %edx, %esi, %edx 2374; X86-BMI2-NEXT: movl %edx, (%ecx) 2375; X86-BMI2-NEXT: popl %esi 2376; X86-BMI2-NEXT: retl 2377; 2378; X64-NOBMI-LABEL: bzhi32_c4_commutative: 2379; X64-NOBMI: # %bb.0: 2380; X64-NOBMI-NEXT: movl %esi, %ecx 2381; X64-NOBMI-NEXT: negb %cl 2382; X64-NOBMI-NEXT: movl $-1, %eax 2383; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2384; X64-NOBMI-NEXT: shrl %cl, %eax 2385; X64-NOBMI-NEXT: movl %eax, (%rdx) 2386; X64-NOBMI-NEXT: andl %edi, %eax 2387; X64-NOBMI-NEXT: retq 2388; 2389; X64-BMI1-LABEL: bzhi32_c4_commutative: 2390; X64-BMI1: # %bb.0: 2391; X64-BMI1-NEXT: movl %esi, %ecx 2392; X64-BMI1-NEXT: negb %cl 2393; X64-BMI1-NEXT: movl $-1, %eax 2394; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2395; X64-BMI1-NEXT: shrl %cl, %eax 2396; X64-BMI1-NEXT: movl %eax, (%rdx) 2397; X64-BMI1-NEXT: andl %edi, %eax 2398; X64-BMI1-NEXT: retq 2399; 2400; X64-BMI2-LABEL: bzhi32_c4_commutative: 2401; X64-BMI2: # %bb.0: 2402; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 2403; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi 2404; X64-BMI2-NEXT: negb %sil 2405; X64-BMI2-NEXT: movl $-1, %ecx 2406; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx 2407; X64-BMI2-NEXT: movl %ecx, (%rdx) 2408; X64-BMI2-NEXT: retq 2409 %numhighbits = sub i32 32, %numlowbits 2410 %mask = lshr i32 -1, %numhighbits 2411 store i32 %mask, ptr %escape 2412 %masked = and i32 %val, %mask ; swapped order 2413 ret i32 %masked 2414} 2415 2416; 64-bit 2417 2418define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr 
%escape) nounwind { 2419; X86-NOBMI-LABEL: bzhi64_c0: 2420; X86-NOBMI: # %bb.0: 2421; X86-NOBMI-NEXT: pushl %esi 2422; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2423; X86-NOBMI-NEXT: movb $64, %cl 2424; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2425; X86-NOBMI-NEXT: movl $-1, %eax 2426; X86-NOBMI-NEXT: movl $-1, %edx 2427; X86-NOBMI-NEXT: shrl %cl, %edx 2428; X86-NOBMI-NEXT: testb $32, %cl 2429; X86-NOBMI-NEXT: je .LBB34_2 2430; X86-NOBMI-NEXT: # %bb.1: 2431; X86-NOBMI-NEXT: movl %edx, %eax 2432; X86-NOBMI-NEXT: xorl %edx, %edx 2433; X86-NOBMI-NEXT: .LBB34_2: 2434; X86-NOBMI-NEXT: movl %edx, 4(%esi) 2435; X86-NOBMI-NEXT: movl %eax, (%esi) 2436; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2437; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 2438; X86-NOBMI-NEXT: popl %esi 2439; X86-NOBMI-NEXT: retl 2440; 2441; X86-BMI1-LABEL: bzhi64_c0: 2442; X86-BMI1: # %bb.0: 2443; X86-BMI1-NEXT: pushl %esi 2444; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2445; X86-BMI1-NEXT: movb $64, %cl 2446; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2447; X86-BMI1-NEXT: movl $-1, %eax 2448; X86-BMI1-NEXT: movl $-1, %edx 2449; X86-BMI1-NEXT: shrl %cl, %edx 2450; X86-BMI1-NEXT: testb $32, %cl 2451; X86-BMI1-NEXT: je .LBB34_2 2452; X86-BMI1-NEXT: # %bb.1: 2453; X86-BMI1-NEXT: movl %edx, %eax 2454; X86-BMI1-NEXT: xorl %edx, %edx 2455; X86-BMI1-NEXT: .LBB34_2: 2456; X86-BMI1-NEXT: movl %edx, 4(%esi) 2457; X86-BMI1-NEXT: movl %eax, (%esi) 2458; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2459; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 2460; X86-BMI1-NEXT: popl %esi 2461; X86-BMI1-NEXT: retl 2462; 2463; X86-BMI2-LABEL: bzhi64_c0: 2464; X86-BMI2: # %bb.0: 2465; X86-BMI2-NEXT: pushl %ebx 2466; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2467; X86-BMI2-NEXT: movb $64, %bl 2468; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 2469; X86-BMI2-NEXT: movl $-1, %eax 2470; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx 2471; X86-BMI2-NEXT: testb $32, %bl 2472; X86-BMI2-NEXT: je .LBB34_2 2473; X86-BMI2-NEXT: # %bb.1: 2474; 
X86-BMI2-NEXT: movl %edx, %eax 2475; X86-BMI2-NEXT: xorl %edx, %edx 2476; X86-BMI2-NEXT: .LBB34_2: 2477; X86-BMI2-NEXT: movl %edx, 4(%ecx) 2478; X86-BMI2-NEXT: movl %eax, (%ecx) 2479; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 2480; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 2481; X86-BMI2-NEXT: popl %ebx 2482; X86-BMI2-NEXT: retl 2483; 2484; X64-NOBMI-LABEL: bzhi64_c0: 2485; X64-NOBMI: # %bb.0: 2486; X64-NOBMI-NEXT: movq %rsi, %rcx 2487; X64-NOBMI-NEXT: negb %cl 2488; X64-NOBMI-NEXT: movq $-1, %rax 2489; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 2490; X64-NOBMI-NEXT: shrq %cl, %rax 2491; X64-NOBMI-NEXT: movq %rax, (%rdx) 2492; X64-NOBMI-NEXT: andq %rdi, %rax 2493; X64-NOBMI-NEXT: retq 2494; 2495; X64-BMI1-LABEL: bzhi64_c0: 2496; X64-BMI1: # %bb.0: 2497; X64-BMI1-NEXT: movq %rsi, %rcx 2498; X64-BMI1-NEXT: negb %cl 2499; X64-BMI1-NEXT: movq $-1, %rax 2500; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx 2501; X64-BMI1-NEXT: shrq %cl, %rax 2502; X64-BMI1-NEXT: movq %rax, (%rdx) 2503; X64-BMI1-NEXT: andq %rdi, %rax 2504; X64-BMI1-NEXT: retq 2505; 2506; X64-BMI2-LABEL: bzhi64_c0: 2507; X64-BMI2: # %bb.0: 2508; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 2509; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi 2510; X64-BMI2-NEXT: negb %sil 2511; X64-BMI2-NEXT: movq $-1, %rcx 2512; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx 2513; X64-BMI2-NEXT: movq %rcx, (%rdx) 2514; X64-BMI2-NEXT: retq 2515 %numhighbits = sub i64 64, %numlowbits 2516 %mask = lshr i64 -1, %numhighbits 2517 store i64 %mask, ptr %escape 2518 %masked = and i64 %mask, %val 2519 ret i64 %masked 2520} 2521 2522define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind { 2523; X86-NOBMI-LABEL: bzhi64_c1_indexzext: 2524; X86-NOBMI: # %bb.0: 2525; X86-NOBMI-NEXT: pushl %esi 2526; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2527; X86-NOBMI-NEXT: movb $64, %cl 2528; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2529; X86-NOBMI-NEXT: movl $-1, %eax 2530; X86-NOBMI-NEXT: movl 
$-1, %edx 2531; X86-NOBMI-NEXT: shrl %cl, %edx 2532; X86-NOBMI-NEXT: testb $32, %cl 2533; X86-NOBMI-NEXT: je .LBB35_2 2534; X86-NOBMI-NEXT: # %bb.1: 2535; X86-NOBMI-NEXT: movl %edx, %eax 2536; X86-NOBMI-NEXT: xorl %edx, %edx 2537; X86-NOBMI-NEXT: .LBB35_2: 2538; X86-NOBMI-NEXT: movl %edx, 4(%esi) 2539; X86-NOBMI-NEXT: movl %eax, (%esi) 2540; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2541; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 2542; X86-NOBMI-NEXT: popl %esi 2543; X86-NOBMI-NEXT: retl 2544; 2545; X86-BMI1-LABEL: bzhi64_c1_indexzext: 2546; X86-BMI1: # %bb.0: 2547; X86-BMI1-NEXT: pushl %esi 2548; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2549; X86-BMI1-NEXT: movb $64, %cl 2550; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2551; X86-BMI1-NEXT: movl $-1, %eax 2552; X86-BMI1-NEXT: movl $-1, %edx 2553; X86-BMI1-NEXT: shrl %cl, %edx 2554; X86-BMI1-NEXT: testb $32, %cl 2555; X86-BMI1-NEXT: je .LBB35_2 2556; X86-BMI1-NEXT: # %bb.1: 2557; X86-BMI1-NEXT: movl %edx, %eax 2558; X86-BMI1-NEXT: xorl %edx, %edx 2559; X86-BMI1-NEXT: .LBB35_2: 2560; X86-BMI1-NEXT: movl %edx, 4(%esi) 2561; X86-BMI1-NEXT: movl %eax, (%esi) 2562; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2563; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 2564; X86-BMI1-NEXT: popl %esi 2565; X86-BMI1-NEXT: retl 2566; 2567; X86-BMI2-LABEL: bzhi64_c1_indexzext: 2568; X86-BMI2: # %bb.0: 2569; X86-BMI2-NEXT: pushl %ebx 2570; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2571; X86-BMI2-NEXT: movb $64, %bl 2572; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 2573; X86-BMI2-NEXT: movl $-1, %eax 2574; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx 2575; X86-BMI2-NEXT: testb $32, %bl 2576; X86-BMI2-NEXT: je .LBB35_2 2577; X86-BMI2-NEXT: # %bb.1: 2578; X86-BMI2-NEXT: movl %edx, %eax 2579; X86-BMI2-NEXT: xorl %edx, %edx 2580; X86-BMI2-NEXT: .LBB35_2: 2581; X86-BMI2-NEXT: movl %edx, 4(%ecx) 2582; X86-BMI2-NEXT: movl %eax, (%ecx) 2583; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 2584; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 2585; 
X86-BMI2-NEXT: popl %ebx 2586; X86-BMI2-NEXT: retl 2587; 2588; X64-NOBMI-LABEL: bzhi64_c1_indexzext: 2589; X64-NOBMI: # %bb.0: 2590; X64-NOBMI-NEXT: movl %esi, %ecx 2591; X64-NOBMI-NEXT: negb %cl 2592; X64-NOBMI-NEXT: movq $-1, %rax 2593; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2594; X64-NOBMI-NEXT: shrq %cl, %rax 2595; X64-NOBMI-NEXT: movq %rax, (%rdx) 2596; X64-NOBMI-NEXT: andq %rdi, %rax 2597; X64-NOBMI-NEXT: retq 2598; 2599; X64-BMI1-LABEL: bzhi64_c1_indexzext: 2600; X64-BMI1: # %bb.0: 2601; X64-BMI1-NEXT: movl %esi, %ecx 2602; X64-BMI1-NEXT: negb %cl 2603; X64-BMI1-NEXT: movq $-1, %rax 2604; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2605; X64-BMI1-NEXT: shrq %cl, %rax 2606; X64-BMI1-NEXT: movq %rax, (%rdx) 2607; X64-BMI1-NEXT: andq %rdi, %rax 2608; X64-BMI1-NEXT: retq 2609; 2610; X64-BMI2-LABEL: bzhi64_c1_indexzext: 2611; X64-BMI2: # %bb.0: 2612; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 2613; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 2614; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi 2615; X64-BMI2-NEXT: negb %sil 2616; X64-BMI2-NEXT: movq $-1, %rcx 2617; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx 2618; X64-BMI2-NEXT: movq %rcx, (%rdx) 2619; X64-BMI2-NEXT: retq 2620 %numhighbits = sub i8 64, %numlowbits 2621 %sh_prom = zext i8 %numhighbits to i64 2622 %mask = lshr i64 -1, %sh_prom 2623 store i64 %mask, ptr %escape 2624 %masked = and i64 %mask, %val 2625 ret i64 %masked 2626} 2627 2628define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind { 2629; X86-NOBMI-LABEL: bzhi64_c2_load: 2630; X86-NOBMI: # %bb.0: 2631; X86-NOBMI-NEXT: pushl %ebx 2632; X86-NOBMI-NEXT: pushl %edi 2633; X86-NOBMI-NEXT: pushl %esi 2634; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2635; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2636; X86-NOBMI-NEXT: movb $64, %cl 2637; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2638; X86-NOBMI-NEXT: movl $-1, %edi 2639; X86-NOBMI-NEXT: movl $-1, %ebx 2640; X86-NOBMI-NEXT: shrl %cl, %ebx 
2641; X86-NOBMI-NEXT: testb $32, %cl 2642; X86-NOBMI-NEXT: je .LBB36_2 2643; X86-NOBMI-NEXT: # %bb.1: 2644; X86-NOBMI-NEXT: movl %ebx, %edi 2645; X86-NOBMI-NEXT: xorl %ebx, %ebx 2646; X86-NOBMI-NEXT: .LBB36_2: 2647; X86-NOBMI-NEXT: movl 4(%eax), %edx 2648; X86-NOBMI-NEXT: andl %ebx, %edx 2649; X86-NOBMI-NEXT: movl (%eax), %eax 2650; X86-NOBMI-NEXT: andl %edi, %eax 2651; X86-NOBMI-NEXT: movl %ebx, 4(%esi) 2652; X86-NOBMI-NEXT: movl %edi, (%esi) 2653; X86-NOBMI-NEXT: popl %esi 2654; X86-NOBMI-NEXT: popl %edi 2655; X86-NOBMI-NEXT: popl %ebx 2656; X86-NOBMI-NEXT: retl 2657; 2658; X86-BMI1-LABEL: bzhi64_c2_load: 2659; X86-BMI1: # %bb.0: 2660; X86-BMI1-NEXT: pushl %ebx 2661; X86-BMI1-NEXT: pushl %edi 2662; X86-BMI1-NEXT: pushl %esi 2663; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2664; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2665; X86-BMI1-NEXT: movb $64, %cl 2666; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2667; X86-BMI1-NEXT: movl $-1, %edi 2668; X86-BMI1-NEXT: movl $-1, %ebx 2669; X86-BMI1-NEXT: shrl %cl, %ebx 2670; X86-BMI1-NEXT: testb $32, %cl 2671; X86-BMI1-NEXT: je .LBB36_2 2672; X86-BMI1-NEXT: # %bb.1: 2673; X86-BMI1-NEXT: movl %ebx, %edi 2674; X86-BMI1-NEXT: xorl %ebx, %ebx 2675; X86-BMI1-NEXT: .LBB36_2: 2676; X86-BMI1-NEXT: movl 4(%eax), %edx 2677; X86-BMI1-NEXT: andl %ebx, %edx 2678; X86-BMI1-NEXT: movl (%eax), %eax 2679; X86-BMI1-NEXT: andl %edi, %eax 2680; X86-BMI1-NEXT: movl %ebx, 4(%esi) 2681; X86-BMI1-NEXT: movl %edi, (%esi) 2682; X86-BMI1-NEXT: popl %esi 2683; X86-BMI1-NEXT: popl %edi 2684; X86-BMI1-NEXT: popl %ebx 2685; X86-BMI1-NEXT: retl 2686; 2687; X86-BMI2-LABEL: bzhi64_c2_load: 2688; X86-BMI2: # %bb.0: 2689; X86-BMI2-NEXT: pushl %edi 2690; X86-BMI2-NEXT: pushl %esi 2691; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2692; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2693; X86-BMI2-NEXT: movb $64, %dl 2694; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %dl 2695; X86-BMI2-NEXT: movl $-1, %esi 2696; X86-BMI2-NEXT: shrxl %edx, %esi, %edi 2697; X86-BMI2-NEXT: 
testb $32, %dl 2698; X86-BMI2-NEXT: je .LBB36_2 2699; X86-BMI2-NEXT: # %bb.1: 2700; X86-BMI2-NEXT: movl %edi, %esi 2701; X86-BMI2-NEXT: xorl %edi, %edi 2702; X86-BMI2-NEXT: .LBB36_2: 2703; X86-BMI2-NEXT: movl 4(%eax), %edx 2704; X86-BMI2-NEXT: andl %edi, %edx 2705; X86-BMI2-NEXT: movl (%eax), %eax 2706; X86-BMI2-NEXT: andl %esi, %eax 2707; X86-BMI2-NEXT: movl %edi, 4(%ecx) 2708; X86-BMI2-NEXT: movl %esi, (%ecx) 2709; X86-BMI2-NEXT: popl %esi 2710; X86-BMI2-NEXT: popl %edi 2711; X86-BMI2-NEXT: retl 2712; 2713; X64-NOBMI-LABEL: bzhi64_c2_load: 2714; X64-NOBMI: # %bb.0: 2715; X64-NOBMI-NEXT: movq %rsi, %rcx 2716; X64-NOBMI-NEXT: negb %cl 2717; X64-NOBMI-NEXT: movq $-1, %rsi 2718; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 2719; X64-NOBMI-NEXT: shrq %cl, %rsi 2720; X64-NOBMI-NEXT: movq (%rdi), %rax 2721; X64-NOBMI-NEXT: andq %rsi, %rax 2722; X64-NOBMI-NEXT: movq %rsi, (%rdx) 2723; X64-NOBMI-NEXT: retq 2724; 2725; X64-BMI1-LABEL: bzhi64_c2_load: 2726; X64-BMI1: # %bb.0: 2727; X64-BMI1-NEXT: movq %rsi, %rcx 2728; X64-BMI1-NEXT: negb %cl 2729; X64-BMI1-NEXT: movq $-1, %rsi 2730; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx 2731; X64-BMI1-NEXT: shrq %cl, %rsi 2732; X64-BMI1-NEXT: movq (%rdi), %rax 2733; X64-BMI1-NEXT: andq %rsi, %rax 2734; X64-BMI1-NEXT: movq %rsi, (%rdx) 2735; X64-BMI1-NEXT: retq 2736; 2737; X64-BMI2-LABEL: bzhi64_c2_load: 2738; X64-BMI2: # %bb.0: 2739; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 2740; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi 2741; X64-BMI2-NEXT: negb %sil 2742; X64-BMI2-NEXT: movq $-1, %rcx 2743; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx 2744; X64-BMI2-NEXT: movq %rcx, (%rdx) 2745; X64-BMI2-NEXT: retq 2746 %val = load i64, ptr %w 2747 %numhighbits = sub i64 64, %numlowbits 2748 %mask = lshr i64 -1, %numhighbits 2749 store i64 %mask, ptr %escape 2750 %masked = and i64 %mask, %val 2751 ret i64 %masked 2752} 2753 2754define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind { 
2755; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext: 2756; X86-NOBMI: # %bb.0: 2757; X86-NOBMI-NEXT: pushl %ebx 2758; X86-NOBMI-NEXT: pushl %edi 2759; X86-NOBMI-NEXT: pushl %esi 2760; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2761; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2762; X86-NOBMI-NEXT: movb $64, %cl 2763; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2764; X86-NOBMI-NEXT: movl $-1, %edi 2765; X86-NOBMI-NEXT: movl $-1, %ebx 2766; X86-NOBMI-NEXT: shrl %cl, %ebx 2767; X86-NOBMI-NEXT: testb $32, %cl 2768; X86-NOBMI-NEXT: je .LBB37_2 2769; X86-NOBMI-NEXT: # %bb.1: 2770; X86-NOBMI-NEXT: movl %ebx, %edi 2771; X86-NOBMI-NEXT: xorl %ebx, %ebx 2772; X86-NOBMI-NEXT: .LBB37_2: 2773; X86-NOBMI-NEXT: movl 4(%eax), %edx 2774; X86-NOBMI-NEXT: andl %ebx, %edx 2775; X86-NOBMI-NEXT: movl (%eax), %eax 2776; X86-NOBMI-NEXT: andl %edi, %eax 2777; X86-NOBMI-NEXT: movl %ebx, 4(%esi) 2778; X86-NOBMI-NEXT: movl %edi, (%esi) 2779; X86-NOBMI-NEXT: popl %esi 2780; X86-NOBMI-NEXT: popl %edi 2781; X86-NOBMI-NEXT: popl %ebx 2782; X86-NOBMI-NEXT: retl 2783; 2784; X86-BMI1-LABEL: bzhi64_c3_load_indexzext: 2785; X86-BMI1: # %bb.0: 2786; X86-BMI1-NEXT: pushl %ebx 2787; X86-BMI1-NEXT: pushl %edi 2788; X86-BMI1-NEXT: pushl %esi 2789; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2790; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2791; X86-BMI1-NEXT: movb $64, %cl 2792; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2793; X86-BMI1-NEXT: movl $-1, %edi 2794; X86-BMI1-NEXT: movl $-1, %ebx 2795; X86-BMI1-NEXT: shrl %cl, %ebx 2796; X86-BMI1-NEXT: testb $32, %cl 2797; X86-BMI1-NEXT: je .LBB37_2 2798; X86-BMI1-NEXT: # %bb.1: 2799; X86-BMI1-NEXT: movl %ebx, %edi 2800; X86-BMI1-NEXT: xorl %ebx, %ebx 2801; X86-BMI1-NEXT: .LBB37_2: 2802; X86-BMI1-NEXT: movl 4(%eax), %edx 2803; X86-BMI1-NEXT: andl %ebx, %edx 2804; X86-BMI1-NEXT: movl (%eax), %eax 2805; X86-BMI1-NEXT: andl %edi, %eax 2806; X86-BMI1-NEXT: movl %ebx, 4(%esi) 2807; X86-BMI1-NEXT: movl %edi, (%esi) 2808; X86-BMI1-NEXT: popl %esi 2809; X86-BMI1-NEXT: popl 
%edi 2810; X86-BMI1-NEXT: popl %ebx 2811; X86-BMI1-NEXT: retl 2812; 2813; X86-BMI2-LABEL: bzhi64_c3_load_indexzext: 2814; X86-BMI2: # %bb.0: 2815; X86-BMI2-NEXT: pushl %edi 2816; X86-BMI2-NEXT: pushl %esi 2817; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2818; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2819; X86-BMI2-NEXT: movb $64, %dl 2820; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %dl 2821; X86-BMI2-NEXT: movl $-1, %esi 2822; X86-BMI2-NEXT: shrxl %edx, %esi, %edi 2823; X86-BMI2-NEXT: testb $32, %dl 2824; X86-BMI2-NEXT: je .LBB37_2 2825; X86-BMI2-NEXT: # %bb.1: 2826; X86-BMI2-NEXT: movl %edi, %esi 2827; X86-BMI2-NEXT: xorl %edi, %edi 2828; X86-BMI2-NEXT: .LBB37_2: 2829; X86-BMI2-NEXT: movl 4(%eax), %edx 2830; X86-BMI2-NEXT: andl %edi, %edx 2831; X86-BMI2-NEXT: movl (%eax), %eax 2832; X86-BMI2-NEXT: andl %esi, %eax 2833; X86-BMI2-NEXT: movl %edi, 4(%ecx) 2834; X86-BMI2-NEXT: movl %esi, (%ecx) 2835; X86-BMI2-NEXT: popl %esi 2836; X86-BMI2-NEXT: popl %edi 2837; X86-BMI2-NEXT: retl 2838; 2839; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: 2840; X64-NOBMI: # %bb.0: 2841; X64-NOBMI-NEXT: movl %esi, %ecx 2842; X64-NOBMI-NEXT: negb %cl 2843; X64-NOBMI-NEXT: movq $-1, %rsi 2844; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2845; X64-NOBMI-NEXT: shrq %cl, %rsi 2846; X64-NOBMI-NEXT: movq (%rdi), %rax 2847; X64-NOBMI-NEXT: andq %rsi, %rax 2848; X64-NOBMI-NEXT: movq %rsi, (%rdx) 2849; X64-NOBMI-NEXT: retq 2850; 2851; X64-BMI1-LABEL: bzhi64_c3_load_indexzext: 2852; X64-BMI1: # %bb.0: 2853; X64-BMI1-NEXT: movl %esi, %ecx 2854; X64-BMI1-NEXT: negb %cl 2855; X64-BMI1-NEXT: movq $-1, %rsi 2856; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2857; X64-BMI1-NEXT: shrq %cl, %rsi 2858; X64-BMI1-NEXT: movq (%rdi), %rax 2859; X64-BMI1-NEXT: andq %rsi, %rax 2860; X64-BMI1-NEXT: movq %rsi, (%rdx) 2861; X64-BMI1-NEXT: retq 2862; 2863; X64-BMI2-LABEL: bzhi64_c3_load_indexzext: 2864; X64-BMI2: # %bb.0: 2865; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 2866; X64-BMI2-NEXT: 
bzhiq %rsi, (%rdi), %rax 2867; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi 2868; X64-BMI2-NEXT: negb %sil 2869; X64-BMI2-NEXT: movq $-1, %rcx 2870; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx 2871; X64-BMI2-NEXT: movq %rcx, (%rdx) 2872; X64-BMI2-NEXT: retq 2873 %val = load i64, ptr %w 2874 %numhighbits = sub i8 64, %numlowbits 2875 %sh_prom = zext i8 %numhighbits to i64 2876 %mask = lshr i64 -1, %sh_prom 2877 store i64 %mask, ptr %escape 2878 %masked = and i64 %mask, %val 2879 ret i64 %masked 2880} 2881 2882define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwind { 2883; X86-NOBMI-LABEL: bzhi64_c4_commutative: 2884; X86-NOBMI: # %bb.0: 2885; X86-NOBMI-NEXT: pushl %esi 2886; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2887; X86-NOBMI-NEXT: movb $64, %cl 2888; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2889; X86-NOBMI-NEXT: movl $-1, %eax 2890; X86-NOBMI-NEXT: movl $-1, %edx 2891; X86-NOBMI-NEXT: shrl %cl, %edx 2892; X86-NOBMI-NEXT: testb $32, %cl 2893; X86-NOBMI-NEXT: je .LBB38_2 2894; X86-NOBMI-NEXT: # %bb.1: 2895; X86-NOBMI-NEXT: movl %edx, %eax 2896; X86-NOBMI-NEXT: xorl %edx, %edx 2897; X86-NOBMI-NEXT: .LBB38_2: 2898; X86-NOBMI-NEXT: movl %edx, 4(%esi) 2899; X86-NOBMI-NEXT: movl %eax, (%esi) 2900; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2901; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 2902; X86-NOBMI-NEXT: popl %esi 2903; X86-NOBMI-NEXT: retl 2904; 2905; X86-BMI1-LABEL: bzhi64_c4_commutative: 2906; X86-BMI1: # %bb.0: 2907; X86-BMI1-NEXT: pushl %esi 2908; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2909; X86-BMI1-NEXT: movb $64, %cl 2910; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2911; X86-BMI1-NEXT: movl $-1, %eax 2912; X86-BMI1-NEXT: movl $-1, %edx 2913; X86-BMI1-NEXT: shrl %cl, %edx 2914; X86-BMI1-NEXT: testb $32, %cl 2915; X86-BMI1-NEXT: je .LBB38_2 2916; X86-BMI1-NEXT: # %bb.1: 2917; X86-BMI1-NEXT: movl %edx, %eax 2918; X86-BMI1-NEXT: xorl %edx, %edx 2919; X86-BMI1-NEXT: .LBB38_2: 2920; X86-BMI1-NEXT: movl %edx, 
4(%esi) 2921; X86-BMI1-NEXT: movl %eax, (%esi) 2922; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2923; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 2924; X86-BMI1-NEXT: popl %esi 2925; X86-BMI1-NEXT: retl 2926; 2927; X86-BMI2-LABEL: bzhi64_c4_commutative: 2928; X86-BMI2: # %bb.0: 2929; X86-BMI2-NEXT: pushl %ebx 2930; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2931; X86-BMI2-NEXT: movb $64, %bl 2932; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 2933; X86-BMI2-NEXT: movl $-1, %eax 2934; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx 2935; X86-BMI2-NEXT: testb $32, %bl 2936; X86-BMI2-NEXT: je .LBB38_2 2937; X86-BMI2-NEXT: # %bb.1: 2938; X86-BMI2-NEXT: movl %edx, %eax 2939; X86-BMI2-NEXT: xorl %edx, %edx 2940; X86-BMI2-NEXT: .LBB38_2: 2941; X86-BMI2-NEXT: movl %edx, 4(%ecx) 2942; X86-BMI2-NEXT: movl %eax, (%ecx) 2943; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 2944; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 2945; X86-BMI2-NEXT: popl %ebx 2946; X86-BMI2-NEXT: retl 2947; 2948; X64-NOBMI-LABEL: bzhi64_c4_commutative: 2949; X64-NOBMI: # %bb.0: 2950; X64-NOBMI-NEXT: movq %rsi, %rcx 2951; X64-NOBMI-NEXT: negb %cl 2952; X64-NOBMI-NEXT: movq $-1, %rax 2953; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 2954; X64-NOBMI-NEXT: shrq %cl, %rax 2955; X64-NOBMI-NEXT: movq %rax, (%rdx) 2956; X64-NOBMI-NEXT: andq %rdi, %rax 2957; X64-NOBMI-NEXT: retq 2958; 2959; X64-BMI1-LABEL: bzhi64_c4_commutative: 2960; X64-BMI1: # %bb.0: 2961; X64-BMI1-NEXT: movq %rsi, %rcx 2962; X64-BMI1-NEXT: negb %cl 2963; X64-BMI1-NEXT: movq $-1, %rax 2964; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx 2965; X64-BMI1-NEXT: shrq %cl, %rax 2966; X64-BMI1-NEXT: movq %rax, (%rdx) 2967; X64-BMI1-NEXT: andq %rdi, %rax 2968; X64-BMI1-NEXT: retq 2969; 2970; X64-BMI2-LABEL: bzhi64_c4_commutative: 2971; X64-BMI2: # %bb.0: 2972; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 2973; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi 2974; X64-BMI2-NEXT: negb %sil 2975; X64-BMI2-NEXT: movq $-1, %rcx 2976; 
X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx 2977; X64-BMI2-NEXT: movq %rcx, (%rdx) 2978; X64-BMI2-NEXT: retq 2979 %numhighbits = sub i64 64, %numlowbits 2980 %mask = lshr i64 -1, %numhighbits 2981 store i64 %mask, ptr %escape 2982 %masked = and i64 %val, %mask ; swapped order 2983 ret i64 %masked 2984} 2985 2986; 64-bit, but with 32-bit output 2987 2988; Everything done in 64-bit, truncation happens last. 2989define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind { 2990; X86-NOBMI-LABEL: bzhi64_32_c0: 2991; X86-NOBMI: # %bb.0: 2992; X86-NOBMI-NEXT: movb $64, %cl 2993; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2994; X86-NOBMI-NEXT: movl $-1, %eax 2995; X86-NOBMI-NEXT: shrl %cl, %eax 2996; X86-NOBMI-NEXT: testb $32, %cl 2997; X86-NOBMI-NEXT: jne .LBB39_2 2998; X86-NOBMI-NEXT: # %bb.1: 2999; X86-NOBMI-NEXT: movl $-1, %eax 3000; X86-NOBMI-NEXT: .LBB39_2: 3001; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 3002; X86-NOBMI-NEXT: retl 3003; 3004; X86-BMI1-LABEL: bzhi64_32_c0: 3005; X86-BMI1: # %bb.0: 3006; X86-BMI1-NEXT: movb $64, %cl 3007; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3008; X86-BMI1-NEXT: movl $-1, %eax 3009; X86-BMI1-NEXT: shrl %cl, %eax 3010; X86-BMI1-NEXT: testb $32, %cl 3011; X86-BMI1-NEXT: jne .LBB39_2 3012; X86-BMI1-NEXT: # %bb.1: 3013; X86-BMI1-NEXT: movl $-1, %eax 3014; X86-BMI1-NEXT: .LBB39_2: 3015; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 3016; X86-BMI1-NEXT: retl 3017; 3018; X86-BMI2-LABEL: bzhi64_32_c0: 3019; X86-BMI2: # %bb.0: 3020; X86-BMI2-NEXT: movb $64, %cl 3021; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3022; X86-BMI2-NEXT: movl $-1, %eax 3023; X86-BMI2-NEXT: testb $32, %cl 3024; X86-BMI2-NEXT: je .LBB39_2 3025; X86-BMI2-NEXT: # %bb.1: 3026; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 3027; X86-BMI2-NEXT: .LBB39_2: 3028; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 3029; X86-BMI2-NEXT: retl 3030; 3031; X64-NOBMI-LABEL: bzhi64_32_c0: 3032; X64-NOBMI: # %bb.0: 3033; X64-NOBMI-NEXT: movq %rsi, %rcx 3034; X64-NOBMI-NEXT: negb %cl 3035; X64-NOBMI-NEXT: 
movq $-1, %rax 3036; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3037; X64-NOBMI-NEXT: shrq %cl, %rax 3038; X64-NOBMI-NEXT: andl %edi, %eax 3039; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3040; X64-NOBMI-NEXT: retq 3041; 3042; X64-BMI1-LABEL: bzhi64_32_c0: 3043; X64-BMI1: # %bb.0: 3044; X64-BMI1-NEXT: shll $8, %esi 3045; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 3046; X64-BMI1-NEXT: retq 3047; 3048; X64-BMI2-LABEL: bzhi64_32_c0: 3049; X64-BMI2: # %bb.0: 3050; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 3051; X64-BMI2-NEXT: retq 3052 %numhighbits = sub i64 64, %numlowbits 3053 %mask = lshr i64 -1, %numhighbits 3054 %masked = and i64 %mask, %val 3055 %res = trunc i64 %masked to i32 3056 ret i32 %res 3057} 3058 3059; The value is truncated to 32-bit first. Shifting and masking are 32-bit. 3060define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind { 3061; X86-NOBMI-LABEL: bzhi64_32_c1: 3062; X86-NOBMI: # %bb.0: 3063; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3064; X86-NOBMI-NEXT: xorl %ecx, %ecx 3065; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3066; X86-NOBMI-NEXT: shll %cl, %eax 3067; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3068; X86-NOBMI-NEXT: shrl %cl, %eax 3069; X86-NOBMI-NEXT: retl 3070; 3071; X86-BMI1-LABEL: bzhi64_32_c1: 3072; X86-BMI1: # %bb.0: 3073; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3074; X86-BMI1-NEXT: shll $8, %eax 3075; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 3076; X86-BMI1-NEXT: retl 3077; 3078; X86-BMI2-LABEL: bzhi64_32_c1: 3079; X86-BMI2: # %bb.0: 3080; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3081; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 3082; X86-BMI2-NEXT: retl 3083; 3084; X64-NOBMI-LABEL: bzhi64_32_c1: 3085; X64-NOBMI: # %bb.0: 3086; X64-NOBMI-NEXT: movl %esi, %ecx 3087; X64-NOBMI-NEXT: movq %rdi, %rax 3088; X64-NOBMI-NEXT: negb %cl 3089; X64-NOBMI-NEXT: shll %cl, %eax 3090; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3091; X64-NOBMI-NEXT: shrl %cl, %eax 3092; 
X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3093; X64-NOBMI-NEXT: retq 3094; 3095; X64-BMI1-LABEL: bzhi64_32_c1: 3096; X64-BMI1: # %bb.0: 3097; X64-BMI1-NEXT: shll $8, %esi 3098; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 3099; X64-BMI1-NEXT: retq 3100; 3101; X64-BMI2-LABEL: bzhi64_32_c1: 3102; X64-BMI2: # %bb.0: 3103; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 3104; X64-BMI2-NEXT: retq 3105 %truncval = trunc i64 %val to i32 3106 %numhighbits = sub i32 32, %numlowbits 3107 %mask = lshr i32 -1, %numhighbits 3108 %masked = and i32 %mask, %truncval 3109 ret i32 %masked 3110} 3111 3112; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit. 3113; Masking is 64-bit. Then truncation. 3114define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind { 3115; X86-NOBMI-LABEL: bzhi64_32_c2: 3116; X86-NOBMI: # %bb.0: 3117; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3118; X86-NOBMI-NEXT: xorl %ecx, %ecx 3119; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3120; X86-NOBMI-NEXT: shll %cl, %eax 3121; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3122; X86-NOBMI-NEXT: shrl %cl, %eax 3123; X86-NOBMI-NEXT: retl 3124; 3125; X86-BMI1-LABEL: bzhi64_32_c2: 3126; X86-BMI1: # %bb.0: 3127; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3128; X86-BMI1-NEXT: shll $8, %eax 3129; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 3130; X86-BMI1-NEXT: retl 3131; 3132; X86-BMI2-LABEL: bzhi64_32_c2: 3133; X86-BMI2: # %bb.0: 3134; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3135; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 3136; X86-BMI2-NEXT: retl 3137; 3138; X64-NOBMI-LABEL: bzhi64_32_c2: 3139; X64-NOBMI: # %bb.0: 3140; X64-NOBMI-NEXT: movl %esi, %ecx 3141; X64-NOBMI-NEXT: movq %rdi, %rax 3142; X64-NOBMI-NEXT: negb %cl 3143; X64-NOBMI-NEXT: shll %cl, %eax 3144; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3145; X64-NOBMI-NEXT: shrl %cl, %eax 3146; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3147; X64-NOBMI-NEXT: retq 3148; 3149; X64-BMI1-LABEL: 
bzhi64_32_c2: 3150; X64-BMI1: # %bb.0: 3151; X64-BMI1-NEXT: shll $8, %esi 3152; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 3153; X64-BMI1-NEXT: retq 3154; 3155; X64-BMI2-LABEL: bzhi64_32_c2: 3156; X64-BMI2: # %bb.0: 3157; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 3158; X64-BMI2-NEXT: retq 3159 %numhighbits = sub i32 32, %numlowbits 3160 %mask = lshr i32 -1, %numhighbits 3161 %zextmask = zext i32 %mask to i64 3162 %masked = and i64 %zextmask, %val 3163 %truncmasked = trunc i64 %masked to i32 3164 ret i32 %truncmasked 3165} 3166 3167; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit. 3168; Masking is 64-bit. Then truncation. 3169define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind { 3170; X86-LABEL: bzhi64_32_c3: 3171; X86: # %bb.0: 3172; X86-NEXT: movb $64, %cl 3173; X86-NEXT: subb {{[0-9]+}}(%esp), %cl 3174; X86-NEXT: xorl %eax, %eax 3175; X86-NEXT: movl $-1, %edx 3176; X86-NEXT: shrdl %cl, %eax, %edx 3177; X86-NEXT: testb $32, %cl 3178; X86-NEXT: jne .LBB42_2 3179; X86-NEXT: # %bb.1: 3180; X86-NEXT: movl %edx, %eax 3181; X86-NEXT: .LBB42_2: 3182; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 3183; X86-NEXT: retl 3184; 3185; X64-NOBMI-LABEL: bzhi64_32_c3: 3186; X64-NOBMI: # %bb.0: 3187; X64-NOBMI-NEXT: movq %rsi, %rcx 3188; X64-NOBMI-NEXT: negb %cl 3189; X64-NOBMI-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 3190; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3191; X64-NOBMI-NEXT: shrq %cl, %rax 3192; X64-NOBMI-NEXT: andl %edi, %eax 3193; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3194; X64-NOBMI-NEXT: retq 3195; 3196; X64-BMI1-LABEL: bzhi64_32_c3: 3197; X64-BMI1: # %bb.0: 3198; X64-BMI1-NEXT: movq %rsi, %rcx 3199; X64-BMI1-NEXT: negb %cl 3200; X64-BMI1-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 3201; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx 3202; X64-BMI1-NEXT: shrq %cl, %rax 3203; X64-BMI1-NEXT: andl %edi, %eax 3204; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax 3205; X64-BMI1-NEXT: retq 3206; 
3207; X64-BMI2-LABEL: bzhi64_32_c3: 3208; X64-BMI2: # %bb.0: 3209; X64-BMI2-NEXT: negb %sil 3210; X64-BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 3211; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax 3212; X64-BMI2-NEXT: andl %edi, %eax 3213; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax 3214; X64-BMI2-NEXT: retq 3215 %numhighbits = sub i64 64, %numlowbits 3216 %mask = lshr i64 4294967295, %numhighbits 3217 %masked = and i64 %mask, %val 3218 %truncmasked = trunc i64 %masked to i32 3219 ret i32 %truncmasked 3220} 3221 3222; ---------------------------------------------------------------------------- ; 3223; Pattern d. 32-bit. 3224; ---------------------------------------------------------------------------- ; 3225 3226define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { 3227; X86-NOBMI-LABEL: bzhi32_d0: 3228; X86-NOBMI: # %bb.0: 3229; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3230; X86-NOBMI-NEXT: xorl %ecx, %ecx 3231; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3232; X86-NOBMI-NEXT: shll %cl, %eax 3233; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3234; X86-NOBMI-NEXT: shrl %cl, %eax 3235; X86-NOBMI-NEXT: retl 3236; 3237; X86-BMI1-LABEL: bzhi32_d0: 3238; X86-BMI1: # %bb.0: 3239; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3240; X86-BMI1-NEXT: shll $8, %eax 3241; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 3242; X86-BMI1-NEXT: retl 3243; 3244; X86-BMI2-LABEL: bzhi32_d0: 3245; X86-BMI2: # %bb.0: 3246; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3247; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 3248; X86-BMI2-NEXT: retl 3249; 3250; X64-NOBMI-LABEL: bzhi32_d0: 3251; X64-NOBMI: # %bb.0: 3252; X64-NOBMI-NEXT: movl %esi, %ecx 3253; X64-NOBMI-NEXT: movl %edi, %eax 3254; X64-NOBMI-NEXT: negb %cl 3255; X64-NOBMI-NEXT: shll %cl, %eax 3256; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3257; X64-NOBMI-NEXT: shrl %cl, %eax 3258; X64-NOBMI-NEXT: retq 3259; 3260; X64-BMI1-LABEL: bzhi32_d0: 3261; X64-BMI1: # %bb.0: 3262; 
X64-BMI1-NEXT: shll $8, %esi 3263; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 3264; X64-BMI1-NEXT: retq 3265; 3266; X64-BMI2-LABEL: bzhi32_d0: 3267; X64-BMI2: # %bb.0: 3268; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 3269; X64-BMI2-NEXT: retq 3270 %numhighbits = sub i32 32, %numlowbits 3271 %highbitscleared = shl i32 %val, %numhighbits 3272 %masked = lshr i32 %highbitscleared, %numhighbits 3273 ret i32 %masked 3274} 3275 3276define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { 3277; X86-NOBMI-LABEL: bzhi32_d1_indexzext: 3278; X86-NOBMI: # %bb.0: 3279; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3280; X86-NOBMI-NEXT: xorl %ecx, %ecx 3281; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3282; X86-NOBMI-NEXT: shll %cl, %eax 3283; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3284; X86-NOBMI-NEXT: shrl %cl, %eax 3285; X86-NOBMI-NEXT: retl 3286; 3287; X86-BMI1-LABEL: bzhi32_d1_indexzext: 3288; X86-BMI1: # %bb.0: 3289; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3290; X86-BMI1-NEXT: shll $8, %eax 3291; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 3292; X86-BMI1-NEXT: retl 3293; 3294; X86-BMI2-LABEL: bzhi32_d1_indexzext: 3295; X86-BMI2: # %bb.0: 3296; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 3297; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 3298; X86-BMI2-NEXT: retl 3299; 3300; X64-NOBMI-LABEL: bzhi32_d1_indexzext: 3301; X64-NOBMI: # %bb.0: 3302; X64-NOBMI-NEXT: movl %esi, %ecx 3303; X64-NOBMI-NEXT: movl %edi, %eax 3304; X64-NOBMI-NEXT: negb %cl 3305; X64-NOBMI-NEXT: shll %cl, %eax 3306; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3307; X64-NOBMI-NEXT: shrl %cl, %eax 3308; X64-NOBMI-NEXT: retq 3309; 3310; X64-BMI1-LABEL: bzhi32_d1_indexzext: 3311; X64-BMI1: # %bb.0: 3312; X64-BMI1-NEXT: shll $8, %esi 3313; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 3314; X64-BMI1-NEXT: retq 3315; 3316; X64-BMI2-LABEL: bzhi32_d1_indexzext: 3317; X64-BMI2: # %bb.0: 3318; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 3319; X64-BMI2-NEXT: retq 3320 %numhighbits = 
sub i8 32, %numlowbits 3321 %sh_prom = zext i8 %numhighbits to i32 3322 %highbitscleared = shl i32 %val, %sh_prom 3323 %masked = lshr i32 %highbitscleared, %sh_prom 3324 ret i32 %masked 3325} 3326 3327define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind { 3328; X86-NOBMI-LABEL: bzhi32_d2_load: 3329; X86-NOBMI: # %bb.0: 3330; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3331; X86-NOBMI-NEXT: movl (%eax), %eax 3332; X86-NOBMI-NEXT: xorl %ecx, %ecx 3333; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3334; X86-NOBMI-NEXT: shll %cl, %eax 3335; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3336; X86-NOBMI-NEXT: shrl %cl, %eax 3337; X86-NOBMI-NEXT: retl 3338; 3339; X86-BMI1-LABEL: bzhi32_d2_load: 3340; X86-BMI1: # %bb.0: 3341; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3342; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 3343; X86-BMI1-NEXT: shll $8, %ecx 3344; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 3345; X86-BMI1-NEXT: retl 3346; 3347; X86-BMI2-LABEL: bzhi32_d2_load: 3348; X86-BMI2: # %bb.0: 3349; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3350; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 3351; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 3352; X86-BMI2-NEXT: retl 3353; 3354; X64-NOBMI-LABEL: bzhi32_d2_load: 3355; X64-NOBMI: # %bb.0: 3356; X64-NOBMI-NEXT: movl %esi, %ecx 3357; X64-NOBMI-NEXT: movl (%rdi), %eax 3358; X64-NOBMI-NEXT: negb %cl 3359; X64-NOBMI-NEXT: shll %cl, %eax 3360; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3361; X64-NOBMI-NEXT: shrl %cl, %eax 3362; X64-NOBMI-NEXT: retq 3363; 3364; X64-BMI1-LABEL: bzhi32_d2_load: 3365; X64-BMI1: # %bb.0: 3366; X64-BMI1-NEXT: shll $8, %esi 3367; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 3368; X64-BMI1-NEXT: retq 3369; 3370; X64-BMI2-LABEL: bzhi32_d2_load: 3371; X64-BMI2: # %bb.0: 3372; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 3373; X64-BMI2-NEXT: retq 3374 %val = load i32, ptr %w 3375 %numhighbits = sub i32 32, %numlowbits 3376 %highbitscleared = shl i32 %val, %numhighbits 3377 %masked = lshr i32 
%highbitscleared, %numhighbits 3378 ret i32 %masked 3379} 3380 3381define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { 3382; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext: 3383; X86-NOBMI: # %bb.0: 3384; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3385; X86-NOBMI-NEXT: movl (%eax), %eax 3386; X86-NOBMI-NEXT: xorl %ecx, %ecx 3387; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3388; X86-NOBMI-NEXT: shll %cl, %eax 3389; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3390; X86-NOBMI-NEXT: shrl %cl, %eax 3391; X86-NOBMI-NEXT: retl 3392; 3393; X86-BMI1-LABEL: bzhi32_d3_load_indexzext: 3394; X86-BMI1: # %bb.0: 3395; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3396; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 3397; X86-BMI1-NEXT: shll $8, %ecx 3398; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 3399; X86-BMI1-NEXT: retl 3400; 3401; X86-BMI2-LABEL: bzhi32_d3_load_indexzext: 3402; X86-BMI2: # %bb.0: 3403; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3404; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 3405; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 3406; X86-BMI2-NEXT: retl 3407; 3408; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext: 3409; X64-NOBMI: # %bb.0: 3410; X64-NOBMI-NEXT: movl %esi, %ecx 3411; X64-NOBMI-NEXT: movl (%rdi), %eax 3412; X64-NOBMI-NEXT: negb %cl 3413; X64-NOBMI-NEXT: shll %cl, %eax 3414; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3415; X64-NOBMI-NEXT: shrl %cl, %eax 3416; X64-NOBMI-NEXT: retq 3417; 3418; X64-BMI1-LABEL: bzhi32_d3_load_indexzext: 3419; X64-BMI1: # %bb.0: 3420; X64-BMI1-NEXT: shll $8, %esi 3421; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 3422; X64-BMI1-NEXT: retq 3423; 3424; X64-BMI2-LABEL: bzhi32_d3_load_indexzext: 3425; X64-BMI2: # %bb.0: 3426; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 3427; X64-BMI2-NEXT: retq 3428 %val = load i32, ptr %w 3429 %numhighbits = sub i8 32, %numlowbits 3430 %sh_prom = zext i8 %numhighbits to i32 3431 %highbitscleared = shl i32 %val, %sh_prom 3432 %masked = lshr i32 %highbitscleared, %sh_prom 3433 
ret i32 %masked 3434} 3435 3436; 64-bit. 3437 3438define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { 3439; X86-NOBMI-LABEL: bzhi64_d0: 3440; X86-NOBMI: # %bb.0: 3441; X86-NOBMI-NEXT: pushl %ebx 3442; X86-NOBMI-NEXT: pushl %edi 3443; X86-NOBMI-NEXT: pushl %esi 3444; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 3445; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3446; X86-NOBMI-NEXT: movb $64, %cl 3447; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3448; X86-NOBMI-NEXT: movl %edx, %esi 3449; X86-NOBMI-NEXT: shll %cl, %esi 3450; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 3451; X86-NOBMI-NEXT: testb $32, %cl 3452; X86-NOBMI-NEXT: movl %esi, %edi 3453; X86-NOBMI-NEXT: jne .LBB47_2 3454; X86-NOBMI-NEXT: # %bb.1: 3455; X86-NOBMI-NEXT: movl %eax, %edi 3456; X86-NOBMI-NEXT: .LBB47_2: 3457; X86-NOBMI-NEXT: movl %edi, %eax 3458; X86-NOBMI-NEXT: shrl %cl, %eax 3459; X86-NOBMI-NEXT: xorl %ebx, %ebx 3460; X86-NOBMI-NEXT: testb $32, %cl 3461; X86-NOBMI-NEXT: movl $0, %edx 3462; X86-NOBMI-NEXT: jne .LBB47_4 3463; X86-NOBMI-NEXT: # %bb.3: 3464; X86-NOBMI-NEXT: movl %esi, %ebx 3465; X86-NOBMI-NEXT: movl %eax, %edx 3466; X86-NOBMI-NEXT: .LBB47_4: 3467; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 3468; X86-NOBMI-NEXT: testb $32, %cl 3469; X86-NOBMI-NEXT: jne .LBB47_6 3470; X86-NOBMI-NEXT: # %bb.5: 3471; X86-NOBMI-NEXT: movl %ebx, %eax 3472; X86-NOBMI-NEXT: .LBB47_6: 3473; X86-NOBMI-NEXT: popl %esi 3474; X86-NOBMI-NEXT: popl %edi 3475; X86-NOBMI-NEXT: popl %ebx 3476; X86-NOBMI-NEXT: retl 3477; 3478; X86-BMI1-LABEL: bzhi64_d0: 3479; X86-BMI1: # %bb.0: 3480; X86-BMI1-NEXT: pushl %ebx 3481; X86-BMI1-NEXT: pushl %edi 3482; X86-BMI1-NEXT: pushl %esi 3483; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 3484; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3485; X86-BMI1-NEXT: movb $64, %cl 3486; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3487; X86-BMI1-NEXT: movl %edx, %esi 3488; X86-BMI1-NEXT: shll %cl, %esi 3489; X86-BMI1-NEXT: shldl %cl, %edx, %eax 3490; X86-BMI1-NEXT: testb $32, %cl 3491; X86-BMI1-NEXT: 
movl %esi, %edi 3492; X86-BMI1-NEXT: jne .LBB47_2 3493; X86-BMI1-NEXT: # %bb.1: 3494; X86-BMI1-NEXT: movl %eax, %edi 3495; X86-BMI1-NEXT: .LBB47_2: 3496; X86-BMI1-NEXT: movl %edi, %eax 3497; X86-BMI1-NEXT: shrl %cl, %eax 3498; X86-BMI1-NEXT: xorl %ebx, %ebx 3499; X86-BMI1-NEXT: testb $32, %cl 3500; X86-BMI1-NEXT: movl $0, %edx 3501; X86-BMI1-NEXT: jne .LBB47_4 3502; X86-BMI1-NEXT: # %bb.3: 3503; X86-BMI1-NEXT: movl %esi, %ebx 3504; X86-BMI1-NEXT: movl %eax, %edx 3505; X86-BMI1-NEXT: .LBB47_4: 3506; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx 3507; X86-BMI1-NEXT: testb $32, %cl 3508; X86-BMI1-NEXT: jne .LBB47_6 3509; X86-BMI1-NEXT: # %bb.5: 3510; X86-BMI1-NEXT: movl %ebx, %eax 3511; X86-BMI1-NEXT: .LBB47_6: 3512; X86-BMI1-NEXT: popl %esi 3513; X86-BMI1-NEXT: popl %edi 3514; X86-BMI1-NEXT: popl %ebx 3515; X86-BMI1-NEXT: retl 3516; 3517; X86-BMI2-LABEL: bzhi64_d0: 3518; X86-BMI2: # %bb.0: 3519; X86-BMI2-NEXT: pushl %edi 3520; X86-BMI2-NEXT: pushl %esi 3521; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3522; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 3523; X86-BMI2-NEXT: movb $64, %cl 3524; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3525; X86-BMI2-NEXT: shldl %cl, %eax, %esi 3526; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi 3527; X86-BMI2-NEXT: xorl %edx, %edx 3528; X86-BMI2-NEXT: testb $32, %cl 3529; X86-BMI2-NEXT: je .LBB47_2 3530; X86-BMI2-NEXT: # %bb.1: 3531; X86-BMI2-NEXT: movl %edi, %esi 3532; X86-BMI2-NEXT: movl $0, %edi 3533; X86-BMI2-NEXT: .LBB47_2: 3534; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax 3535; X86-BMI2-NEXT: jne .LBB47_4 3536; X86-BMI2-NEXT: # %bb.3: 3537; X86-BMI2-NEXT: movl %eax, %edx 3538; X86-BMI2-NEXT: .LBB47_4: 3539; X86-BMI2-NEXT: shrdl %cl, %esi, %edi 3540; X86-BMI2-NEXT: testb $32, %cl 3541; X86-BMI2-NEXT: jne .LBB47_6 3542; X86-BMI2-NEXT: # %bb.5: 3543; X86-BMI2-NEXT: movl %edi, %eax 3544; X86-BMI2-NEXT: .LBB47_6: 3545; X86-BMI2-NEXT: popl %esi 3546; X86-BMI2-NEXT: popl %edi 3547; X86-BMI2-NEXT: retl 3548; 3549; X64-NOBMI-LABEL: bzhi64_d0: 3550; X64-NOBMI: # 
%bb.0: 3551; X64-NOBMI-NEXT: movq %rsi, %rcx 3552; X64-NOBMI-NEXT: movq %rdi, %rax 3553; X64-NOBMI-NEXT: negb %cl 3554; X64-NOBMI-NEXT: shlq %cl, %rax 3555; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3556; X64-NOBMI-NEXT: shrq %cl, %rax 3557; X64-NOBMI-NEXT: retq 3558; 3559; X64-BMI1-LABEL: bzhi64_d0: 3560; X64-BMI1: # %bb.0: 3561; X64-BMI1-NEXT: shll $8, %esi 3562; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 3563; X64-BMI1-NEXT: retq 3564; 3565; X64-BMI2-LABEL: bzhi64_d0: 3566; X64-BMI2: # %bb.0: 3567; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 3568; X64-BMI2-NEXT: retq 3569 %numhighbits = sub i64 64, %numlowbits 3570 %highbitscleared = shl i64 %val, %numhighbits 3571 %masked = lshr i64 %highbitscleared, %numhighbits 3572 ret i64 %masked 3573} 3574 3575define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { 3576; X86-NOBMI-LABEL: bzhi64_d1_indexzext: 3577; X86-NOBMI: # %bb.0: 3578; X86-NOBMI-NEXT: pushl %ebx 3579; X86-NOBMI-NEXT: pushl %edi 3580; X86-NOBMI-NEXT: pushl %esi 3581; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 3582; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3583; X86-NOBMI-NEXT: movb $64, %cl 3584; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3585; X86-NOBMI-NEXT: movl %edx, %esi 3586; X86-NOBMI-NEXT: shll %cl, %esi 3587; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 3588; X86-NOBMI-NEXT: testb $32, %cl 3589; X86-NOBMI-NEXT: movl %esi, %edi 3590; X86-NOBMI-NEXT: jne .LBB48_2 3591; X86-NOBMI-NEXT: # %bb.1: 3592; X86-NOBMI-NEXT: movl %eax, %edi 3593; X86-NOBMI-NEXT: .LBB48_2: 3594; X86-NOBMI-NEXT: movl %edi, %eax 3595; X86-NOBMI-NEXT: shrl %cl, %eax 3596; X86-NOBMI-NEXT: xorl %ebx, %ebx 3597; X86-NOBMI-NEXT: testb $32, %cl 3598; X86-NOBMI-NEXT: movl $0, %edx 3599; X86-NOBMI-NEXT: jne .LBB48_4 3600; X86-NOBMI-NEXT: # %bb.3: 3601; X86-NOBMI-NEXT: movl %esi, %ebx 3602; X86-NOBMI-NEXT: movl %eax, %edx 3603; X86-NOBMI-NEXT: .LBB48_4: 3604; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 3605; X86-NOBMI-NEXT: testb $32, %cl 3606; X86-NOBMI-NEXT: jne .LBB48_6 
3607; X86-NOBMI-NEXT: # %bb.5: 3608; X86-NOBMI-NEXT: movl %ebx, %eax 3609; X86-NOBMI-NEXT: .LBB48_6: 3610; X86-NOBMI-NEXT: popl %esi 3611; X86-NOBMI-NEXT: popl %edi 3612; X86-NOBMI-NEXT: popl %ebx 3613; X86-NOBMI-NEXT: retl 3614; 3615; X86-BMI1-LABEL: bzhi64_d1_indexzext: 3616; X86-BMI1: # %bb.0: 3617; X86-BMI1-NEXT: pushl %ebx 3618; X86-BMI1-NEXT: pushl %edi 3619; X86-BMI1-NEXT: pushl %esi 3620; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 3621; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3622; X86-BMI1-NEXT: movb $64, %cl 3623; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3624; X86-BMI1-NEXT: movl %edx, %esi 3625; X86-BMI1-NEXT: shll %cl, %esi 3626; X86-BMI1-NEXT: shldl %cl, %edx, %eax 3627; X86-BMI1-NEXT: testb $32, %cl 3628; X86-BMI1-NEXT: movl %esi, %edi 3629; X86-BMI1-NEXT: jne .LBB48_2 3630; X86-BMI1-NEXT: # %bb.1: 3631; X86-BMI1-NEXT: movl %eax, %edi 3632; X86-BMI1-NEXT: .LBB48_2: 3633; X86-BMI1-NEXT: movl %edi, %eax 3634; X86-BMI1-NEXT: shrl %cl, %eax 3635; X86-BMI1-NEXT: xorl %ebx, %ebx 3636; X86-BMI1-NEXT: testb $32, %cl 3637; X86-BMI1-NEXT: movl $0, %edx 3638; X86-BMI1-NEXT: jne .LBB48_4 3639; X86-BMI1-NEXT: # %bb.3: 3640; X86-BMI1-NEXT: movl %esi, %ebx 3641; X86-BMI1-NEXT: movl %eax, %edx 3642; X86-BMI1-NEXT: .LBB48_4: 3643; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx 3644; X86-BMI1-NEXT: testb $32, %cl 3645; X86-BMI1-NEXT: jne .LBB48_6 3646; X86-BMI1-NEXT: # %bb.5: 3647; X86-BMI1-NEXT: movl %ebx, %eax 3648; X86-BMI1-NEXT: .LBB48_6: 3649; X86-BMI1-NEXT: popl %esi 3650; X86-BMI1-NEXT: popl %edi 3651; X86-BMI1-NEXT: popl %ebx 3652; X86-BMI1-NEXT: retl 3653; 3654; X86-BMI2-LABEL: bzhi64_d1_indexzext: 3655; X86-BMI2: # %bb.0: 3656; X86-BMI2-NEXT: pushl %edi 3657; X86-BMI2-NEXT: pushl %esi 3658; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3659; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 3660; X86-BMI2-NEXT: movb $64, %cl 3661; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3662; X86-BMI2-NEXT: shldl %cl, %eax, %esi 3663; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi 3664; 
X86-BMI2-NEXT: xorl %edx, %edx 3665; X86-BMI2-NEXT: testb $32, %cl 3666; X86-BMI2-NEXT: je .LBB48_2 3667; X86-BMI2-NEXT: # %bb.1: 3668; X86-BMI2-NEXT: movl %edi, %esi 3669; X86-BMI2-NEXT: movl $0, %edi 3670; X86-BMI2-NEXT: .LBB48_2: 3671; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax 3672; X86-BMI2-NEXT: jne .LBB48_4 3673; X86-BMI2-NEXT: # %bb.3: 3674; X86-BMI2-NEXT: movl %eax, %edx 3675; X86-BMI2-NEXT: .LBB48_4: 3676; X86-BMI2-NEXT: shrdl %cl, %esi, %edi 3677; X86-BMI2-NEXT: testb $32, %cl 3678; X86-BMI2-NEXT: jne .LBB48_6 3679; X86-BMI2-NEXT: # %bb.5: 3680; X86-BMI2-NEXT: movl %edi, %eax 3681; X86-BMI2-NEXT: .LBB48_6: 3682; X86-BMI2-NEXT: popl %esi 3683; X86-BMI2-NEXT: popl %edi 3684; X86-BMI2-NEXT: retl 3685; 3686; X64-NOBMI-LABEL: bzhi64_d1_indexzext: 3687; X64-NOBMI: # %bb.0: 3688; X64-NOBMI-NEXT: movl %esi, %ecx 3689; X64-NOBMI-NEXT: movq %rdi, %rax 3690; X64-NOBMI-NEXT: negb %cl 3691; X64-NOBMI-NEXT: shlq %cl, %rax 3692; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3693; X64-NOBMI-NEXT: shrq %cl, %rax 3694; X64-NOBMI-NEXT: retq 3695; 3696; X64-BMI1-LABEL: bzhi64_d1_indexzext: 3697; X64-BMI1: # %bb.0: 3698; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 3699; X64-BMI1-NEXT: shll $8, %esi 3700; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 3701; X64-BMI1-NEXT: retq 3702; 3703; X64-BMI2-LABEL: bzhi64_d1_indexzext: 3704; X64-BMI2: # %bb.0: 3705; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 3706; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 3707; X64-BMI2-NEXT: retq 3708 %numhighbits = sub i8 64, %numlowbits 3709 %sh_prom = zext i8 %numhighbits to i64 3710 %highbitscleared = shl i64 %val, %sh_prom 3711 %masked = lshr i64 %highbitscleared, %sh_prom 3712 ret i64 %masked 3713} 3714 3715define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind { 3716; X86-NOBMI-LABEL: bzhi64_d2_load: 3717; X86-NOBMI: # %bb.0: 3718; X86-NOBMI-NEXT: pushl %ebx 3719; X86-NOBMI-NEXT: pushl %edi 3720; X86-NOBMI-NEXT: pushl %esi 3721; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3722; 
X86-NOBMI-NEXT: movl (%eax), %edx 3723; X86-NOBMI-NEXT: movl 4(%eax), %eax 3724; X86-NOBMI-NEXT: movb $64, %cl 3725; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3726; X86-NOBMI-NEXT: movl %edx, %esi 3727; X86-NOBMI-NEXT: shll %cl, %esi 3728; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 3729; X86-NOBMI-NEXT: testb $32, %cl 3730; X86-NOBMI-NEXT: movl %esi, %edi 3731; X86-NOBMI-NEXT: jne .LBB49_2 3732; X86-NOBMI-NEXT: # %bb.1: 3733; X86-NOBMI-NEXT: movl %eax, %edi 3734; X86-NOBMI-NEXT: .LBB49_2: 3735; X86-NOBMI-NEXT: movl %edi, %eax 3736; X86-NOBMI-NEXT: shrl %cl, %eax 3737; X86-NOBMI-NEXT: xorl %ebx, %ebx 3738; X86-NOBMI-NEXT: testb $32, %cl 3739; X86-NOBMI-NEXT: movl $0, %edx 3740; X86-NOBMI-NEXT: jne .LBB49_4 3741; X86-NOBMI-NEXT: # %bb.3: 3742; X86-NOBMI-NEXT: movl %esi, %ebx 3743; X86-NOBMI-NEXT: movl %eax, %edx 3744; X86-NOBMI-NEXT: .LBB49_4: 3745; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 3746; X86-NOBMI-NEXT: testb $32, %cl 3747; X86-NOBMI-NEXT: jne .LBB49_6 3748; X86-NOBMI-NEXT: # %bb.5: 3749; X86-NOBMI-NEXT: movl %ebx, %eax 3750; X86-NOBMI-NEXT: .LBB49_6: 3751; X86-NOBMI-NEXT: popl %esi 3752; X86-NOBMI-NEXT: popl %edi 3753; X86-NOBMI-NEXT: popl %ebx 3754; X86-NOBMI-NEXT: retl 3755; 3756; X86-BMI1-LABEL: bzhi64_d2_load: 3757; X86-BMI1: # %bb.0: 3758; X86-BMI1-NEXT: pushl %ebx 3759; X86-BMI1-NEXT: pushl %edi 3760; X86-BMI1-NEXT: pushl %esi 3761; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3762; X86-BMI1-NEXT: movl (%eax), %edx 3763; X86-BMI1-NEXT: movl 4(%eax), %eax 3764; X86-BMI1-NEXT: movb $64, %cl 3765; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3766; X86-BMI1-NEXT: movl %edx, %esi 3767; X86-BMI1-NEXT: shll %cl, %esi 3768; X86-BMI1-NEXT: shldl %cl, %edx, %eax 3769; X86-BMI1-NEXT: testb $32, %cl 3770; X86-BMI1-NEXT: movl %esi, %edi 3771; X86-BMI1-NEXT: jne .LBB49_2 3772; X86-BMI1-NEXT: # %bb.1: 3773; X86-BMI1-NEXT: movl %eax, %edi 3774; X86-BMI1-NEXT: .LBB49_2: 3775; X86-BMI1-NEXT: movl %edi, %eax 3776; X86-BMI1-NEXT: shrl %cl, %eax 3777; X86-BMI1-NEXT: xorl %ebx, %ebx 
3778; X86-BMI1-NEXT: testb $32, %cl 3779; X86-BMI1-NEXT: movl $0, %edx 3780; X86-BMI1-NEXT: jne .LBB49_4 3781; X86-BMI1-NEXT: # %bb.3: 3782; X86-BMI1-NEXT: movl %esi, %ebx 3783; X86-BMI1-NEXT: movl %eax, %edx 3784; X86-BMI1-NEXT: .LBB49_4: 3785; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx 3786; X86-BMI1-NEXT: testb $32, %cl 3787; X86-BMI1-NEXT: jne .LBB49_6 3788; X86-BMI1-NEXT: # %bb.5: 3789; X86-BMI1-NEXT: movl %ebx, %eax 3790; X86-BMI1-NEXT: .LBB49_6: 3791; X86-BMI1-NEXT: popl %esi 3792; X86-BMI1-NEXT: popl %edi 3793; X86-BMI1-NEXT: popl %ebx 3794; X86-BMI1-NEXT: retl 3795; 3796; X86-BMI2-LABEL: bzhi64_d2_load: 3797; X86-BMI2: # %bb.0: 3798; X86-BMI2-NEXT: pushl %edi 3799; X86-BMI2-NEXT: pushl %esi 3800; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3801; X86-BMI2-NEXT: movl (%eax), %edx 3802; X86-BMI2-NEXT: movl 4(%eax), %esi 3803; X86-BMI2-NEXT: movb $64, %cl 3804; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3805; X86-BMI2-NEXT: shldl %cl, %edx, %esi 3806; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi 3807; X86-BMI2-NEXT: xorl %edx, %edx 3808; X86-BMI2-NEXT: testb $32, %cl 3809; X86-BMI2-NEXT: je .LBB49_2 3810; X86-BMI2-NEXT: # %bb.1: 3811; X86-BMI2-NEXT: movl %edi, %esi 3812; X86-BMI2-NEXT: movl $0, %edi 3813; X86-BMI2-NEXT: .LBB49_2: 3814; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax 3815; X86-BMI2-NEXT: jne .LBB49_4 3816; X86-BMI2-NEXT: # %bb.3: 3817; X86-BMI2-NEXT: movl %eax, %edx 3818; X86-BMI2-NEXT: .LBB49_4: 3819; X86-BMI2-NEXT: shrdl %cl, %esi, %edi 3820; X86-BMI2-NEXT: testb $32, %cl 3821; X86-BMI2-NEXT: jne .LBB49_6 3822; X86-BMI2-NEXT: # %bb.5: 3823; X86-BMI2-NEXT: movl %edi, %eax 3824; X86-BMI2-NEXT: .LBB49_6: 3825; X86-BMI2-NEXT: popl %esi 3826; X86-BMI2-NEXT: popl %edi 3827; X86-BMI2-NEXT: retl 3828; 3829; X64-NOBMI-LABEL: bzhi64_d2_load: 3830; X64-NOBMI: # %bb.0: 3831; X64-NOBMI-NEXT: movq %rsi, %rcx 3832; X64-NOBMI-NEXT: movq (%rdi), %rax 3833; X64-NOBMI-NEXT: negb %cl 3834; X64-NOBMI-NEXT: shlq %cl, %rax 3835; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3836; 
; X64-NOBMI-NEXT:    shrq %cl, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_d2_load:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_d2_load:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT:    retq
  %val = load i64, ptr %w
  %numhighbits = sub i64 64, %numlowbits
  %highbitscleared = shl i64 %val, %numhighbits
  %masked = lshr i64 %highbitscleared, %numhighbits
  ret i64 %masked
}

; Pattern d on an i64 loaded from %w, with an i8 bit count: the shift amount
; (64 - %numlowbits) is computed in i8 and zero-extended to i64 before it is
; used for both the shl and the lshr.
define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %ebx
; X86-NOBMI-NEXT:    pushl %edi
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movl (%eax), %edx
; X86-NOBMI-NEXT:    movl 4(%eax), %eax
; X86-NOBMI-NEXT:    movb $64, %cl
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl %edx, %esi
; X86-NOBMI-NEXT:    shll %cl, %esi
; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    movl %esi, %edi
; X86-NOBMI-NEXT:    jne .LBB50_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edi
; X86-NOBMI-NEXT:  .LBB50_2:
; X86-NOBMI-NEXT:    movl %edi, %eax
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    xorl %ebx, %ebx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    movl $0, %edx
; X86-NOBMI-NEXT:    jne .LBB50_4
; X86-NOBMI-NEXT:  # %bb.3:
; X86-NOBMI-NEXT:    movl %esi, %ebx
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:  .LBB50_4:
; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    jne .LBB50_6
; X86-NOBMI-NEXT:  # %bb.5:
; X86-NOBMI-NEXT:    movl %ebx, %eax
; X86-NOBMI-NEXT:  .LBB50_6:
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    popl %edi
; X86-NOBMI-NEXT:    popl %ebx
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_d3_load_indexzext:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    pushl %ebx
; X86-BMI1-NEXT:    pushl %edi
; X86-BMI1-NEXT:    pushl %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    movl (%eax), %edx
; X86-BMI1-NEXT:    movl 4(%eax), %eax
; X86-BMI1-NEXT:    movb $64, %cl
; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT:    movl %edx, %esi
; X86-BMI1-NEXT:    shll %cl, %esi
; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    movl %esi, %edi
; X86-BMI1-NEXT:    jne .LBB50_2
; X86-BMI1-NEXT:  # %bb.1:
; X86-BMI1-NEXT:    movl %eax, %edi
; X86-BMI1-NEXT:  .LBB50_2:
; X86-BMI1-NEXT:    movl %edi, %eax
; X86-BMI1-NEXT:    shrl %cl, %eax
; X86-BMI1-NEXT:    xorl %ebx, %ebx
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    movl $0, %edx
; X86-BMI1-NEXT:    jne .LBB50_4
; X86-BMI1-NEXT:  # %bb.3:
; X86-BMI1-NEXT:    movl %esi, %ebx
; X86-BMI1-NEXT:    movl %eax, %edx
; X86-BMI1-NEXT:  .LBB50_4:
; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    jne .LBB50_6
; X86-BMI1-NEXT:  # %bb.5:
; X86-BMI1-NEXT:    movl %ebx, %eax
; X86-BMI1-NEXT:  .LBB50_6:
; X86-BMI1-NEXT:    popl %esi
; X86-BMI1-NEXT:    popl %edi
; X86-BMI1-NEXT:    popl %ebx
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_d3_load_indexzext:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    pushl %edi
; X86-BMI2-NEXT:    pushl %esi
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    movl (%eax), %edx
; X86-BMI2-NEXT:    movl 4(%eax), %esi
; X86-BMI2-NEXT:    movb $64, %cl
; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
; X86-BMI2-NEXT:    xorl %edx, %edx
; X86-BMI2-NEXT:    testb $32, %cl
; X86-BMI2-NEXT:    je .LBB50_2
; X86-BMI2-NEXT:  # %bb.1:
; X86-BMI2-NEXT:    movl %edi, %esi
; X86-BMI2-NEXT:    movl $0, %edi
; X86-BMI2-NEXT:  .LBB50_2:
; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
; X86-BMI2-NEXT:    jne .LBB50_4
; X86-BMI2-NEXT:  # %bb.3:
; X86-BMI2-NEXT:    movl %eax, %edx
; X86-BMI2-NEXT:  .LBB50_4:
; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
; X86-BMI2-NEXT:    testb $32, %cl
; X86-BMI2-NEXT:    jne .LBB50_6
; X86-BMI2-NEXT:  # %bb.5:
; X86-BMI2-NEXT:    movl %edi, %eax
; X86-BMI2-NEXT:  .LBB50_6:
; X86-BMI2-NEXT:    popl %esi
; X86-BMI2-NEXT:    popl %edi
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movq (%rdi), %rax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrq %cl, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_d3_load_indexzext:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_d3_load_indexzext:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT:    retq
  %val = load i64, ptr %w
  %numhighbits = sub i8 64, %numlowbits
  %sh_prom = zext i8 %numhighbits to i64
  %highbitscleared = shl i64 %val, %sh_prom
  %masked = lshr i64 %highbitscleared, %sh_prom
  ret i64 %masked
}

; 64-bit, but with 32-bit output

; Everything done in 64-bit, truncation happens last.
; Pattern d on i64: %val is shifted left and then logically right by
; (64 - %numlowbits), and only the final i64 result is truncated to i32.
define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_d0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movb $64, %cl
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl %esi, %edx
; X86-NOBMI-NEXT:    shll %cl, %edx
; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB51_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %edx, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:  .LBB51_2:
; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    jne .LBB51_4
; X86-NOBMI-NEXT:  # %bb.3:
; X86-NOBMI-NEXT:    movl %edx, %eax
; X86-NOBMI-NEXT:  .LBB51_4:
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_32_d0:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    pushl %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    movb $64, %cl
; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT:    movl %esi, %edx
; X86-BMI1-NEXT:    shll %cl, %edx
; X86-BMI1-NEXT:    shldl %cl, %esi, %eax
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    je .LBB51_2
; X86-BMI1-NEXT:  # %bb.1:
; X86-BMI1-NEXT:    movl %edx, %eax
; X86-BMI1-NEXT:    xorl %edx, %edx
; X86-BMI1-NEXT:  .LBB51_2:
; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
; X86-BMI1-NEXT:    shrl %cl, %eax
; X86-BMI1-NEXT:    testb $32, %cl
; X86-BMI1-NEXT:    jne .LBB51_4
; X86-BMI1-NEXT:  # %bb.3:
; X86-BMI1-NEXT:    movl %edx, %eax
; X86-BMI1-NEXT:  .LBB51_4:
; X86-BMI1-NEXT:    popl %esi
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_32_d0:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT:    movb $64, %cl
; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT:    testb $32, %cl
; X86-BMI2-NEXT:    je .LBB51_2
; X86-BMI2-NEXT:  # %bb.1:
; X86-BMI2-NEXT:    movl %eax, %edx
; X86-BMI2-NEXT:    xorl %eax, %eax
; X86-BMI2-NEXT:  .LBB51_2:
; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
; X86-BMI2-NEXT:    testb $32, %cl
; X86-BMI2-NEXT:    je .LBB51_4
; X86-BMI2-NEXT:  # %bb.3:
; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
; X86-BMI2-NEXT:  .LBB51_4:
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_d0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq %rsi, %rcx
; X64-NOBMI-NEXT:    movq %rdi, %rax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT:    shrq %cl, %rax
; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_d0:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_d0:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-BMI2-NEXT:    retq
  %numhighbits = sub i64 64, %numlowbits
  %highbitscleared = shl i64 %val, %numhighbits
  %masked = lshr i64 %highbitscleared, %numhighbits
  %res = trunc i64 %masked to i32
  ret i32 %res
}

; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
; In this function %val is truncated to i32 first; the shift amount
; (32 - %numlowbits) and both the shl and the lshr are i32 operations.
define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_d1:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT:    shrl %cl, %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_32_d1:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    shll $8, %eax
; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_32_d1:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_d1:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movq %rdi, %rax
; X64-NOBMI-NEXT:    negb %cl
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shrl %cl, %eax
; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_d1:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_d1:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %truncval = trunc i64 %val to i32
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %truncval, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Constant mask
; ---------------------------------------------------------------------------- ;

; 32-bit

; and with 2147483647 (0x7FFFFFFF): keeps the low 31 bits of an i32 argument.
define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
; X86-LABEL: bzhi32_constant_mask32:
; X86:       # %bb.0:
; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_constant_mask32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
  %masked = and i32 %val, 2147483647
  ret i32 %masked
}

; Same 0x7FFFFFFF mask, applied to an i32 loaded through the pointer argument.
define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind {
; X86-LABEL: bzhi32_constant_mask32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT:    andl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_constant_mask32_load:
; X64:       # %bb.0:
; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    andl (%rdi), %eax
; X64-NEXT:    retq
  %val1 = load i32, ptr %val
  %masked = and i32 %val1, 2147483647
  ret i32 %masked
}

; and with 32767 (0x7FFF): keeps the low 15 bits of an i32 argument.
define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
; X86-LABEL: bzhi32_constant_mask16:
; X86:       # %bb.0:
; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_constant_mask16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
; X64-NEXT:    retq
  %masked = and i32 %val, 32767
  ret i32 %masked
}

; Same 0x7FFF mask, applied to an i32 loaded through the pointer argument.
define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind {
; X86-LABEL: bzhi32_constant_mask16_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
; X86-NEXT:    andl (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_constant_mask16_load:
; X64:       # %bb.0:
; X64-NEXT:    movl $32767, %eax # imm = 0x7FFF
; X64-NEXT:    andl (%rdi), %eax
; X64-NEXT:    retq
  %val1 = load i32, ptr %val
  %masked = and i32 %val1, 32767
  ret i32 %masked
}

; and with 127: keeps the low 7 bits of an i32 argument.
define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
; X86-LABEL: bzhi32_constant_mask8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $127, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_constant_mask8:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $127, %eax
; X64-NEXT:    retq
  %masked = and i32 %val, 127
  ret i32 %masked
}

; Same 127 mask, applied to an i32 loaded through the pointer argument.
define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind {
; X86-LABEL: bzhi32_constant_mask8_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    andl $127, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_constant_mask8_load:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $127, %eax
; X64-NEXT:    retq
  %val1 = load i32, ptr %val
  %masked = and i32 %val1, 127
  ret i32 %masked
}

; 64-bit

; and with 4611686018427387903 (0x3FFFFFFFFFFFFFFF): keeps the low 62 bits of
; an i64 argument.
define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_constant_mask64:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64:
; X64-BMI1NOTBM:       # %bb.0:
; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
; X64-BMI1NOTBM-NEXT:    retq
;
; X64-BMI1TBM-LABEL: bzhi64_constant_mask64:
; X64-BMI1TBM:       # %bb.0:
; X64-BMI1TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
; X64-BMI1TBM-NEXT:    retq
;
; X64-BMI2TBM-LABEL: bzhi64_constant_mask64:
; X64-BMI2TBM:       # %bb.0:
; X64-BMI2TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
; X64-BMI2TBM-NEXT:    retq
;
; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64:
; X64-BMI2NOTBM:       # %bb.0:
; X64-BMI2NOTBM-NEXT:    movb $62, %al
; X64-BMI2NOTBM-NEXT:    bzhiq %rax, %rdi, %rax
; X64-BMI2NOTBM-NEXT:    retq
  %masked = and i64 %val, 4611686018427387903
  ret i64 %masked
}

; Same 0x3FFFFFFFFFFFFFFF mask, applied to an i64 loaded through the pointer
; argument.
define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind {
; X86-LABEL: bzhi64_constant_mask64_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
; X86-NEXT:    andl 4(%ecx), %edx
; X86-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_constant_mask64_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64_load:
; X64-BMI1NOTBM:       # %bb.0:
; X64-BMI1NOTBM-NEXT:    movl $15872, %eax # imm = 0x3E00
; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
; X64-BMI1NOTBM-NEXT:    retq
;
; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load:
; X64-BMI1TBM:       # %bb.0:
; X64-BMI1TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
; X64-BMI1TBM-NEXT:    retq
;
; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load:
; X64-BMI2TBM:       # %bb.0:
; X64-BMI2TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
; X64-BMI2TBM-NEXT:    retq
;
; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load:
; X64-BMI2NOTBM:       # %bb.0:
; X64-BMI2NOTBM-NEXT:    movb $62, %al
; X64-BMI2NOTBM-NEXT:    bzhiq %rax, (%rdi), %rax
; X64-BMI2NOTBM-NEXT:    retq
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 4611686018427387903
  ret i64 %masked
}

; and with 2147483647 (0x7FFFFFFF): keeps the low 31 bits of an i64 argument.
define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask32:
; X86:       # %bb.0:
; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: bzhi64_constant_mask32:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
  %masked = and i64 %val, 2147483647
  ret i64 %masked
}

; Same 0x7FFFFFFF mask, applied to an i64 loaded through the pointer argument.
define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind {
; X86-LABEL: bzhi64_constant_mask32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT:    andl (%ecx), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: bzhi64_constant_mask32_load:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 2147483647
  ret i64 %masked
}

; and with 32767 (0x7FFF): keeps the low 15 bits of an i64 argument.
define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask16:
; X86:       # %bb.0:
; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: bzhi64_constant_mask16:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
; X64-NEXT:    retq
  %masked = and i64 %val, 32767
  ret i64 %masked
}

; Same 0x7FFF mask, applied to an i64 loaded through the pointer argument.
define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind {
; X86-LABEL: bzhi64_constant_mask16_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
; X86-NEXT:    andl (%ecx), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: bzhi64_constant_mask16_load:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
; X64-NEXT:    retq
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 32767
  ret i64 %masked
}

; and with 127: keeps the low 7 bits of an i64 argument.
define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
; X86-LABEL: bzhi64_constant_mask8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $127, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: bzhi64_constant_mask8:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    andl $127, %eax
; X64-NEXT:    retq
  %masked = and i64 %val, 127
  ret i64 %masked
}

; Same 127 mask, applied to an i64 loaded through the pointer argument.
define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind {
; X86-LABEL: bzhi64_constant_mask8_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    andl $127, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: bzhi64_constant_mask8_load:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    andl $127, %eax
; X64-NEXT:    retq
  %val1 = load i64, ptr %val
  %masked = and i64 %val1, 127
  ret i64 %masked
}