; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+cmov | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefix=X64

define i32 @bzhi32(i32 %x, i32 %y) {
; X86-LABEL: bzhi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    bzhil %eax, %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32:
; X64:       # %bb.0:
; X64-NEXT:    addl %edi, %edi
; X64-NEXT:    bzhil %esi, %edi, %eax
; X64-NEXT:    retq
  %x1 = add i32 %x, %x
  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

define i32 @bzhi32_load(ptr %x, i32 %y) {
; X86-LABEL: bzhi32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bzhil %eax, (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_load:
; X64:       # %bb.0:
; X64-NEXT:    bzhil %esi, (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, ptr %x
  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

; PR48768 - 'bzhi' clears the overflow flag, so we don't need a separate 'test'.
define i1 @bzhi32_overflow(i32 %x, i32 %y) {
; X86-LABEL: bzhi32_overflow:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    setle %al
; X86-NEXT:    retl
;
; X64-LABEL: bzhi32_overflow:
; X64:       # %bb.0:
; X64-NEXT:    bzhil %esi, %edi, %eax
; X64-NEXT:    setle %al
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
  %cmp = icmp slt i32 %tmp, 1
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bzhi.32(i32, i32)

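; PDEP deposits the low-order bits of its first operand into the bit positions
; that are set in its second (mask) operand. With a constant mask, the set of
; possibly-nonzero result bits is known in advance, which is what the
; demandedbits/knownbits tests below rely on to fold away redundant masking.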
define i32 @pdep32(i32 %x, i32 %y) {
; X86-LABEL: pdep32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32:
; X64:       # %bb.0:
; X64-NEXT:    addl %esi, %esi
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    retq
  %y1 = add i32 %y, %y
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pdep32_load(i32 %x, ptr %y) {
; X86-LABEL: pdep32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pdepl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_load:
; X64:       # %bb.0:
; X64-NEXT:    pdepl (%rsi), %edi, %eax
; X64-NEXT:    retq
  %y1 = load i32, ptr %y
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pdep32_anyext(i16 %x) {
; X86-LABEL: pdep32_anyext:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $-1431655766, %ecx # imm = 0xAAAAAAAA
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_anyext:
; X64:       # %bb.0:
; X64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT:    pdepl %eax, %edi, %eax
; X64-NEXT:    retq
  %x1 = sext i16 %x to i32
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
  ret i32 %tmp
}

define i32 @pdep32_demandedbits(i32 %x) {
; X86-LABEL: pdep32_demandedbits:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits:
; X64:       # %bb.0:
; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT:    pdepl %eax, %edi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
  %tmp2 = and i32 %tmp, 1431655765
  ret i32 %tmp2
}

define i32 @pdep32_demandedbits2(i32 %x, i32 %y) {
; X86-LABEL: pdep32_demandedbits2:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pdepl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    andl $128, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits2:
; X64:       # %bb.0:
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    andl $128, %eax
; X64-NEXT:    retq
  %tmp = and i32 %x, 255
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
  %tmp3 = and i32 %tmp2, 128
  ret i32 %tmp3
}

define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) {
; X86-LABEL: pdep32_demandedbits_mask:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pdepl %eax, %ecx, %eax
; X86-NEXT:    andl $32768, %eax # imm = 0x8000
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits_mask:
; X64:       # %bb.0:
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    andl $32768, %eax # imm = 0x8000
; X64-NEXT:    retq
  %tmp = sext i16 %y to i32
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
  %tmp3 = and i32 %tmp2, 32768
  ret i32 %tmp3
}

define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) {
; X86-LABEL: pdep32_demandedbits_mask2:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pdepl %eax, %ecx, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_demandedbits_mask2:
; X64:       # %bb.0:
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
  %tmp = sext i16 %y to i32
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
  %tmp3 = and i32 %tmp2, 65535
  ret i32 %tmp3
}

define i32 @pdep32_knownbits(i32 %x) {
; X86-LABEL: pdep32_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    pdepl %ecx, %eax, %eax
; X86-NEXT:    imull %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT:    pdepl %eax, %edi, %eax
; X64-NEXT:    imull %eax, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
  %tmp2 = and i32 %tmp, 1431655765
  %tmp3 = mul i32 %tmp, %tmp2
  ret i32 %tmp3
}

define i32 @pdep32_knownbits2(i32 %x, i32 %y) {
; X86-LABEL: pdep32_knownbits2:
; X86:       # %bb.0:
; X86-NEXT:    movl $-256, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pdepl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    imull %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pdep32_knownbits2:
; X64:       # %bb.0:
; X64-NEXT:    andl $-256, %edi
; X64-NEXT:    pdepl %esi, %edi, %eax
; X64-NEXT:    imull %eax, %eax
; X64-NEXT:    retq
  %tmp = and i32 %x, -256
  %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
  %tmp3 = and i32 %tmp2, -256
  %tmp4 = mul i32 %tmp2, %tmp3
  ret i32 %tmp4
}

declare i32 @llvm.x86.bmi.pdep.32(i32, i32)

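; PEXT extracts the bits of its first operand selected by the second (mask)
; operand and packs them into the low-order bits of the result, so a constant
; mask with N set bits bounds the result to N bits. pext32_knownbits below
; exercises this: the mask 0x55555555 has 16 set bits, so the trailing
; 'and i32 %tmp, 65535' should fold away.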
define i32 @pext32(i32 %x, i32 %y) {
; X86-LABEL: pext32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    pextl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pext32:
; X64:       # %bb.0:
; X64-NEXT:    addl %esi, %esi
; X64-NEXT:    pextl %esi, %edi, %eax
; X64-NEXT:    retq
  %y1 = add i32 %y, %y
  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pext32_load(i32 %x, ptr %y) {
; X86-LABEL: pext32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    pextl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pext32_load:
; X64:       # %bb.0:
; X64-NEXT:    pextl (%rsi), %edi, %eax
; X64-NEXT:    retq
  %y1 = load i32, ptr %y
  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
  ret i32 %tmp
}

define i32 @pext32_knownbits(i32 %x) {
; X86-LABEL: pext32_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    pextl %ecx, %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pext32_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    movl $1431655765, %eax # imm = 0x55555555
; X64-NEXT:    pextl %eax, %edi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 1431655765)
  %tmp2 = and i32 %tmp, 65535
  ret i32 %tmp2
}

declare i32 @llvm.x86.bmi.pext.32(i32, i32)

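; MULX computes the full 64-bit product of EDX and its source operand without
; touching EFLAGS. These tests check that the zext/mul/lshr widening-multiply
; pattern selects mulxl on 32-bit targets; 64-bit targets can use a plain
; 64-bit imul instead.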
define i32 @mulx32(i32 %x, i32 %y, ptr %p) {
; X86-LABEL: mulx32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addl %edx, %edx
; X86-NEXT:    addl %eax, %eax
; X86-NEXT:    mulxl %eax, %eax, %edx
; X86-NEXT:    movl %edx, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: mulx32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    addl %edi, %edi
; X64-NEXT:    leal (%rsi,%rsi), %eax
; X64-NEXT:    imulq %rdi, %rax
; X64-NEXT:    movq %rax, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    movl %ecx, (%rdx)
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
  %x1 = add i32 %x, %x
  %y1 = add i32 %y, %y
  %x2 = zext i32 %x1 to i64
  %y2 = zext i32 %y1 to i64
  %r1 = mul i64 %x2, %y2
  %h1 = lshr i64 %r1, 32
  %h = trunc i64 %h1 to i32
  %l = trunc i64 %r1 to i32
  store i32 %h, ptr %p
  ret i32 %l
}

define i32 @mulx32_load(i32 %x, ptr %y, ptr %p) {
; X86-LABEL: mulx32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addl %edx, %edx
; X86-NEXT:    mulxl (%eax), %eax, %edx
; X86-NEXT:    movl %edx, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: mulx32_load:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi), %eax
; X64-NEXT:    movl (%rsi), %ecx
; X64-NEXT:    imulq %rcx, %rax
; X64-NEXT:    movq %rax, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    movl %ecx, (%rdx)
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
  %x1 = add i32 %x, %x
  %y1 = load i32, ptr %y
  %x2 = zext i32 %x1 to i64
  %y2 = zext i32 %y1 to i64
  %r1 = mul i64 %x2, %y2
  %h1 = lshr i64 %r1, 32
  %h = trunc i64 %h1 to i32
  %l = trunc i64 %r1 to i32
  store i32 %h, ptr %p
  ret i32 %l
}