; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=X86XOP

; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.

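; The expansion below is the classic "byte-swap, then swap nibbles, bit-pairs
; and adjacent bits" pattern. As an illustrative sketch only (this function is
; hypothetical and has no FileCheck assertions), the i32 lowering computes the
; equivalent of:

declare i32 @llvm.bswap.i32(i32)

define i32 @bitreverse_i32_sketch(i32 %x) {
  %b = call i32 @llvm.bswap.i32(i32 %x) ; reverse the four bytes
  %hi4 = shl i32 %b, 4
  %hi4m = and i32 %hi4, -252645136 ; 0xF0F0F0F0
  %lo4 = lshr i32 %b, 4
  %lo4m = and i32 %lo4, 252645135 ; 0x0F0F0F0F
  %nib = or i32 %hi4m, %lo4m ; swap the nibbles of each byte
  %hi2 = shl i32 %nib, 2
  %hi2m = and i32 %hi2, -858993460 ; 0xCCCCCCCC
  %lo2 = lshr i32 %nib, 2
  %lo2m = and i32 %lo2, 858993459 ; 0x33333333
  %pair = or i32 %hi2m, %lo2m ; swap the bit-pairs of each nibble
  %hi1 = shl i32 %pair, 1
  %hi1m = and i32 %hi1, -1431655766 ; 0xAAAAAAAA
  %lo1 = lshr i32 %pair, 1
  %lo1m = and i32 %lo1, 1431655765 ; 0x55555555
  %rev = or i32 %hi1m, %lo1m ; swap adjacent bits
  ret i32 %rev
}
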
declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone

define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X86-LABEL: test_bitreverse_v2i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $3855, %edx # imm = 0xF0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $3855, %eax # imm = 0xF0F
; X86-NEXT: orl %edx, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $13107, %edx # imm = 0x3333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $13107, %eax # imm = 0x3333
; X86-NEXT: leal (%eax,%edx,4), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $21845, %edx # imm = 0x5555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $21845, %eax # imm = 0x5555
; X86-NEXT: leal (%eax,%edx,2), %eax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $3855, %edx # imm = 0xF0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $13107, %edx # imm = 0x3333
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: andl $13107, %ecx # imm = 0x3333
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $21845, %edx # imm = 0x5555
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $21845, %ecx # imm = 0x5555
; X86-NEXT: leal (%ecx,%edx,2), %edx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_v2i16:
; X64: # %bb.0:
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $8, %xmm1
; X64-NEXT: psllw $8, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $4, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: psllw $4, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $2, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: psllw $2, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $1, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: paddb %xmm0, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_v2i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}

declare i64 @llvm.bitreverse.i64(i64) readnone

define i64 @test_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_i64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %edx, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%edx,4), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%edx,2), %eax
; X86-NEXT: bswapl %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: leal (%ecx,%edx,2), %edx
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i64:
; X64: # %bb.0:
; X64-NEXT: bswapq %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $4, %rax
; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: andq %rcx, %rdi
; X64-NEXT: shlq $4, %rdi
; X64-NEXT: orq %rax, %rdi
; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: andq %rax, %rcx
; X64-NEXT: shrq $2, %rdi
; X64-NEXT: andq %rax, %rdi
; X64-NEXT: leaq (%rdi,%rcx,4), %rax
; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: andq %rcx, %rdx
; X64-NEXT: shrq %rax
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: leaq (%rax,%rdx,2), %rax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i64:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: vpextrd $1, %xmm0, %edx
; X86XOP-NEXT: retl
  %b = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %b
}

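; On the 32-bit targets the i64 case above is simply two i32 reversals with the
; halves exchanged (%eax/%edx). A minimal equivalent sketch (hypothetical
; function, no assertions):
define i64 @bitreverse_i64_sketch(i64 %x) {
  %lo = trunc i64 %x to i32
  %hiword = lshr i64 %x, 32
  %hi = trunc i64 %hiword to i32
  %rlo = call i32 @llvm.bitreverse.i32(i32 %lo)
  %rhi = call i32 @llvm.bitreverse.i32(i32 %hi)
  %rlo64 = zext i32 %rlo to i64
  %rhi64 = zext i32 %rhi to i64
  %top = shl i64 %rlo64, 32 ; the reversed low half becomes the high half
  %r = or i64 %top, %rhi64
  ret i64 %r
}
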
declare i32 @llvm.bitreverse.i32(i32) readnone

define i32 @test_bitreverse_i32(i32 %a) nounwind {
; X86-LABEL: test_bitreverse_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i32:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i32:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: retl
  %b = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %b
}

declare i24 @llvm.bitreverse.i24(i24) readnone

define i24 @test_bitreverse_i24(i24 %a) nounwind {
; X86-LABEL: test_bitreverse_i24:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655680, %ecx # imm = 0x55555500
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655680, %eax # imm = 0x55555500
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i24:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655680, %ecx # imm = 0x55555500
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655680, %eax # imm = 0x55555500
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i24:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: shrl $8, %eax
; X86XOP-NEXT: retl
  %b = call i24 @llvm.bitreverse.i24(i24 %a)
  ret i24 %b
}

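; i24 is promoted to i32: the zero-extended value is reversed as an i32 and the
; result is shifted right by the 8 padding bits, which is also why the final
; mask above is 0x55555500 rather than 0x55555555. A hypothetical equivalent:
define i24 @bitreverse_i24_sketch(i24 %x) {
  %z = zext i24 %x to i32
  %r32 = call i32 @llvm.bitreverse.i32(i32 %z)
  %s = lshr i32 %r32, 8 ; drop the 8 reversed padding bits
  %r = trunc i32 %s to i24
  ret i24 %r
}
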
declare i16 @llvm.bitreverse.i16(i16) readnone

define i16 @test_bitreverse_i16(i16 %a) nounwind {
; X86-LABEL: test_bitreverse_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $3855, %eax # imm = 0xF0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $13107, %ecx # imm = 0x3333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $13107, %eax # imm = 0x3333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $21845, %ecx # imm = 0x5555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $21845, %eax # imm = 0x5555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: rolw $8, %di
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $3855, %eax # imm = 0xF0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $3855, %edi # imm = 0xF0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $13107, %eax # imm = 0x3333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $13107, %edi # imm = 0x3333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $21845, %ecx # imm = 0x5555
; X64-NEXT: shrl %eax
; X64-NEXT: andl $21845, %eax # imm = 0x5555
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: # kill: def $ax killed $ax killed $eax
; X86XOP-NEXT: retl
  %b = call i16 @llvm.bitreverse.i16(i16 %a)
  ret i16 %b
}

declare i8 @llvm.bitreverse.i8(i8) readnone

define i8 @test_bitreverse_i8(i8 %a) {
; X86-LABEL: test_bitreverse_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb $4, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $51, %cl
; X86-NEXT: shlb $2, %cl
; X86-NEXT: shrb $2, %al
; X86-NEXT: andb $51, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $85, %cl
; X86-NEXT: addb %cl, %cl
; X86-NEXT: shrb %al
; X86-NEXT: andb $85, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i8:
; X64: # %bb.0:
; X64-NEXT: rolb $4, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $51, %al
; X64-NEXT: shlb $2, %al
; X64-NEXT: shrb $2, %dil
; X64-NEXT: andb $51, %dil
; X64-NEXT: orb %dil, %al
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andb $85, %cl
; X64-NEXT: addb %cl, %cl
; X64-NEXT: shrb %al
; X64-NEXT: andb $85, %al
; X64-NEXT: orb %cl, %al
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i8:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  ret i8 %b
}

declare i4 @llvm.bitreverse.i4(i4) readnone

define i4 @test_bitreverse_i4(i4 %a) {
; X86-LABEL: test_bitreverse_i4:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andb $15, %al
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: addb %cl, %dl
; X86-NEXT: andb $4, %dl
; X86-NEXT: shlb $3, %cl
; X86-NEXT: andb $8, %cl
; X86-NEXT: orb %dl, %cl
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shrb %dl
; X86-NEXT: andb $2, %dl
; X86-NEXT: orb %cl, %dl
; X86-NEXT: shrb $3, %al
; X86-NEXT: orb %dl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i4:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (%rdi,%rdi), %ecx
; X64-NEXT: leal (,%rdi,8), %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $15, %al
; X64-NEXT: andb $4, %cl
; X64-NEXT: andb $8, %dl
; X64-NEXT: orb %cl, %dl
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: shrb %cl
; X64-NEXT: andb $2, %cl
; X64-NEXT: orb %dl, %cl
; X64-NEXT: shrb $3, %al
; X64-NEXT: orb %cl, %al
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i4:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: shrb $4, %al
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
; X86XOP-NEXT: retl
  %b = call i4 @llvm.bitreverse.i4(i4 %a)
  ret i4 %b
}

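; The same widen-reverse-shift identity explains the i4 lowerings (note the
; shrb $4 in the XOP version): reverse the zero-extended i8, then drop the
; four reversed padding bits. A hypothetical equivalent:
define i4 @bitreverse_i4_sketch(i4 %a) {
  %z = zext i4 %a to i8
  %r8 = call i8 @llvm.bitreverse.i8(i8 %z)
  %s = lshr i8 %r8, 4 ; drop the 4 reversed padding bits
  %r = trunc i8 %s to i4
  ret i4 %r
}
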
; These tests check that bitreverse(constant) calls are folded
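; For example, i24 4096 has only bit 12 set and reversal maps bit i to bit
; 23-i, so the folded result has only bit 11 set: 2048 = 0x800. Likewise
; i16 15 = 0x000F folds to 0xF000 = 61440 and i16 3840 = 0x0F00 folds to
; 0x00F0 = 240.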

define <2 x i16> @fold_v2i16() {
; X86-LABEL: fold_v2i16:
; X86: # %bb.0:
; X86-NEXT: movw $-4096, %ax # imm = 0xF000
; X86-NEXT: movw $240, %dx
; X86-NEXT: retl
;
; X64-LABEL: fold_v2i16:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X64-NEXT: retq
;
; X86XOP-LABEL: fold_v2i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
  ret <2 x i16> %b
}

define i24 @fold_i24() {
; X86-LABEL: fold_i24:
; X86: # %bb.0:
; X86-NEXT: movl $2048, %eax # imm = 0x800
; X86-NEXT: retl
;
; X64-LABEL: fold_i24:
; X64: # %bb.0:
; X64-NEXT: movl $2048, %eax # imm = 0x800
; X64-NEXT: retq
;
; X86XOP-LABEL: fold_i24:
; X86XOP: # %bb.0:
; X86XOP-NEXT: movl $2048, %eax # imm = 0x800
; X86XOP-NEXT: retl
  %b = call i24 @llvm.bitreverse.i24(i24 4096)
  ret i24 %b
}

define i8 @fold_i8() {
; X86-LABEL: fold_i8:
; X86: # %bb.0:
; X86-NEXT: movb $-16, %al
; X86-NEXT: retl
;
; X64-LABEL: fold_i8:
; X64: # %bb.0:
; X64-NEXT: movb $-16, %al
; X64-NEXT: retq
;
; X86XOP-LABEL: fold_i8:
; X86XOP: # %bb.0:
; X86XOP-NEXT: movb $-16, %al
; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 15)
  ret i8 %b
}

define i4 @fold_i4() {
; X86-LABEL: fold_i4:
; X86: # %bb.0:
; X86-NEXT: movb $1, %al
; X86-NEXT: retl
;
; X64-LABEL: fold_i4:
; X64: # %bb.0:
; X64-NEXT: movb $1, %al
; X64-NEXT: retq
;
; X86XOP-LABEL: fold_i4:
; X86XOP: # %bb.0:
; X86XOP-NEXT: movb $1, %al
; X86XOP-NEXT: retl
  %b = call i4 @llvm.bitreverse.i4(i4 8)
  ret i4 %b
}

; These tests check that bitreverse(bitreverse()) calls are removed

define i8 @identity_i8(i8 %a) {
; X86-LABEL: identity_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: identity_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86XOP-LABEL: identity_i8:
; X86XOP: # %bb.0:
; X86XOP-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  %c = call i8 @llvm.bitreverse.i8(i8 %b)
  ret i8 %c
}

define <2 x i16> @identity_v2i16(<2 x i16> %a) {
; X86-LABEL: identity_v2i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
; X64-LABEL: identity_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
;
; X86XOP-LABEL: identity_v2i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  %c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
  ret <2 x i16> %c
}

; These tests check that bitreverse(undef) calls are removed

define i8 @undef_i8() {
; X86-LABEL: undef_i8:
; X86: # %bb.0:
; X86-NEXT: retl
;
; X64-LABEL: undef_i8:
; X64: # %bb.0:
; X64-NEXT: retq
;
; X86XOP-LABEL: undef_i8:
; X86XOP: # %bb.0:
; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 undef)
  ret i8 %b
}

define <2 x i16> @undef_v2i16() {
; X86-LABEL: undef_v2i16:
; X86: # %bb.0:
; X86-NEXT: retl
;
; X64-LABEL: undef_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
;
; X86XOP-LABEL: undef_v2i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
  ret <2 x i16> %b
}

; Make sure we don't assert during type legalization promoting a large
; bitreverse due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
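; i528 is legalized in 64-bit words: 528 bits round up to 9*64 = 576, each word
; is bit-reversed, and since the reversed value then sits in the top 528 bits,
; adjacent words are stitched together with 576-528 = 48-bit funnel shifts (the
; shrdq $48 chain below), with the last 16 bits stored via movw. The 32-bit
; lowerings use 17*32 = 544 bits and shrdl $16 in the same way.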
define i528 @large_promotion(i528 %A) nounwind {
; X86-LABEL: large_promotion:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $56, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: bswapl %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ebp
; X86-NEXT: shrl $4, %ebx
; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NEXT: orl %ebp, %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333
; X86-NEXT: shrl $2, %ebx
; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333
; X86-NEXT: leal (%ebx,%ebp,4), %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000
; X86-NEXT: shrl %ebx
; X86-NEXT: andl $1431633920, %ebx # imm = 0x55550000
; X86-NEXT: leal (%ebx,%ebp,2), %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: bswapl %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ebx
; X86-NEXT: shrl $4, %edi
; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NEXT: orl %ebx, %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333
; X86-NEXT: shrl $2, %edi
; X86-NEXT: andl $858993459, %edi # imm = 0x33333333
; X86-NEXT: leal (%edi,%ebx,4), %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555
; X86-NEXT: shrl %edi
; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555
; X86-NEXT: leal (%edi,%ebx,2), %ebx
; X86-NEXT: bswapl %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edi
; X86-NEXT: shrl $4, %esi
; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
; X86-NEXT: orl %edi, %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $858993459, %edi # imm = 0x33333333
; X86-NEXT: shrl $2, %esi
; X86-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NEXT: leal (%esi,%edi,4), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555
; X86-NEXT: shrl %esi
; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555
; X86-NEXT: leal (%esi,%edi,2), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %esi
; X86-NEXT: shrl $4, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: orl %esi, %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NEXT: shrl $2, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: leal (%edx,%esi,4), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555
; X86-NEXT: shrl %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: leal (%edx,%esi,2), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: leal (%ecx,%edx,2), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %edx
; X86-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-NEXT: shrdl $16, %ebx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: shrdl $16, %ebp, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: shrdl $16, %ebx, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ebx
; X86-NEXT: shrdl $16, %edi, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrdl $16, %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 60(%eax)
; X86-NEXT: movl %ecx, 56(%eax)
; X86-NEXT: movl %ebx, 52(%eax)
; X86-NEXT: movl %ebp, 48(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 44(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 40(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 36(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 32(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 28(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 24(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 20(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: shrl $16, %edx
; X86-NEXT: movw %dx, 64(%eax)
; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: large_promotion:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: bswapq %rdi
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: shrq $4, %rbx
; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F
; X64-NEXT: andq %r13, %rbx
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: shlq $4, %rdi
; X64-NEXT: orq %rbx, %rdi
; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: andq %r11, %rbx
; X64-NEXT: shrq $2, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: leaq (%rdi,%rbx,4), %rdi
; X64-NEXT: movabsq $6148820866244280320, %r10 # imm = 0x5555000000000000
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: andq %r10, %rbx
; X64-NEXT: shrq %rdi
; X64-NEXT: andq %r10, %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %r10
; X64-NEXT: bswapq %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: shrq $4, %rdi
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: andq %r13, %rbp
; X64-NEXT: shlq $4, %rbp
; X64-NEXT: orq %rdi, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: shrq $2, %rbp
; X64-NEXT: andq %r11, %rbp
; X64-NEXT: leaq (%rbp,%rdi,4), %rdi
; X64-NEXT: movabsq $6148914691236517205, %rbp # imm = 0x5555555555555555
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rdi
; X64-NEXT: andq %rbp, %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %r14
; X64-NEXT: shrdq $48, %r14, %r10
; X64-NEXT: bswapq %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: shrq $4, %rdi
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: andq %r13, %r15
; X64-NEXT: shlq $4, %r15
; X64-NEXT: orq %rdi, %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: shrq $2, %r15
; X64-NEXT: andq %r11, %r15
; X64-NEXT: leaq (%r15,%rdi,4), %rdi
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rdi
; X64-NEXT: andq %rbp, %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %r15
; X64-NEXT: shrdq $48, %r15, %r14
; X64-NEXT: bswapq %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: shrq $4, %rdi
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: andq %r13, %r12
; X64-NEXT: shlq $4, %r12
; X64-NEXT: orq %rdi, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: shrq $2, %r12
; X64-NEXT: andq %r11, %r12
; X64-NEXT: leaq (%r12,%rdi,4), %rdi
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rdi
; X64-NEXT: andq %rbp, %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %r12
; X64-NEXT: shrdq $48, %r12, %r15
; X64-NEXT: bswapq %r9
; X64-NEXT: movq %r9, %rdi
; X64-NEXT: shrq $4, %rdi
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: andq %r13, %r9
; X64-NEXT: shlq $4, %r9
; X64-NEXT: orq %rdi, %r9
; X64-NEXT: movq %r9, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: shrq $2, %r9
; X64-NEXT: andq %r11, %r9
; X64-NEXT: leaq (%r9,%rdi,4), %rdi
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rdi
; X64-NEXT: andq %rbp, %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %r9
; X64-NEXT: shrdq $48, %r9, %r12
; X64-NEXT: bswapq %r8
; X64-NEXT: movq %r8, %rdi
; X64-NEXT: shrq $4, %rdi
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: andq %r13, %r8
; X64-NEXT: shlq $4, %r8
; X64-NEXT: orq %rdi, %r8
; X64-NEXT: movq %r8, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: shrq $2, %r8
; X64-NEXT: andq %r11, %r8
; X64-NEXT: leaq (%r8,%rdi,4), %rdi
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rdi
; X64-NEXT: andq %rbp, %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %rdi
; X64-NEXT: shrdq $48, %rdi, %r9
; X64-NEXT: bswapq %rcx
; X64-NEXT: movq %rcx, %rbx
; X64-NEXT: shrq $4, %rbx
; X64-NEXT: andq %r13, %rbx
; X64-NEXT: andq %r13, %rcx
; X64-NEXT: shlq $4, %rcx
; X64-NEXT: orq %rbx, %rcx
; X64-NEXT: movq %rcx, %rbx
; X64-NEXT: andq %r11, %rbx
; X64-NEXT: shrq $2, %rcx
; X64-NEXT: andq %r11, %rcx
; X64-NEXT: leaq (%rcx,%rbx,4), %rcx
; X64-NEXT: movq %rcx, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rcx
; X64-NEXT: andq %rbp, %rcx
; X64-NEXT: leaq (%rcx,%rbx,2), %rcx
; X64-NEXT: shrdq $48, %rcx, %rdi
; X64-NEXT: bswapq %rdx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: shrq $4, %rbx
; X64-NEXT: andq %r13, %rbx
; X64-NEXT: andq %r13, %rdx
; X64-NEXT: shlq $4, %rdx
; X64-NEXT: orq %rbx, %rdx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: andq %r11, %rbx
; X64-NEXT: shrq $2, %rdx
; X64-NEXT: andq %r11, %rdx
; X64-NEXT: leaq (%rdx,%rbx,4), %rdx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rdx
; X64-NEXT: andq %rbp, %rdx
; X64-NEXT: leaq (%rdx,%rbx,2), %rdx
; X64-NEXT: shrdq $48, %rdx, %rcx
; X64-NEXT: bswapq %rsi
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: shrq $4, %rbx
; X64-NEXT: andq %r13, %rbx
; X64-NEXT: andq %r13, %rsi
; X64-NEXT: shlq $4, %rsi
; X64-NEXT: orq %rbx, %rsi
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: andq %r11, %rbx
; X64-NEXT: shrq $2, %rsi
; X64-NEXT: andq %r11, %rsi
; X64-NEXT: leaq (%rsi,%rbx,4), %rsi
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: andq %rbp, %rbx
; X64-NEXT: shrq %rsi
; X64-NEXT: andq %rbp, %rsi
; X64-NEXT: leaq (%rsi,%rbx,2), %rsi
; X64-NEXT: shrdq $48, %rsi, %rdx
; X64-NEXT: shrq $48, %rsi
; X64-NEXT: movq %rdx, 56(%rax)
; X64-NEXT: movq %rcx, 48(%rax)
; X64-NEXT: movq %rdi, 40(%rax)
; X64-NEXT: movq %r9, 32(%rax)
; X64-NEXT: movq %r12, 24(%rax)
; X64-NEXT: movq %r15, 16(%rax)
; X64-NEXT: movq %r14, 8(%rax)
; X64-NEXT: movq %r10, (%rax)
; X64-NEXT: movw %si, 64(%rax)
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X86XOP-LABEL: large_promotion:
; X86XOP: # %bb.0:
; X86XOP-NEXT: pushl %ebp
; X86XOP-NEXT: pushl %ebx
; X86XOP-NEXT: pushl %edi
; X86XOP-NEXT: pushl %esi
; X86XOP-NEXT: subl $44, %esp
; X86XOP-NEXT: vmovdqa {{.*#+}} xmm0 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88]
; X86XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vmovd %xmm1, %ecx
; X86XOP-NEXT: shrdl $16, %ecx, %eax
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
; X86XOP-NEXT: shrdl $16, %eax, %ecx
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vmovd %xmm1, %ecx
; X86XOP-NEXT: shrdl $16, %ecx, %eax
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
; X86XOP-NEXT: shrdl $16, %eax, %ecx
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vmovd %xmm1, %ecx
; X86XOP-NEXT: shrdl $16, %ecx, %eax
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
; X86XOP-NEXT: shrdl $16, %eax, %ecx
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vmovd %xmm1, %ecx
; X86XOP-NEXT: shrdl $16, %ecx, %eax
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
; X86XOP-NEXT: shrdl $16, %eax, %ecx
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vmovd %xmm1, %ecx
; X86XOP-NEXT: shrdl $16, %ecx, %eax
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
; X86XOP-NEXT: shrdl $16, %eax, %ecx
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vmovd %xmm1, %ebp
; X86XOP-NEXT: shrdl $16, %ebp, %eax
; X86XOP-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86XOP-NEXT: vpextrd $1, %xmm1, %ebx
; X86XOP-NEXT: shrdl $16, %ebx, %ebp
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
; X86XOP-NEXT: vmovd %xmm1, %esi
; X86XOP-NEXT: shrdl $16, %esi, %ebx
; X86XOP-NEXT: vpextrd $1, %xmm1, %edx
; X86XOP-NEXT: shrdl $16, %edx, %esi
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %ecx
; X86XOP-NEXT: shrdl $16, %ecx, %edx
; X86XOP-NEXT: vpextrd $1, %xmm0, %edi
; X86XOP-NEXT: shrdl $16, %edi, %ecx
; X86XOP-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86XOP-NEXT: movl %ecx, 60(%eax)
; X86XOP-NEXT: movl %edx, 56(%eax)
; X86XOP-NEXT: movl %esi, 52(%eax)
; X86XOP-NEXT: movl %ebx, 48(%eax)
; X86XOP-NEXT: movl %ebp, 44(%eax)
; X86XOP-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 40(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 36(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 32(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 28(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 24(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 20(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 16(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 12(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 8(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, 4(%eax)
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86XOP-NEXT: movl %ecx, (%eax)
; X86XOP-NEXT: shrl $16, %edi
; X86XOP-NEXT: movw %di, 64(%eax)
; X86XOP-NEXT: addl $44, %esp
; X86XOP-NEXT: popl %esi
; X86XOP-NEXT: popl %edi
; X86XOP-NEXT: popl %ebx
; X86XOP-NEXT: popl %ebp
; X86XOP-NEXT: retl $4
  %Z = call i528 @llvm.bitreverse.i528(i528 %A)
  ret i528 %Z
}
declare i528 @llvm.bitreverse.i528(i528)