; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64

; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.
;
; Every function below is checked twice: once for the i686 triple and once for
; the x86_64 triple (see the two llc invocations at the top of the file). The
; check lines are generated output; regenerate them with the script named in
; the NOTE line rather than editing them by hand.

declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone

; Vector case. The i686 checks reverse each 16-bit lane separately in scalar
; registers (rolw $8 to swap the bytes, then nibble / 2-bit / 1-bit swaps using
; the 0x0F0F, 0x3333 and 0x5555 masks) and leave the lanes in ax and dx. The
; x86_64 checks swap the bytes with unpack/shuffle/pack and then apply the same
; three swap steps to the whole XMM register.
define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X86-LABEL: test_bitreverse_v2i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $3855, %edx # imm = 0xF0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: andl $61680, %eax # imm = 0xF0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $13107, %edx # imm = 0x3333
; X86-NEXT: andl $52428, %eax # imm = 0xCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%edx,4), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $21845, %edx # imm = 0x5555
; X86-NEXT: andl $43690, %eax # imm = 0xAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%edx,2), %eax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $3855, %edx # imm = 0xF0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: andl $61680, %ecx # imm = 0xF0F0
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $13107, %edx # imm = 0x3333
; X86-NEXT: andl $52428, %ecx # imm = 0xCCCC
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $21845, %edx # imm = 0x5555
; X86-NEXT: andl $43690, %ecx # imm = 0xAAAA
; X86-NEXT: shrl %ecx
; X86-NEXT: leal (%ecx,%edx,2), %edx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_v2i16:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; X64-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; X64-NEXT: packuswb %xmm2, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllw $4, %xmm1
; X64-NEXT: pand {{.*}}(%rip), %xmm1
; X64-NEXT: psrlw $4, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X64-NEXT: pand %xmm0, %xmm1
; X64-NEXT: psllw $2, %xmm1
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: psrlw $2, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X64-NEXT: pand %xmm0, %xmm1
; X64-NEXT: paddb %xmm1, %xmm1
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: retq
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}

declare i64 @llvm.bitreverse.i64(i64) readnone

; 64-bit scalar. The i686 checks process the value as two 32-bit halves, each
; with its own bswapl and mask/shift sequence, producing eax and edx. The x86_64
; checks use a single bswapq and materialize the 64-bit masks with movabsq.
define i64 @test_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_i64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%edx,4), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%edx,2), %eax
; X86-NEXT: bswapl %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %ecx # imm = 0xAAAAAAAA
; X86-NEXT: shrl %ecx
; X86-NEXT: leal (%ecx,%edx,2), %edx
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i64:
; X64: # %bb.0:
; X64-NEXT: bswapq %rdi
; X64-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: movabsq $-1085102592571150096, %rcx # imm = 0xF0F0F0F0F0F0F0F0
; X64-NEXT: andq %rdi, %rcx
; X64-NEXT: shrq $4, %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: movabsq $-3689348814741910324, %rdx # imm = 0xCCCCCCCCCCCCCCCC
; X64-NEXT: andq %rcx, %rdx
; X64-NEXT: shrq $2, %rdx
; X64-NEXT: leaq (%rdx,%rax,4), %rax
; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NEXT: andq %rax, %rcx
; X64-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: andq %rax, %rdx
; X64-NEXT: shrq %rdx
; X64-NEXT: leaq (%rdx,%rcx,2), %rax
; X64-NEXT: retq
  %b = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %b
}

declare i32 @llvm.bitreverse.i32(i32) readnone

; 32-bit scalar: bswapl reverses the byte order, then the 0x0F0F0F0F,
; 0x33333333 and 0x55555555 mask pairs swap nibbles, 2-bit groups and single
; bits. Both triples are expected to emit the same sequence of operations.
define i32 @test_bitreverse_i32(i32 %a) nounwind {
; X86-LABEL: test_bitreverse_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i32:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0
; X64-NEXT: shrl $4, %edi
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC
; X64-NEXT: shrl $2, %edi
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X64-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X64-NEXT: shrl %eax
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %b
}

declare i24 @llvm.bitreverse.i24(i24) readnone

; Non-power-of-two width. The checks reverse a full 32-bit register and then
; shift the result right by 8 (32 - 24). The last swap step uses masks with the
; low byte cleared (0x55555500 / 0xAAAAAA00) because those bits are shifted out.
define i24 @test_bitreverse_i24(i24 %a) nounwind {
; X86-LABEL: test_bitreverse_i24:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655680, %ecx # imm = 0x55555500
; X86-NEXT: andl $-1431655936, %eax # imm = 0xAAAAAA00
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i24:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0
; X64-NEXT: shrl $4, %edi
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC
; X64-NEXT: shrl $2, %edi
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655680, %ecx # imm = 0x55555500
; X64-NEXT: andl $-1431655936, %eax # imm = 0xAAAAAA00
; X64-NEXT: shrl %eax
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: retq
  %b = call i24 @llvm.bitreverse.i24(i24 %a)
  ret i24 %b
}

declare i16 @llvm.bitreverse.i16(i16) readnone

; 16-bit scalar: the byte swap is a rolw $8, followed by the 16-bit versions of
; the nibble / 2-bit / 1-bit swap masks (0x0F0F, 0x3333, 0x5555).
define i16 @test_bitreverse_i16(i16 %a) nounwind {
; X86-LABEL: test_bitreverse_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $61680, %eax # imm = 0xF0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $13107, %ecx # imm = 0x3333
; X86-NEXT: andl $52428, %eax # imm = 0xCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $21845, %ecx # imm = 0x5555
; X86-NEXT: andl $43690, %eax # imm = 0xAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: rolw $8, %di
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $3855, %eax # imm = 0xF0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: andl $61680, %edi # imm = 0xF0F0
; X64-NEXT: shrl $4, %edi
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $13107, %eax # imm = 0x3333
; X64-NEXT: andl $52428, %edi # imm = 0xCCCC
; X64-NEXT: shrl $2, %edi
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $21845, %ecx # imm = 0x5555
; X64-NEXT: andl $43690, %eax # imm = 0xAAAA
; X64-NEXT: shrl %eax
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
  %b = call i16 @llvm.bitreverse.i16(i16 %a)
  ret i16 %b
}

declare i8 @llvm.bitreverse.i8(i8) readnone

; 8-bit scalar: rolb $4 swaps the two nibbles, then the 0x33 (51) and 0x55 (85)
; mask pairs swap 2-bit groups and single bits. The x86_64 checks combine the
; final two halves with addl where the i686 checks use orb; the halves occupy
; complementary bit positions (0xAA vs 0x55), so both forms give the same byte.
define i8 @test_bitreverse_i8(i8 %a) {
; X86-LABEL: test_bitreverse_i8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: rolb $4, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $51, %cl
; X86-NEXT: shlb $2, %cl
; X86-NEXT: andb $-52, %al
; X86-NEXT: shrb $2, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $85, %cl
; X86-NEXT: addb %cl, %cl
; X86-NEXT: andb $-86, %al
; X86-NEXT: shrb %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i8:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: rolb $4, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $51, %al
; X64-NEXT: shlb $2, %al
; X64-NEXT: andb $-52, %dil
; X64-NEXT: shrb $2, %dil
; X64-NEXT: orb %al, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $85, %al
; X64-NEXT: addb %al, %al
; X64-NEXT: andb $-86, %dil
; X64-NEXT: shrb %dil
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  ret i8 %b
}

declare i4 @llvm.bitreverse.i4(i4) readnone

; 4-bit scalar: the checks perform an 8-bit reversal and then shift right by 4.
; The last swap step only keeps the high-nibble bits (masks 80 = 0x50 and
; -96 = 0xA0) because the low nibble is discarded by the final shrb $4.
define i4 @test_bitreverse_i4(i4 %a) {
; X86-LABEL: test_bitreverse_i4:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: rolb $4, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $51, %cl
; X86-NEXT: shlb $2, %cl
; X86-NEXT: andb $-52, %al
; X86-NEXT: shrb $2, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $80, %cl
; X86-NEXT: addb %cl, %cl
; X86-NEXT: andb $-96, %al
; X86-NEXT: shrb %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: shrb $4, %al
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i4:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: rolb $4, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $51, %al
; X64-NEXT: shlb $2, %al
; X64-NEXT: andb $-52, %dil
; X64-NEXT: shrb $2, %dil
; X64-NEXT: orb %al, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $80, %al
; X64-NEXT: addb %al, %al
; X64-NEXT: andb $-96, %dil
; X64-NEXT: shrb %dil
; X64-NEXT: addl %edi, %eax
; X64-NEXT: shrb $4, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %b = call i4 @llvm.bitreverse.i4(i4 %a)
  ret i4 %b
}

; These tests check that bitreverse(constant) calls are folded

; Lane 0: 15 (0x000F) reverses to 0xF000, printed as -4096 / 61440.
; Lane 1: 3840 (0x0F00) reverses to 0x00F0 = 240.
define <2 x i16> @fold_v2i16() {
; X86-LABEL: fold_v2i16:
; X86: # %bb.0:
; X86-NEXT: movw $-4096, %ax # imm = 0xF000
; X86-NEXT: movw $240, %dx
; X86-NEXT: retl
;
; X64-LABEL: fold_v2i16:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X64-NEXT: retq
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
  ret <2 x i16> %b
}

; 4096 has only bit 12 set; in a 24-bit value that bit moves to position
; 23 - 12 = 11, i.e. 2048.
define i24 @fold_i24() {
; X86-LABEL: fold_i24:
; X86: # %bb.0:
; X86-NEXT: movl $2048, %eax # imm = 0x800
; X86-NEXT: retl
;
; X64-LABEL: fold_i24:
; X64: # %bb.0:
; X64-NEXT: movl $2048, %eax # imm = 0x800
; X64-NEXT: retq
  %b = call i24 @llvm.bitreverse.i24(i24 4096)
  ret i24 %b
}

; 15 (0x0F) reverses to 0xF0, which is printed as the signed byte -16.
define i8 @fold_i8() {
; X86-LABEL: fold_i8:
; X86: # %bb.0:
; X86-NEXT: movb $-16, %al
; X86-NEXT: retl
;
; X64-LABEL: fold_i8:
; X64: # %bb.0:
; X64-NEXT: movb $-16, %al
; X64-NEXT: retq
  %b = call i8 @llvm.bitreverse.i8(i8 15)
  ret i8 %b
}

; 8 (0b1000) reverses to 1 (0b0001) in four bits.
define i4 @fold_i4() {
; X86-LABEL: fold_i4:
; X86: # %bb.0:
; X86-NEXT: movb $1, %al
; X86-NEXT: retl
;
; X64-LABEL: fold_i4:
; X64: # %bb.0:
; X64-NEXT: movb $1, %al
; X64-NEXT: retq
  %b = call i4 @llvm.bitreverse.i4(i4 8)
  ret i4 %b
}

; These tests check that bitreverse(bitreverse()) calls are removed

; Expected code only moves the incoming argument into the return register.
define i8 @identity_i8(i8 %a) {
; X86-LABEL: identity_i8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
;
; X64-LABEL: identity_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  %c = call i8 @llvm.bitreverse.i8(i8 %b)
  ret i8 %c
}

; Vector variant: on x86_64 the expected body is just the return; on i686 the
; two lanes are loaded from the stack into eax and edx.
define <2 x i16> @identity_v2i16(<2 x i16> %a) {
; X86-LABEL: identity_v2i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
; X64-LABEL: identity_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  %c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
  ret <2 x i16> %c
}

; These tests check that bitreverse(undef) calls are removed

; Expected code for both triples is a bare return.
define i8 @undef_i8() {
; X86-LABEL: undef_i8:
; X86: # %bb.0:
; X86-NEXT: retl
;
; X64-LABEL: undef_i8:
; X64: # %bb.0:
; X64-NEXT: retq
  %b = call i8 @llvm.bitreverse.i8(i8 undef)
  ret i8 %b
}

; Vector variant of the undef case; again only a return is expected.
define <2 x i16> @undef_v2i16() {
; X86-LABEL: undef_v2i16:
; X86: # %bb.0:
; X86-NEXT: retl
;
; X64-LABEL: undef_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
  ret <2 x i16> %b
}

; Make sure we don't assert during type legalization promoting a large
; bitreverse due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
520define i528 @large_promotion(i528 %A) nounwind { 521; X86-LABEL: large_promotion: 522; X86: # %bb.0: 523; X86-NEXT: pushl %ebp 524; X86-NEXT: pushl %ebx 525; X86-NEXT: pushl %edi 526; X86-NEXT: pushl %esi 527; X86-NEXT: subl $56, %esp 528; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 529; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 530; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 531; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 532; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 533; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 534; X86-NEXT: bswapl %ebx 535; X86-NEXT: movl %ebx, %ebp 536; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F 537; X86-NEXT: shll $4, %ebp 538; X86-NEXT: andl $-252645136, %ebx # imm = 0xF0F0F0F0 539; X86-NEXT: shrl $4, %ebx 540; X86-NEXT: orl %ebp, %ebx 541; X86-NEXT: movl %ebx, %ebp 542; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333 543; X86-NEXT: andl $-858993460, %ebx # imm = 0xCCCCCCCC 544; X86-NEXT: shrl $2, %ebx 545; X86-NEXT: leal (%ebx,%ebp,4), %ebx 546; X86-NEXT: movl %ebx, %ebp 547; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000 548; X86-NEXT: andl $-1431699456, %ebx # imm = 0xAAAA0000 549; X86-NEXT: shrl %ebx 550; X86-NEXT: leal (%ebx,%ebp,2), %ebx 551; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill 552; X86-NEXT: bswapl %edi 553; X86-NEXT: movl %edi, %ebx 554; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F 555; X86-NEXT: shll $4, %ebx 556; X86-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 557; X86-NEXT: shrl $4, %edi 558; X86-NEXT: orl %ebx, %edi 559; X86-NEXT: movl %edi, %ebx 560; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333 561; X86-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC 562; X86-NEXT: shrl $2, %edi 563; X86-NEXT: leal (%edi,%ebx,4), %edi 564; X86-NEXT: movl %edi, %ebx 565; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555 566; X86-NEXT: andl $-1431655766, %edi # imm = 0xAAAAAAAA 567; X86-NEXT: shrl %edi 568; X86-NEXT: leal (%edi,%ebx,2), %edi 569; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 570; X86-NEXT: bswapl 
%esi 571; X86-NEXT: movl %esi, %edi 572; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F 573; X86-NEXT: shll $4, %edi 574; X86-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0 575; X86-NEXT: shrl $4, %esi 576; X86-NEXT: orl %edi, %esi 577; X86-NEXT: movl %esi, %edi 578; X86-NEXT: andl $858993459, %edi # imm = 0x33333333 579; X86-NEXT: andl $-858993460, %esi # imm = 0xCCCCCCCC 580; X86-NEXT: shrl $2, %esi 581; X86-NEXT: leal (%esi,%edi,4), %esi 582; X86-NEXT: movl %esi, %edi 583; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555 584; X86-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA 585; X86-NEXT: shrl %esi 586; X86-NEXT: leal (%esi,%edi,2), %ebx 587; X86-NEXT: bswapl %edx 588; X86-NEXT: movl %edx, %esi 589; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F 590; X86-NEXT: shll $4, %esi 591; X86-NEXT: andl $-252645136, %edx # imm = 0xF0F0F0F0 592; X86-NEXT: shrl $4, %edx 593; X86-NEXT: orl %esi, %edx 594; X86-NEXT: movl %edx, %esi 595; X86-NEXT: andl $858993459, %esi # imm = 0x33333333 596; X86-NEXT: andl $-858993460, %edx # imm = 0xCCCCCCCC 597; X86-NEXT: shrl $2, %edx 598; X86-NEXT: leal (%edx,%esi,4), %edx 599; X86-NEXT: movl %edx, %esi 600; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555 601; X86-NEXT: andl $-1431655766, %edx # imm = 0xAAAAAAAA 602; X86-NEXT: shrl %edx 603; X86-NEXT: leal (%edx,%esi,2), %edx 604; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 605; X86-NEXT: bswapl %ecx 606; X86-NEXT: movl %ecx, %edx 607; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 608; X86-NEXT: shll $4, %edx 609; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0 610; X86-NEXT: shrl $4, %ecx 611; X86-NEXT: orl %edx, %ecx 612; X86-NEXT: movl %ecx, %edx 613; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 614; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC 615; X86-NEXT: shrl $2, %ecx 616; X86-NEXT: leal (%ecx,%edx,4), %ecx 617; X86-NEXT: movl %ecx, %edx 618; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 619; X86-NEXT: andl 
$-1431655766, %ecx # imm = 0xAAAAAAAA 620; X86-NEXT: shrl %ecx 621; X86-NEXT: leal (%ecx,%edx,2), %ecx 622; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 623; X86-NEXT: bswapl %eax 624; X86-NEXT: movl %eax, %ecx 625; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 626; X86-NEXT: shll $4, %ecx 627; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 628; X86-NEXT: shrl $4, %eax 629; X86-NEXT: orl %ecx, %eax 630; X86-NEXT: movl %eax, %ecx 631; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 632; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 633; X86-NEXT: shrl $2, %eax 634; X86-NEXT: leal (%eax,%ecx,4), %eax 635; X86-NEXT: movl %eax, %ecx 636; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 637; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 638; X86-NEXT: shrl %eax 639; X86-NEXT: leal (%eax,%ecx,2), %eax 640; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 641; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 642; X86-NEXT: bswapl %eax 643; X86-NEXT: movl %eax, %ecx 644; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 645; X86-NEXT: shll $4, %ecx 646; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 647; X86-NEXT: shrl $4, %eax 648; X86-NEXT: orl %ecx, %eax 649; X86-NEXT: movl %eax, %ecx 650; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 651; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 652; X86-NEXT: shrl $2, %eax 653; X86-NEXT: leal (%eax,%ecx,4), %eax 654; X86-NEXT: movl %eax, %ecx 655; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 656; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 657; X86-NEXT: shrl %eax 658; X86-NEXT: leal (%eax,%ecx,2), %eax 659; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 660; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 661; X86-NEXT: bswapl %eax 662; X86-NEXT: movl %eax, %ecx 663; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 664; X86-NEXT: shll $4, %ecx 665; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 666; X86-NEXT: shrl $4, %eax 667; X86-NEXT: orl 
%ecx, %eax 668; X86-NEXT: movl %eax, %ecx 669; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 670; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 671; X86-NEXT: shrl $2, %eax 672; X86-NEXT: leal (%eax,%ecx,4), %eax 673; X86-NEXT: movl %eax, %ecx 674; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 675; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 676; X86-NEXT: shrl %eax 677; X86-NEXT: leal (%eax,%ecx,2), %eax 678; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 679; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 680; X86-NEXT: bswapl %eax 681; X86-NEXT: movl %eax, %ecx 682; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 683; X86-NEXT: shll $4, %ecx 684; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 685; X86-NEXT: shrl $4, %eax 686; X86-NEXT: orl %ecx, %eax 687; X86-NEXT: movl %eax, %ecx 688; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 689; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 690; X86-NEXT: shrl $2, %eax 691; X86-NEXT: leal (%eax,%ecx,4), %eax 692; X86-NEXT: movl %eax, %ecx 693; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 694; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 695; X86-NEXT: shrl %eax 696; X86-NEXT: leal (%eax,%ecx,2), %eax 697; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 698; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 699; X86-NEXT: bswapl %eax 700; X86-NEXT: movl %eax, %ecx 701; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 702; X86-NEXT: shll $4, %ecx 703; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 704; X86-NEXT: shrl $4, %eax 705; X86-NEXT: orl %ecx, %eax 706; X86-NEXT: movl %eax, %ecx 707; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 708; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 709; X86-NEXT: shrl $2, %eax 710; X86-NEXT: leal (%eax,%ecx,4), %eax 711; X86-NEXT: movl %eax, %ecx 712; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 713; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 714; X86-NEXT: shrl %eax 715; X86-NEXT: leal 
(%eax,%ecx,2), %eax 716; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 717; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 718; X86-NEXT: bswapl %eax 719; X86-NEXT: movl %eax, %ecx 720; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 721; X86-NEXT: shll $4, %ecx 722; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 723; X86-NEXT: shrl $4, %eax 724; X86-NEXT: orl %ecx, %eax 725; X86-NEXT: movl %eax, %ecx 726; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 727; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 728; X86-NEXT: shrl $2, %eax 729; X86-NEXT: leal (%eax,%ecx,4), %eax 730; X86-NEXT: movl %eax, %ecx 731; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 732; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 733; X86-NEXT: shrl %eax 734; X86-NEXT: leal (%eax,%ecx,2), %eax 735; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 736; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 737; X86-NEXT: bswapl %eax 738; X86-NEXT: movl %eax, %ecx 739; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 740; X86-NEXT: shll $4, %ecx 741; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 742; X86-NEXT: shrl $4, %eax 743; X86-NEXT: orl %ecx, %eax 744; X86-NEXT: movl %eax, %ecx 745; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 746; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 747; X86-NEXT: shrl $2, %eax 748; X86-NEXT: leal (%eax,%ecx,4), %eax 749; X86-NEXT: movl %eax, %ecx 750; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 751; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 752; X86-NEXT: shrl %eax 753; X86-NEXT: leal (%eax,%ecx,2), %eax 754; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 755; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 756; X86-NEXT: bswapl %eax 757; X86-NEXT: movl %eax, %ecx 758; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 759; X86-NEXT: shll $4, %ecx 760; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 761; X86-NEXT: shrl $4, %eax 762; X86-NEXT: orl %ecx, %eax 763; X86-NEXT: movl %eax, %ecx 
764; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 765; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 766; X86-NEXT: shrl $2, %eax 767; X86-NEXT: leal (%eax,%ecx,4), %eax 768; X86-NEXT: movl %eax, %ecx 769; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 770; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 771; X86-NEXT: shrl %eax 772; X86-NEXT: leal (%eax,%ecx,2), %eax 773; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 774; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 775; X86-NEXT: bswapl %eax 776; X86-NEXT: movl %eax, %ecx 777; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 778; X86-NEXT: shll $4, %ecx 779; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 780; X86-NEXT: shrl $4, %eax 781; X86-NEXT: orl %ecx, %eax 782; X86-NEXT: movl %eax, %ecx 783; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 784; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 785; X86-NEXT: shrl $2, %eax 786; X86-NEXT: leal (%eax,%ecx,4), %eax 787; X86-NEXT: movl %eax, %ecx 788; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 789; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 790; X86-NEXT: shrl %eax 791; X86-NEXT: leal (%eax,%ecx,2), %eax 792; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 793; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 794; X86-NEXT: bswapl %eax 795; X86-NEXT: movl %eax, %ecx 796; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 797; X86-NEXT: shll $4, %ecx 798; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 799; X86-NEXT: shrl $4, %eax 800; X86-NEXT: orl %ecx, %eax 801; X86-NEXT: movl %eax, %ecx 802; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 803; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 804; X86-NEXT: shrl $2, %eax 805; X86-NEXT: leal (%eax,%ecx,4), %eax 806; X86-NEXT: movl %eax, %ecx 807; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 808; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 809; X86-NEXT: shrl %eax 810; X86-NEXT: leal (%eax,%ecx,2), %eax 811; X86-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 812; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 813; X86-NEXT: bswapl %eax 814; X86-NEXT: movl %eax, %ecx 815; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 816; X86-NEXT: shll $4, %ecx 817; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 818; X86-NEXT: shrl $4, %eax 819; X86-NEXT: orl %ecx, %eax 820; X86-NEXT: movl %eax, %ecx 821; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 822; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 823; X86-NEXT: shrl $2, %eax 824; X86-NEXT: leal (%eax,%ecx,4), %eax 825; X86-NEXT: movl %eax, %ecx 826; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 827; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 828; X86-NEXT: shrl %eax 829; X86-NEXT: leal (%eax,%ecx,2), %edi 830; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 831; X86-NEXT: bswapl %eax 832; X86-NEXT: movl %eax, %ecx 833; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 834; X86-NEXT: shll $4, %ecx 835; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 836; X86-NEXT: shrl $4, %eax 837; X86-NEXT: orl %ecx, %eax 838; X86-NEXT: movl %eax, %ecx 839; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 840; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC 841; X86-NEXT: shrl $2, %eax 842; X86-NEXT: leal (%eax,%ecx,4), %eax 843; X86-NEXT: movl %eax, %ecx 844; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 845; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA 846; X86-NEXT: shrl %eax 847; X86-NEXT: leal (%eax,%ecx,2), %edx 848; X86-NEXT: movl (%esp), %esi # 4-byte Reload 849; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 850; X86-NEXT: shrdl $16, %eax, %esi 851; X86-NEXT: shrdl $16, %ebx, %eax 852; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 853; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 854; X86-NEXT: shrdl $16, %ecx, %ebx 855; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill 856; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 857; X86-NEXT: shrdl $16, 
%eax, %ecx 858; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 859; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 860; X86-NEXT: shrdl $16, %ecx, %eax 861; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 862; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 863; X86-NEXT: shrdl $16, %eax, %ecx 864; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 865; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 866; X86-NEXT: shrdl $16, %ecx, %eax 867; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 868; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 869; X86-NEXT: shrdl $16, %eax, %ecx 870; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 871; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 872; X86-NEXT: shrdl $16, %ecx, %eax 873; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 874; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 875; X86-NEXT: shrdl $16, %eax, %ecx 876; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 877; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 878; X86-NEXT: shrdl $16, %ecx, %eax 879; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 880; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload 881; X86-NEXT: shrdl $16, %ebp, %ecx 882; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 883; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload 884; X86-NEXT: shrdl $16, %ebx, %ebp 885; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 886; X86-NEXT: shrdl $16, %eax, %ebx 887; X86-NEXT: shrdl $16, %edi, %eax 888; X86-NEXT: movl %eax, %ecx 889; X86-NEXT: shrdl $16, %edx, %edi 890; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 891; X86-NEXT: movl %edi, 60(%eax) 892; X86-NEXT: movl %ecx, 56(%eax) 893; X86-NEXT: movl %ebx, 52(%eax) 894; X86-NEXT: movl %ebp, 48(%eax) 895; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 
896; X86-NEXT: movl %ecx, 44(%eax) 897; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 898; X86-NEXT: movl %ecx, 40(%eax) 899; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 900; X86-NEXT: movl %ecx, 36(%eax) 901; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 902; X86-NEXT: movl %ecx, 32(%eax) 903; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 904; X86-NEXT: movl %ecx, 28(%eax) 905; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 906; X86-NEXT: movl %ecx, 24(%eax) 907; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 908; X86-NEXT: movl %ecx, 20(%eax) 909; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 910; X86-NEXT: movl %ecx, 16(%eax) 911; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 912; X86-NEXT: movl %ecx, 12(%eax) 913; X86-NEXT: movl (%esp), %ecx # 4-byte Reload 914; X86-NEXT: movl %ecx, 8(%eax) 915; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 916; X86-NEXT: movl %ecx, 4(%eax) 917; X86-NEXT: movl %esi, (%eax) 918; X86-NEXT: shrl $16, %edx 919; X86-NEXT: movw %dx, 64(%eax) 920; X86-NEXT: addl $56, %esp 921; X86-NEXT: popl %esi 922; X86-NEXT: popl %edi 923; X86-NEXT: popl %ebx 924; X86-NEXT: popl %ebp 925; X86-NEXT: retl $4 926; 927; X64-LABEL: large_promotion: 928; X64: # %bb.0: 929; X64-NEXT: pushq %rbp 930; X64-NEXT: pushq %r15 931; X64-NEXT: pushq %r14 932; X64-NEXT: pushq %r13 933; X64-NEXT: pushq %r12 934; X64-NEXT: pushq %rbx 935; X64-NEXT: movq %rdi, %r12 936; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp 937; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx 938; X64-NEXT: bswapq %rbx 939; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F 940; X64-NEXT: movq %rbx, %r10 941; X64-NEXT: andq %r13, %r10 942; X64-NEXT: shlq $4, %r10 943; X64-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0 944; X64-NEXT: andq %rax, %rbx 945; X64-NEXT: shrq $4, %rbx 946; X64-NEXT: orq %r10, %rbx 947; X64-NEXT: movabsq 
$3689348814741910323, %r11 # imm = 0x3333333333333333 948; X64-NEXT: movq %rbx, %r10 949; X64-NEXT: andq %r11, %r10 950; X64-NEXT: movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC 951; X64-NEXT: andq %r14, %rbx 952; X64-NEXT: shrq $2, %rbx 953; X64-NEXT: leaq (%rbx,%r10,4), %r10 954; X64-NEXT: movabsq $6148820866244280320, %rbx # imm = 0x5555000000000000 955; X64-NEXT: andq %r10, %rbx 956; X64-NEXT: movabsq $-6149102341220990976, %rdi # imm = 0xAAAA000000000000 957; X64-NEXT: andq %r10, %rdi 958; X64-NEXT: shrq %rdi 959; X64-NEXT: leaq (%rdi,%rbx,2), %rdi 960; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 961; X64-NEXT: bswapq %rbp 962; X64-NEXT: movq %rbp, %rdi 963; X64-NEXT: andq %r13, %rdi 964; X64-NEXT: shlq $4, %rdi 965; X64-NEXT: andq %rax, %rbp 966; X64-NEXT: shrq $4, %rbp 967; X64-NEXT: orq %rdi, %rbp 968; X64-NEXT: movq %rbp, %rdi 969; X64-NEXT: andq %r11, %rdi 970; X64-NEXT: andq %r14, %rbp 971; X64-NEXT: shrq $2, %rbp 972; X64-NEXT: leaq (%rbp,%rdi,4), %rbp 973; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555 974; X64-NEXT: movq %rbp, %r10 975; X64-NEXT: andq %rbx, %r10 976; X64-NEXT: movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA 977; X64-NEXT: andq %rdi, %rbp 978; X64-NEXT: shrq %rbp 979; X64-NEXT: leaq (%rbp,%r10,2), %rbp 980; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 981; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp 982; X64-NEXT: bswapq %rbp 983; X64-NEXT: movq %rbp, %r10 984; X64-NEXT: andq %r13, %r10 985; X64-NEXT: shlq $4, %r10 986; X64-NEXT: andq %rax, %rbp 987; X64-NEXT: movq %rax, %r15 988; X64-NEXT: shrq $4, %rbp 989; X64-NEXT: orq %r10, %rbp 990; X64-NEXT: movq %rbp, %r10 991; X64-NEXT: andq %r11, %r10 992; X64-NEXT: andq %r14, %rbp 993; X64-NEXT: shrq $2, %rbp 994; X64-NEXT: leaq (%rbp,%r10,4), %rbp 995; X64-NEXT: movq %rbp, %r10 996; X64-NEXT: andq %rbx, %r10 997; X64-NEXT: andq %rdi, %rbp 998; X64-NEXT: shrq %rbp 999; X64-NEXT: leaq (%rbp,%r10,2), %rbp 1000; 
X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 1001; X64-NEXT: bswapq %r10 1002; X64-NEXT: movq %r10, %rax 1003; X64-NEXT: andq %r13, %rax 1004; X64-NEXT: shlq $4, %rax 1005; X64-NEXT: movq %r15, %rdi 1006; X64-NEXT: andq %r15, %r10 1007; X64-NEXT: shrq $4, %r10 1008; X64-NEXT: orq %rax, %r10 1009; X64-NEXT: movq %r10, %rax 1010; X64-NEXT: andq %r11, %rax 1011; X64-NEXT: andq %r14, %r10 1012; X64-NEXT: shrq $2, %r10 1013; X64-NEXT: leaq (%r10,%rax,4), %rax 1014; X64-NEXT: movq %rax, %r10 1015; X64-NEXT: andq %rbx, %r10 1016; X64-NEXT: movabsq $-6148914691236517206, %r15 # imm = 0xAAAAAAAAAAAAAAAA 1017; X64-NEXT: andq %r15, %rax 1018; X64-NEXT: shrq %rax 1019; X64-NEXT: leaq (%rax,%r10,2), %r10 1020; X64-NEXT: bswapq %r9 1021; X64-NEXT: movq %r9, %rax 1022; X64-NEXT: andq %r13, %rax 1023; X64-NEXT: shlq $4, %rax 1024; X64-NEXT: andq %rdi, %r9 1025; X64-NEXT: shrq $4, %r9 1026; X64-NEXT: orq %rax, %r9 1027; X64-NEXT: movq %r9, %rax 1028; X64-NEXT: andq %r11, %rax 1029; X64-NEXT: andq %r14, %r9 1030; X64-NEXT: shrq $2, %r9 1031; X64-NEXT: leaq (%r9,%rax,4), %rax 1032; X64-NEXT: movq %rax, %r9 1033; X64-NEXT: andq %rbx, %r9 1034; X64-NEXT: andq %r15, %rax 1035; X64-NEXT: shrq %rax 1036; X64-NEXT: leaq (%rax,%r9,2), %r9 1037; X64-NEXT: bswapq %r8 1038; X64-NEXT: movq %r8, %rax 1039; X64-NEXT: andq %r13, %rax 1040; X64-NEXT: shlq $4, %rax 1041; X64-NEXT: andq %rdi, %r8 1042; X64-NEXT: shrq $4, %r8 1043; X64-NEXT: orq %rax, %r8 1044; X64-NEXT: movq %r8, %rax 1045; X64-NEXT: andq %r11, %rax 1046; X64-NEXT: andq %r14, %r8 1047; X64-NEXT: shrq $2, %r8 1048; X64-NEXT: leaq (%r8,%rax,4), %rax 1049; X64-NEXT: movq %rax, %r8 1050; X64-NEXT: andq %rbx, %r8 1051; X64-NEXT: andq %r15, %rax 1052; X64-NEXT: shrq %rax 1053; X64-NEXT: leaq (%rax,%r8,2), %r8 1054; X64-NEXT: bswapq %rcx 1055; X64-NEXT: movq %rcx, %rax 1056; X64-NEXT: andq %r13, %rax 1057; X64-NEXT: shlq $4, %rax 1058; X64-NEXT: andq %rdi, %rcx 1059; X64-NEXT: shrq $4, %rcx 1060; X64-NEXT: orq %rax, %rcx 1061; X64-NEXT: movq %rcx, 
%rax 1062; X64-NEXT: andq %r11, %rax 1063; X64-NEXT: andq %r14, %rcx 1064; X64-NEXT: shrq $2, %rcx 1065; X64-NEXT: leaq (%rcx,%rax,4), %rax 1066; X64-NEXT: movq %rax, %rcx 1067; X64-NEXT: andq %rbx, %rcx 1068; X64-NEXT: andq %r15, %rax 1069; X64-NEXT: shrq %rax 1070; X64-NEXT: leaq (%rax,%rcx,2), %rcx 1071; X64-NEXT: bswapq %rdx 1072; X64-NEXT: movq %rdx, %rax 1073; X64-NEXT: andq %r13, %rax 1074; X64-NEXT: shlq $4, %rax 1075; X64-NEXT: andq %rdi, %rdx 1076; X64-NEXT: shrq $4, %rdx 1077; X64-NEXT: orq %rax, %rdx 1078; X64-NEXT: movq %rdx, %rax 1079; X64-NEXT: andq %r11, %rax 1080; X64-NEXT: andq %r14, %rdx 1081; X64-NEXT: shrq $2, %rdx 1082; X64-NEXT: leaq (%rdx,%rax,4), %rax 1083; X64-NEXT: movq %rax, %rdx 1084; X64-NEXT: andq %rbx, %rdx 1085; X64-NEXT: andq %r15, %rax 1086; X64-NEXT: shrq %rax 1087; X64-NEXT: leaq (%rax,%rdx,2), %rax 1088; X64-NEXT: bswapq %rsi 1089; X64-NEXT: andq %rsi, %r13 1090; X64-NEXT: andq %rdi, %rsi 1091; X64-NEXT: shlq $4, %r13 1092; X64-NEXT: shrq $4, %rsi 1093; X64-NEXT: orq %r13, %rsi 1094; X64-NEXT: andq %rsi, %r11 1095; X64-NEXT: andq %r14, %rsi 1096; X64-NEXT: shrq $2, %rsi 1097; X64-NEXT: leaq (%rsi,%r11,4), %rdx 1098; X64-NEXT: andq %rdx, %rbx 1099; X64-NEXT: andq %r15, %rdx 1100; X64-NEXT: shrq %rdx 1101; X64-NEXT: leaq (%rdx,%rbx,2), %rdx 1102; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload 1103; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 1104; X64-NEXT: shrdq $48, %rdi, %rsi 1105; X64-NEXT: shrdq $48, %rbp, %rdi 1106; X64-NEXT: shrdq $48, %r10, %rbp 1107; X64-NEXT: shrdq $48, %r9, %r10 1108; X64-NEXT: shrdq $48, %r8, %r9 1109; X64-NEXT: shrdq $48, %rcx, %r8 1110; X64-NEXT: shrdq $48, %rax, %rcx 1111; X64-NEXT: shrdq $48, %rdx, %rax 1112; X64-NEXT: movq %rax, 56(%r12) 1113; X64-NEXT: movq %rcx, 48(%r12) 1114; X64-NEXT: movq %r8, 40(%r12) 1115; X64-NEXT: movq %r9, 32(%r12) 1116; X64-NEXT: movq %r10, 24(%r12) 1117; X64-NEXT: movq %rbp, 16(%r12) 1118; X64-NEXT: movq %rdi, 8(%r12) 1119; X64-NEXT: 
movq %rsi, (%r12) 1120; X64-NEXT: shrq $48, %rdx 1121; X64-NEXT: movw %dx, 64(%r12) 1122; X64-NEXT: movq %r12, %rax 1123; X64-NEXT: popq %rbx 1124; X64-NEXT: popq %r12 1125; X64-NEXT: popq %r13 1126; X64-NEXT: popq %r14 1127; X64-NEXT: popq %r15 1128; X64-NEXT: popq %rbp 1129; X64-NEXT: retq 1130 %Z = call i528 @llvm.bitreverse.i528(i528 %A) 1131 ret i528 %Z 1132} 1133declare i528 @llvm.bitreverse.i528(i528) 1134