; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,X86-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare i128 @llvm.fshl.i128(i128, i128, i128)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshl_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SSE2-LABEL: fshl_i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    movl %edx, %edi
; X86-SSE2-NEXT:    cmovnel %esi, %edi
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %esi, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdx, %rcx
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT:    shldq %cl, %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SSE2-LABEL: fshl_i128:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $64, %cl
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    cmovnel %ebx, %eax
; X86-SSE2-NEXT:    movl %edx, %ebp
; X86-SSE2-NEXT:    cmovnel %edi, %ebp
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    cmovnel %esi, %edx
; X86-SSE2-NEXT:    cmovnel %ebp, %esi
; X86-SSE2-NEXT:    cmovnel %eax, %ebp
; X86-SSE2-NEXT:    cmovel %edi, %ebx
; X86-SSE2-NEXT:    cmovel %eax, %edi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    movl %ebp, %ebx
; X86-SSE2-NEXT:    shldl %cl, %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    shldl %cl, %ebp, %edi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %esi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl %edx, 12(%ecx)
; X86-SSE2-NEXT:    movl %edi, 8(%ecx)
; X86-SSE2-NEXT:    movl %ebx, 4(%ecx)
; X86-SSE2-NEXT:    movl %eax, (%ecx)
; X86-SSE2-NEXT:    movl %ecx, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl $4
;
; X64-AVX2-LABEL: fshl_i128:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    testb $64, %r8b
; X64-AVX2-NEXT:    cmovneq %rdi, %rsi
; X64-AVX2-NEXT:    cmoveq %rcx, %rdx
; X64-AVX2-NEXT:    cmovneq %rcx, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    movl %r8d, %ecx
; X64-AVX2-NEXT:    shldq %cl, %rdx, %rax
; X64-AVX2-NEXT:    shldq %cl, %rdi, %rsi
; X64-AVX2-NEXT:    movq %rsi, %rdx
; X64-AVX2-NEXT:    retq
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshl_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    shldl $27, %ebx, %edi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    jne .LBB3_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    jmp .LBB3_3
; X86-SSE2-NEXT:  .LBB3_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:  .LBB3_3:
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %esi
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i37:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdx, %rcx
; X64-AVX2-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT:    andq %rdx, %rax
; X64-AVX2-NEXT:    movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT:    mulq %rdx
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT:    subl %eax, %ecx
; X64-AVX2-NEXT:    shlq $27, %rsi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT:    shldq %cl, %rsi, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $67, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is 'shld' with constant operand.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_shift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $9, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $9, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshl_i64_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shldl $9, %ecx, %edx
; X86-SSE2-NEXT:    shrdl $23, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i64_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    shldq $41, %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() nounwind {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $-128, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshr_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshr_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    shldl $27, %ebx, %esi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    addl $27, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    je .LBB10_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    jmp .LBB10_3
; X86-SSE2-NEXT:  .LBB10_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    movl %ebx, %esi
; X86-SSE2-NEXT:  .LBB10_3:
; X86-SSE2-NEXT:    shrdl %cl, %edx, %esi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %edi, %edx
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i37:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdx, %rcx
; X64-AVX2-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT:    andq %rdx, %rax
; X64-AVX2-NEXT:    movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT:    mulq %rdx
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT:    subl %eax, %ecx
; X64-AVX2-NEXT:    addl $27, %ecx
; X64-AVX2-NEXT:    shlq $27, %rsi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT:    shrdq %cl, %rdi, %rsi
; X64-AVX2-NEXT:    movq %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() nounwind {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $31, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; demanded bits tests

define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_demandedbits:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $9, %esi, %eax
; X64-AVX2-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_demandedbits:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $23, %esi, %eax
; X64-AVX2-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

; undef handling

define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef0_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    andl $7, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %eax, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shll %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    andb $7, %cl
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shll %cl, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}
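; Informal note (not part of the autogenerated assertions): a rough way to read
; the undef folds in this section is through the generic expansion
;   fshl(x, y, z) ~= (x << (z & 31)) | (y >> (32 - (z & 31)))   for z & 31 != 0,
; where the bits contributed by an undef operand may be chosen as zero, so e.g.
; fshl(%a0, undef, 9) is expected to become a plain 'shll $9'.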

define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl %cl, %esi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}

define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %eax, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shrl %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    andb $7, %cl
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrl %cl, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

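; Similarly for fshr (informal note, not part of the autogenerated assertions):
;   fshr(x, y, z) ~= (y >> (z & 31)) | (x << (32 - (z & 31)))   for z & 31 != 0,
; so the constant-amount undef cases fshr_i32_undef0_cst and fshr_i32_undef1_cst
; are expected to reduce to a plain 'shrl $9' and 'shll $23' respectively.
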
define i32 @fshr_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef1_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    andl $7, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}

define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}

; shift zero args

define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    xorl %eax, %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    xorl %eax, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
  ret i32 %res
}

define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    xorl %edx, %edx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edx, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    xorl %edx, %edx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edx, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    xorl %eax, %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    xorl %eax, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 %a1)
  ret i32 %res
}

define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
  ret i32 %res
}

; shift by zero

define i32 @fshl_i32_zero2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 0)
  ret i32 %res
}

define i32 @fshr_i32_zero2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 0)
  ret i32 %res
}

; With constant shift amount, this is 'shrd' or 'shld'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_const_shift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $23, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount: 41-32=9, but the right-shift may become a left-shift, so 32-9=23.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $23, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work: 105-64 = 41, but the right-shift may become a left-shift, so 64-41=23.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshr_i64_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    shldl $23, %ecx, %edx
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i64_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    shldq $23, %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
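; A quick hand check of the expected constant (not from the autogenerated
; assertions): fshr(i8 255, i8 0, i8 7) takes the low 8 bits of
; concat(0xFF, 0x00) >> 7 = 0x01FE, i.e. 0xFE = -2.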

define i8 @fshr_i8_const_fold() nounwind {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $-2, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovaps %xmm1, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

%struct.S = type { [11 x i8], i8 }
define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
; X86-SSE2-LABEL: PR45265:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    leal (%eax,%eax,2), %edx
; X86-SSE2-NEXT:    movzwl 8(%ecx,%edx,4), %esi
; X86-SSE2-NEXT:    movl 4(%ecx,%edx,4), %edi
; X86-SSE2-NEXT:    shrdl $8, %esi, %edi
; X86-SSE2-NEXT:    xorl %eax, %edi
; X86-SSE2-NEXT:    sarl $31, %eax
; X86-SSE2-NEXT:    movzbl 10(%ecx,%edx,4), %ecx
; X86-SSE2-NEXT:    shll $16, %ecx
; X86-SSE2-NEXT:    orl %esi, %ecx
; X86-SSE2-NEXT:    shll $8, %ecx
; X86-SSE2-NEXT:    movl %ecx, %edx
; X86-SSE2-NEXT:    sarl $8, %edx
; X86-SSE2-NEXT:    sarl $31, %ecx
; X86-SSE2-NEXT:    shldl $24, %edx, %ecx
; X86-SSE2-NEXT:    xorl %eax, %ecx
; X86-SSE2-NEXT:    orl %ecx, %edi
; X86-SSE2-NEXT:    jne .LBB46_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    jmp _Z3foov # TAILCALL
; X86-SSE2-NEXT:  .LBB46_1:
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: PR45265:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movslq %edi, %rax
; X64-AVX2-NEXT:    leaq (%rax,%rax,2), %rcx
; X64-AVX2-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
; X64-AVX2-NEXT:    shlq $16, %rdx
; X64-AVX2-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
; X64-AVX2-NEXT:    orq %rdx, %rdi
; X64-AVX2-NEXT:    movq (%rsi,%rcx,4), %rcx
; X64-AVX2-NEXT:    shrdq $40, %rdi, %rcx
; X64-AVX2-NEXT:    cmpq %rax, %rcx
; X64-AVX2-NEXT:    jne .LBB46_1
; X64-AVX2-NEXT:  # %bb.2:
; X64-AVX2-NEXT:    jmp _Z3foov # TAILCALL
; X64-AVX2-NEXT:  .LBB46_1:
; X64-AVX2-NEXT:    retq
  %3 = sext i32 %0 to i64
  %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
  %5 = bitcast %struct.S* %4 to i88*
  %6 = load i88, i88* %5, align 1
  %7 = ashr i88 %6, 40
  %8 = trunc i88 %7 to i64
  %9 = icmp eq i64 %8, %3
  br i1 %9, label %10, label %11

10:
  tail call void @_Z3foov()
  br label %11

11:
  ret void
}
declare dso_local void @_Z3foov()

define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shll %cl, %esi
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_fshl:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}

define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_rotl:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shll %cl, %edx
; X86-SSE2-NEXT:    roll %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_rotl:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    roll %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = shl i32 %x, %s
  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %rot, %shx
  ret i32 %or
}

define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shll %cl, %esi
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_fshl_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}

define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_rotl_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shll %cl, %edx
; X86-SSE2-NEXT:    roll %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_rotl_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    roll %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = shl i32 %x, %s
  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %shx, %rot
  ret i32 %or
}

define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shrl %cl, %esi
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_fshr:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}

define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_rotr:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shrl %cl, %edx
; X86-SSE2-NEXT:    rorl %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_rotr:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    rorl %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = lshr i32 %x, %s
  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %rot, %shx
  ret i32 %or
}

define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shrl %cl, %esi
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_fshr_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}

define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_rotr_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shrl %cl, %edx
; X86-SSE2-NEXT:    rorl %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_rotr_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    rorl %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = lshr i32 %x, %s
  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %shx, %rot
  ret i32 %or
}

define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_simplify:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_fshl_simplify:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}

define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_simplify:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_fshr_simplify:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}
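
; Informal note (not part of the autogenerated assertions): in the two
; *_simplify tests above, the or'd shift is already covered by the funnel
; shift, e.g. or(fshl(y, x, s), shl(y, s)) == fshl(y, x, s), because
; fshl(y, x, s) = (y << (s & 31)) | (x >> (32 - (s & 31))) (or just y when
; s & 31 == 0), so only the shld/shrd is expected in the checks.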