; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

; Codegen tests for the llvm.fshl / llvm.fshr funnel-shift intrinsics.
; Three configurations are exercised by the RUN lines above - ppc32 with the
; default CPU, ppc32 with -mcpu=ppc64, and powerpc64le. Functions whose output
; is the same for several configurations share the broader check prefixes.
; The assertion lines are generated by the script named in the NOTE line, so
; regenerate them with that script instead of editing them by hand.

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i128 @llvm.fshl.i128(i128, i128, i128)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshl_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    srw 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Variable i64 funnel shift left. The two ppc32 configurations produce
; different instruction sequences here, so each has its own prefix.

define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 5, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    b .LBB1_3
; CHECK32_32-NEXT:  .LBB1_2:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 5, 0
; CHECK32_32-NEXT:  .LBB1_3:
; CHECK32_32-NEXT:    srw 5, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 7
; CHECK32_32-NEXT:    srw 4, 4, 8
; CHECK32_32-NEXT:    slw 6, 9, 7
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 5, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    ori 5, 6, 0
; CHECK32_64-NEXT:    b .LBB1_3
; CHECK32_64-NEXT:  .LBB1_2:
; CHECK32_64-NEXT:    addi 9, 4, 0
; CHECK32_64-NEXT:  .LBB1_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 8
; CHECK32_64-NEXT:    slw 3, 3, 7
; CHECK32_64-NEXT:    srw 5, 5, 8
; CHECK32_64-NEXT:    slw 6, 9, 7
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Variable i128 funnel shift left. Only the fshl direction is tested at this
; width in this file.

define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK32_32-LABEL: fshl_i128:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    lwz 11, 20(1)
; CHECK32_32-NEXT:    andi. 12, 11, 64
; CHECK32_32-NEXT:    mcrf 1, 0
; CHECK32_32-NEXT:    andi. 12, 11, 32
; CHECK32_32-NEXT:    clrlwi 11, 11, 27
; CHECK32_32-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    ori 12, 7, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    ori 5, 8, 0
; CHECK32_32-NEXT:    ori 6, 9, 0
; CHECK32_32-NEXT:    ori 7, 10, 0
; CHECK32_32-NEXT:    b .LBB2_3
; CHECK32_32-NEXT:  .LBB2_2:
; CHECK32_32-NEXT:    addi 12, 5, 0
; CHECK32_32-NEXT:    addi 5, 6, 0
; CHECK32_32-NEXT:    addi 6, 7, 0
; CHECK32_32-NEXT:    addi 7, 8, 0
; CHECK32_32-NEXT:  .LBB2_3:
; CHECK32_32-NEXT:    subfic 8, 11, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_32-NEXT:  # %bb.4:
; CHECK32_32-NEXT:    ori 9, 12, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    ori 5, 6, 0
; CHECK32_32-NEXT:    ori 6, 7, 0
; CHECK32_32-NEXT:    b .LBB2_6
; CHECK32_32-NEXT:  .LBB2_5:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 12, 0
; CHECK32_32-NEXT:  .LBB2_6:
; CHECK32_32-NEXT:    srw 7, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 11
; CHECK32_32-NEXT:    srw 10, 4, 8
; CHECK32_32-NEXT:    slw 9, 9, 11
; CHECK32_32-NEXT:    srw 12, 5, 8
; CHECK32_32-NEXT:    slw 0, 4, 11
; CHECK32_32-NEXT:    srw 6, 6, 8
; CHECK32_32-NEXT:    slw 8, 5, 11
; CHECK32_32-NEXT:    or 3, 3, 7
; CHECK32_32-NEXT:    or 4, 9, 10
; CHECK32_32-NEXT:    or 5, 0, 12
; CHECK32_32-NEXT:    or 6, 8, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i128:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    stwu 1, -16(1)
; CHECK32_64-NEXT:    lwz 11, 36(1)
; CHECK32_64-NEXT:    andi. 12, 11, 64
; CHECK32_64-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mcrf 1, 0
; CHECK32_64-NEXT:    clrlwi 12, 11, 27
; CHECK32_64-NEXT:    andi. 11, 11, 32
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 4, 6, 0
; CHECK32_64-NEXT:    ori 30, 7, 0
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    ori 7, 9, 0
; CHECK32_64-NEXT:    b .LBB2_3
; CHECK32_64-NEXT:  .LBB2_2:
; CHECK32_64-NEXT:    addi 30, 5, 0
; CHECK32_64-NEXT:  .LBB2_3:
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 5, 30, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    b .LBB2_6
; CHECK32_64-NEXT:  .LBB2_5:
; CHECK32_64-NEXT:    addi 5, 4, 0
; CHECK32_64-NEXT:  .LBB2_6:
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 4, 8, 0
; CHECK32_64-NEXT:    ori 8, 10, 0
; CHECK32_64-NEXT:    b .LBB2_9
; CHECK32_64-NEXT:  .LBB2_8:
; CHECK32_64-NEXT:    addi 4, 6, 0
; CHECK32_64-NEXT:  .LBB2_9:
; CHECK32_64-NEXT:    subfic 11, 12, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_11
; CHECK32_64-NEXT:  # %bb.10:
; CHECK32_64-NEXT:    ori 0, 4, 0
; CHECK32_64-NEXT:    ori 4, 7, 0
; CHECK32_64-NEXT:    ori 7, 8, 0
; CHECK32_64-NEXT:    b .LBB2_12
; CHECK32_64-NEXT:  .LBB2_11:
; CHECK32_64-NEXT:    addi 0, 30, 0
; CHECK32_64-NEXT:  .LBB2_12:
; CHECK32_64-NEXT:    srw 6, 5, 11
; CHECK32_64-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slw 3, 3, 12
; CHECK32_64-NEXT:    srw 9, 0, 11
; CHECK32_64-NEXT:    slw 5, 5, 12
; CHECK32_64-NEXT:    srw 10, 4, 11
; CHECK32_64-NEXT:    slw 0, 0, 12
; CHECK32_64-NEXT:    srw 7, 7, 11
; CHECK32_64-NEXT:    slw 8, 4, 12
; CHECK32_64-NEXT:    or 3, 3, 6
; CHECK32_64-NEXT:    or 4, 5, 9
; CHECK32_64-NEXT:    or 5, 0, 10
; CHECK32_64-NEXT:    or 6, 8, 7
; CHECK32_64-NEXT:    addi 1, 1, 16
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i128:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    andi. 8, 7, 64
; CHECK64-NEXT:    clrlwi 7, 7, 26
; CHECK64-NEXT:    iseleq 5, 6, 5
; CHECK64-NEXT:    subfic 8, 7, 64
; CHECK64-NEXT:    iseleq 6, 3, 6
; CHECK64-NEXT:    iseleq 3, 4, 3
; CHECK64-NEXT:    srd 4, 5, 8
; CHECK64-NEXT:    sld 5, 6, 7
; CHECK64-NEXT:    srd 6, 6, 8
; CHECK64-NEXT:    sld 7, 3, 7
; CHECK64-NEXT:    or 3, 5, 4
; CHECK64-NEXT:    or 4, 7, 6
; CHECK64-NEXT:    blr
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}

; Verify that weird types are minimally supported.
; The shift amount has to be reduced modulo 37 - the ppc32 configurations call
; __umoddi3 for that, while powerpc64le uses a multiply-based sequence.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshl_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    clrlwi 3, 7, 27
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 3, 0
; CHECK32_32-NEXT:    ori 8, 28, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    b .LBB3_3
; CHECK32_32-NEXT:  .LBB3_2:
; CHECK32_32-NEXT:    addi 7, 28, 0
; CHECK32_32-NEXT:    addi 8, 27, 0
; CHECK32_32-NEXT:  .LBB3_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 6
; CHECK32_32-NEXT:    slw 8, 8, 4
; CHECK32_32-NEXT:    srw 6, 3, 6
; CHECK32_32-NEXT:    slw 4, 7, 4
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 4, 6
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    clrlwi 3, 7, 27
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 8, 28, 0
; CHECK32_64-NEXT:    b .LBB3_3
; CHECK32_64-NEXT:  .LBB3_2:
; CHECK32_64-NEXT:    addi 8, 27, 0
; CHECK32_64-NEXT:  .LBB3_3:
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 7, 3, 0
; CHECK32_64-NEXT:    b .LBB3_6
; CHECK32_64-NEXT:  .LBB3_5:
; CHECK32_64-NEXT:    addi 7, 28, 0
; CHECK32_64-NEXT:  .LBB3_6:
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    b .LBB3_8
; CHECK32_64-NEXT:  .LBB3_8:
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    slw 8, 8, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 9, 7, 6
; CHECK32_64-NEXT:    srw 5, 3, 6
; CHECK32_64-NEXT:    slw 4, 7, 4
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 4, 4, 5
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    clrldi 7, 5, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 7, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
; 0b1000011 is 67, the immediate loaded below.

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 67
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9, so the expected code is the same
; as for the constant shift by 9 above.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshl_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 6, 9
; CHECK32-NEXT:    rotlwi 3, 5, 9
; CHECK32-NEXT:    rlwimi 6, 5, 9, 0, 22
; CHECK32-NEXT:    rlwimi 3, 4, 9, 0, 22
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 41
; CHECK64-NEXT:    rldimi 4, 3, 41, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; extract(concat(0b11111111, 0b00000000) << 7) = 0b10000000

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 128
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshr_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    srw 4, 4, 5
; CHECK64-NEXT:    slw 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Variable i64 funnel shift right. As with fshl_i64, the two ppc32
; configurations have separate expectations.

define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    b .LBB10_3
; CHECK32_32-NEXT:  .LBB10_2:
; CHECK32_32-NEXT:    addi 9, 5, 0
; CHECK32_32-NEXT:    addi 3, 4, 0
; CHECK32_32-NEXT:    addi 4, 6, 0
; CHECK32_32-NEXT:  .LBB10_3:
; CHECK32_32-NEXT:    srw 5, 9, 7
; CHECK32_32-NEXT:    slw 3, 3, 8
; CHECK32_32-NEXT:    srw 4, 4, 7
; CHECK32_32-NEXT:    slw 6, 9, 8
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 4, 0
; CHECK32_64-NEXT:    b .LBB10_3
; CHECK32_64-NEXT:  .LBB10_2:
; CHECK32_64-NEXT:    addi 9, 5, 0
; CHECK32_64-NEXT:    addi 3, 4, 0
; CHECK32_64-NEXT:    addi 5, 6, 0
; CHECK32_64-NEXT:  .LBB10_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 7
; CHECK32_64-NEXT:    slw 3, 3, 8
; CHECK32_64-NEXT:    srw 5, 5, 7
; CHECK32_64-NEXT:    slw 6, 9, 8
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
; As in fshl_i37, the shift amount is reduced modulo 37 first (__umoddi3 on the
; ppc32 configurations, a multiply-based sequence on powerpc64le).
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshr_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    clrlwi 3, 7, 27
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    addi 4, 4, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 28, 0
; CHECK32_32-NEXT:    ori 8, 27, 0
; CHECK32_32-NEXT:    b .LBB11_3
; CHECK32_32-NEXT:  .LBB11_2:
; CHECK32_32-NEXT:    addi 7, 3, 0
; CHECK32_32-NEXT:    addi 8, 28, 0
; CHECK32_32-NEXT:    addi 3, 5, 0
; CHECK32_32-NEXT:  .LBB11_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 4
; CHECK32_32-NEXT:    slw 8, 8, 6
; CHECK32_32-NEXT:    srw 4, 3, 4
; CHECK32_32-NEXT:    slw 6, 7, 6
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    clrlwi 3, 7, 27
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    addi 4, 4, 27
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 28, 0
; CHECK32_64-NEXT:    ori 8, 27, 0
; CHECK32_64-NEXT:    b .LBB11_3
; CHECK32_64-NEXT:  .LBB11_2:
; CHECK32_64-NEXT:    addi 7, 3, 0
; CHECK32_64-NEXT:    addi 8, 28, 0
; CHECK32_64-NEXT:  .LBB11_3:
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_4
; CHECK32_64-NEXT:    b .LBB11_5
; CHECK32_64-NEXT:  .LBB11_4:
; CHECK32_64-NEXT:    addi 3, 5, 0
; CHECK32_64-NEXT:  .LBB11_5:
; CHECK32_64-NEXT:    srw 9, 7, 4
; CHECK32_64-NEXT:    slw 8, 8, 6
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 4, 3, 4
; CHECK32_64-NEXT:    slw 5, 7, 6
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    or 4, 5, 4
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    clrldi 7, 5, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 7, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    addi 5, 5, 27
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
; 0b0011111 is 31, the immediate loaded below.

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 31
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).
; The right shift by 9 shows up as a left rotate by 32-9=23 in the expected code.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshr_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 4, 23
; CHECK32-NEXT:    rotlwi 5, 5, 23
; CHECK32-NEXT:    rlwimi 6, 3, 23, 0, 8
; CHECK32-NEXT:    rlwimi 5, 4, 23, 0, 8
; CHECK32-NEXT:    mr 3, 6
; CHECK32-NEXT:    mr 4, 5
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 23
; CHECK64-NEXT:    rldimi 4, 3, 23, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; extract(concat(0b11111111, 0b00000000) >> 7) = 0b11111110

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 254
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Shift amount equal to the bit width. The expected code for fshl is a bare
; return, i.e. the first operand comes back unchanged.

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; For fshr with a shift amount equal to the bit width, the expected code copies
; the second operand into the return register.

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; Vector version of the fshl shift-by-bitwidth case, every lane shifts by 32.
; All configurations expect a bare return.

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Vector version of the fshr shift-by-bitwidth case. The default ppc32
; configuration expects four scalar register moves, while the other two
; configurations expect a single vector register move.

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mr 6, 10
; CHECK32_32-NEXT:    mr 5, 9
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vmr 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vmr 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
