; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i128 @llvm.fshl.i128(i128, i128, i128)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshl_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    srw 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; i64 on ppc32 is split into word halves; a compare against bit 5 of the
; shift amount (andi. ..., 32) selects which words feed the 32-bit shifts.
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 5, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    b .LBB1_3
; CHECK32_32-NEXT:  .LBB1_2:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 5, 0
; CHECK32_32-NEXT:  .LBB1_3:
; CHECK32_32-NEXT:    srw 5, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 7
; CHECK32_32-NEXT:    srw 4, 4, 8
; CHECK32_32-NEXT:    slw 6, 9, 7
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 5, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    ori 5, 6, 0
; CHECK32_64-NEXT:    b .LBB1_3
; CHECK32_64-NEXT:  .LBB1_2:
; CHECK32_64-NEXT:    addi 9, 4, 0
; CHECK32_64-NEXT:  .LBB1_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 8
; CHECK32_64-NEXT:    slw 3, 3, 7
; CHECK32_64-NEXT:    srw 5, 5, 8
; CHECK32_64-NEXT:    slw 6, 9, 7
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; i128: ppc32 selects among four words (bits 6 and 5 of the amount);
; ppc64 selects between doubleword halves using isel.
define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK32_32-LABEL: fshl_i128:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    lwz 11, 20(1)
; CHECK32_32-NEXT:    andi. 12, 11, 64
; CHECK32_32-NEXT:    mcrf 1, 0
; CHECK32_32-NEXT:    andi. 12, 11, 32
; CHECK32_32-NEXT:    clrlwi 11, 11, 27
; CHECK32_32-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    ori 12, 7, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    ori 5, 8, 0
; CHECK32_32-NEXT:    ori 6, 9, 0
; CHECK32_32-NEXT:    ori 7, 10, 0
; CHECK32_32-NEXT:    b .LBB2_3
; CHECK32_32-NEXT:  .LBB2_2:
; CHECK32_32-NEXT:    addi 12, 5, 0
; CHECK32_32-NEXT:    addi 5, 6, 0
; CHECK32_32-NEXT:    addi 6, 7, 0
; CHECK32_32-NEXT:    addi 7, 8, 0
; CHECK32_32-NEXT:  .LBB2_3:
; CHECK32_32-NEXT:    subfic 8, 11, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_32-NEXT:  # %bb.4:
; CHECK32_32-NEXT:    ori 9, 12, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    ori 5, 6, 0
; CHECK32_32-NEXT:    ori 6, 7, 0
; CHECK32_32-NEXT:    b .LBB2_6
; CHECK32_32-NEXT:  .LBB2_5:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 12, 0
; CHECK32_32-NEXT:  .LBB2_6:
; CHECK32_32-NEXT:    srw 7, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 11
; CHECK32_32-NEXT:    srw 10, 4, 8
; CHECK32_32-NEXT:    slw 9, 9, 11
; CHECK32_32-NEXT:    srw 12, 5, 8
; CHECK32_32-NEXT:    slw 0, 4, 11
; CHECK32_32-NEXT:    srw 6, 6, 8
; CHECK32_32-NEXT:    slw 8, 5, 11
; CHECK32_32-NEXT:    or 3, 3, 7
; CHECK32_32-NEXT:    or 4, 9, 10
; CHECK32_32-NEXT:    or 5, 0, 12
; CHECK32_32-NEXT:    or 6, 8, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i128:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    stwu 1, -16(1)
; CHECK32_64-NEXT:    lwz 11, 36(1)
; CHECK32_64-NEXT:    andi. 12, 11, 64
; CHECK32_64-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mcrf 1, 0
; CHECK32_64-NEXT:    clrlwi 12, 11, 27
; CHECK32_64-NEXT:    andi. 11, 11, 32
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 4, 6, 0
; CHECK32_64-NEXT:    ori 30, 7, 0
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    ori 7, 9, 0
; CHECK32_64-NEXT:    b .LBB2_3
; CHECK32_64-NEXT:  .LBB2_2:
; CHECK32_64-NEXT:    addi 30, 5, 0
; CHECK32_64-NEXT:  .LBB2_3:
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 5, 30, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    b .LBB2_6
; CHECK32_64-NEXT:  .LBB2_5:
; CHECK32_64-NEXT:    addi 5, 4, 0
; CHECK32_64-NEXT:  .LBB2_6:
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 4, 8, 0
; CHECK32_64-NEXT:    ori 8, 10, 0
; CHECK32_64-NEXT:    b .LBB2_9
; CHECK32_64-NEXT:  .LBB2_8:
; CHECK32_64-NEXT:    addi 4, 6, 0
; CHECK32_64-NEXT:  .LBB2_9:
; CHECK32_64-NEXT:    subfic 11, 12, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_11
; CHECK32_64-NEXT:  # %bb.10:
; CHECK32_64-NEXT:    ori 0, 4, 0
; CHECK32_64-NEXT:    ori 4, 7, 0
; CHECK32_64-NEXT:    ori 7, 8, 0
; CHECK32_64-NEXT:    b .LBB2_12
; CHECK32_64-NEXT:  .LBB2_11:
; CHECK32_64-NEXT:    addi 0, 30, 0
; CHECK32_64-NEXT:  .LBB2_12:
; CHECK32_64-NEXT:    srw 6, 5, 11
; CHECK32_64-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slw 3, 3, 12
; CHECK32_64-NEXT:    srw 9, 0, 11
; CHECK32_64-NEXT:    slw 5, 5, 12
; CHECK32_64-NEXT:    srw 10, 4, 11
; CHECK32_64-NEXT:    slw 0, 0, 12
; CHECK32_64-NEXT:    srw 7, 7, 11
; CHECK32_64-NEXT:    slw 8, 4, 12
; CHECK32_64-NEXT:    or 3, 3, 6
; CHECK32_64-NEXT:    or 4, 5, 9
; CHECK32_64-NEXT:    or 5, 0, 10
; CHECK32_64-NEXT:    or 6, 8, 7
; CHECK32_64-NEXT:    addi 1, 1, 16
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i128:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    andi. 8, 7, 64
; CHECK64-NEXT:    clrlwi 7, 7, 26
; CHECK64-NEXT:    iseleq 5, 6, 5
; CHECK64-NEXT:    subfic 8, 7, 64
; CHECK64-NEXT:    iseleq 6, 3, 6
; CHECK64-NEXT:    iseleq 3, 4, 3
; CHECK64-NEXT:    srd 4, 5, 8
; CHECK64-NEXT:    sld 5, 6, 7
; CHECK64-NEXT:    srd 6, 6, 8
; CHECK64-NEXT:    sld 7, 3, 7
; CHECK64-NEXT:    or 3, 5, 4
; CHECK64-NEXT:    or 4, 7, 6
; CHECK64-NEXT:    blr
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}

; Verify that weird types are minimally supported.
; The shift amount is reduced mod 37: via a __umoddi3 libcall on ppc32,
; via a multiply-high reciprocal sequence on ppc64.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshl_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 3, 0
; CHECK32_32-NEXT:    ori 8, 28, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    b .LBB3_3
; CHECK32_32-NEXT:  .LBB3_2:
; CHECK32_32-NEXT:    addi 7, 28, 0
; CHECK32_32-NEXT:    addi 8, 27, 0
; CHECK32_32-NEXT:  .LBB3_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 6
; CHECK32_32-NEXT:    slw 8, 8, 4
; CHECK32_32-NEXT:    srw 6, 3, 6
; CHECK32_32-NEXT:    slw 4, 7, 4
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 4, 6
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    mr 3, 7
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 8, 28, 0
; CHECK32_64-NEXT:    b .LBB3_3
; CHECK32_64-NEXT:  .LBB3_2:
; CHECK32_64-NEXT:    addi 8, 27, 0
; CHECK32_64-NEXT:  .LBB3_3:
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 7, 3, 0
; CHECK32_64-NEXT:    b .LBB3_6
; CHECK32_64-NEXT:  .LBB3_5:
; CHECK32_64-NEXT:    addi 7, 28, 0
; CHECK32_64-NEXT:  .LBB3_6:
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    b .LBB3_8
; CHECK32_64-NEXT:  .LBB3_8:
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    slw 8, 8, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 9, 7, 6
; CHECK32_64-NEXT:    srw 5, 3, 6
; CHECK32_64-NEXT:    slw 4, 7, 4
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 4, 4, 5
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 5, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 67
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshl_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 6, 9
; CHECK32-NEXT:    rotlwi 3, 5, 9
; CHECK32-NEXT:    rlwimi 6, 5, 9, 0, 22
; CHECK32-NEXT:    rlwimi 3, 4, 9, 0, 22
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 41
; CHECK64-NEXT:    rldimi 4, 3, 41, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 128
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshr_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    srw 4, 4, 5
; CHECK64-NEXT:    slw 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    b .LBB10_3
; CHECK32_32-NEXT:  .LBB10_2:
; CHECK32_32-NEXT:    addi 9, 5, 0
; CHECK32_32-NEXT:    addi 3, 4, 0
; CHECK32_32-NEXT:    addi 4, 6, 0
; CHECK32_32-NEXT:  .LBB10_3:
; CHECK32_32-NEXT:    srw 5, 9, 7
; CHECK32_32-NEXT:    slw 3, 3, 8
; CHECK32_32-NEXT:    srw 4, 4, 7
; CHECK32_32-NEXT:    slw 6, 9, 8
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 4, 0
; CHECK32_64-NEXT:    b .LBB10_3
; CHECK32_64-NEXT:  .LBB10_2:
; CHECK32_64-NEXT:    addi 9, 5, 0
; CHECK32_64-NEXT:    addi 3, 4, 0
; CHECK32_64-NEXT:    addi 5, 6, 0
; CHECK32_64-NEXT:  .LBB10_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 7
; CHECK32_64-NEXT:    slw 3, 3, 8
; CHECK32_64-NEXT:    srw 5, 5, 7
; CHECK32_64-NEXT:    slw 6, 9, 8
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshr_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    addi 4, 4, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 28, 0
; CHECK32_32-NEXT:    ori 8, 27, 0
; CHECK32_32-NEXT:    b .LBB11_3
; CHECK32_32-NEXT:  .LBB11_2:
; CHECK32_32-NEXT:    addi 7, 3, 0
; CHECK32_32-NEXT:    addi 8, 28, 0
; CHECK32_32-NEXT:    addi 3, 5, 0
; CHECK32_32-NEXT:  .LBB11_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 4
; CHECK32_32-NEXT:    slw 8, 8, 6
; CHECK32_32-NEXT:    srw 4, 3, 4
; CHECK32_32-NEXT:    slw 6, 7, 6
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    mr 3, 7
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    addi 4, 4, 27
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 28, 0
; CHECK32_64-NEXT:    ori 8, 27, 0
; CHECK32_64-NEXT:    b .LBB11_3
; CHECK32_64-NEXT:  .LBB11_2:
; CHECK32_64-NEXT:    addi 7, 3, 0
; CHECK32_64-NEXT:    addi 8, 28, 0
; CHECK32_64-NEXT:  .LBB11_3:
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_4
; CHECK32_64-NEXT:    b .LBB11_5
; CHECK32_64-NEXT:  .LBB11_4:
; CHECK32_64-NEXT:    addi 3, 5, 0
; CHECK32_64-NEXT:  .LBB11_5:
; CHECK32_64-NEXT:    srw 9, 7, 4
; CHECK32_64-NEXT:    slw 8, 8, 6
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 4, 3, 4
; CHECK32_64-NEXT:    slw 5, 7, 6
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    or 4, 5, 4
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 5, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    addi 5, 5, 27
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 31
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshr_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 4, 23
; CHECK32-NEXT:    rotlwi 5, 5, 23
; CHECK32-NEXT:    rlwimi 6, 3, 23, 0, 8
; CHECK32-NEXT:    rlwimi 5, 4, 23, 0, 8
; CHECK32-NEXT:    mr 3, 6
; CHECK32-NEXT:    mr 4, 5
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 23
; CHECK64-NEXT:    rldimi 4, 3, 23, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 254
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Shift by the full bit width: fshl returns %x unchanged, fshr returns %y.

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mr 6, 10
; CHECK32_32-NEXT:    mr 5, 9
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vmr 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vmr 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}