; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.

define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 27
; CHECK-NEXT:    rlwimi 4, 3, 3, 0, 28
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK32-LABEL: rotl_i64_const_shift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 5, 4, 3
; CHECK32-NEXT:    rotlwi 6, 3, 3
; CHECK32-NEXT:    rlwimi 5, 3, 3, 0, 28
; CHECK32-NEXT:    rlwimi 6, 4, 3, 0, 28
; CHECK32-NEXT:    mr 3, 5
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 3, 3, 3
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

; When first 2 operands match, it's a rotate (by variable amount).
define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotl_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    slw 3, 3, 6
; CHECK32-NEXT:    srw 4, 5, 4
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    slw 3, 3, 4
; CHECK64-NEXT:    srw 4, 6, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 5, 6, 32
; CHECK32_32-NEXT:    clrlwi 5, 6, 27
; CHECK32_32-NEXT:    subfic 6, 5, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB4_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 3, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    b .LBB4_3
; CHECK32_32-NEXT:  .LBB4_2:
; CHECK32_32-NEXT:    addi 7, 4, 0
; CHECK32_32-NEXT:  .LBB4_3:
; CHECK32_32-NEXT:    srw 4, 7, 6
; CHECK32_32-NEXT:    slw 8, 3, 5
; CHECK32_32-NEXT:    srw 6, 3, 6
; CHECK32_32-NEXT:    slw 5, 7, 5
; CHECK32_32-NEXT:    or 3, 8, 4
; CHECK32_32-NEXT:    or 4, 5, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 5, 6, 32
; CHECK32_64-NEXT:    clrlwi 5, 6, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB4_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 3, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    b .LBB4_3
; CHECK32_64-NEXT:  .LBB4_2:
; CHECK32_64-NEXT:    addi 7, 4, 0
; CHECK32_64-NEXT:  .LBB4_3:
; CHECK32_64-NEXT:    subfic 6, 5, 32
; CHECK32_64-NEXT:    srw 4, 7, 6
; CHECK32_64-NEXT:    slw 8, 3, 5
; CHECK32_64-NEXT:    srw 6, 3, 6
; CHECK32_64-NEXT:    slw 5, 7, 5
; CHECK32_64-NEXT:    or 3, 8, 4
; CHECK32_64-NEXT:    or 4, 5, 6
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotl_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.
define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotl_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 3
; CHECK32_32-NEXT:    rotlwi 4, 4, 3
; CHECK32_32-NEXT:    rotlwi 5, 5, 3
; CHECK32_32-NEXT:    rotlwi 6, 6, 3
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 29
; CHECK-NEXT:    rlwimi 4, 3, 5, 0, 26
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 3, 3, 29
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).
define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotr_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    srw 5, 5, 6
; CHECK32-NEXT:    slw 3, 3, 4
; CHECK32-NEXT:    or 3, 5, 3
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotr_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    srw 4, 6, 4
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    or 3, 4, 3
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 5, 6, 32
; CHECK32_32-NEXT:    clrlwi 5, 6, 27
; CHECK32_32-NEXT:    subfic 6, 5, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 4, 0
; CHECK32_32-NEXT:    b .LBB11_3
; CHECK32_32-NEXT:  .LBB11_2:
; CHECK32_32-NEXT:    addi 7, 3, 0
; CHECK32_32-NEXT:    addi 3, 4, 0
; CHECK32_32-NEXT:  .LBB11_3:
; CHECK32_32-NEXT:    srw 4, 7, 5
; CHECK32_32-NEXT:    slw 8, 3, 6
; CHECK32_32-NEXT:    srw 5, 3, 5
; CHECK32_32-NEXT:    slw 6, 7, 6
; CHECK32_32-NEXT:    or 3, 8, 4
; CHECK32_32-NEXT:    or 4, 6, 5
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 5, 6, 32
; CHECK32_64-NEXT:    clrlwi 5, 6, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 4, 0
; CHECK32_64-NEXT:    b .LBB11_3
; CHECK32_64-NEXT:  .LBB11_2:
; CHECK32_64-NEXT:    addi 7, 3, 0
; CHECK32_64-NEXT:    addi 3, 4, 0
; CHECK32_64-NEXT:  .LBB11_3:
; CHECK32_64-NEXT:    subfic 6, 5, 32
; CHECK32_64-NEXT:    srw 4, 7, 5
; CHECK32_64-NEXT:    slw 8, 3, 6
; CHECK32_64-NEXT:    srw 5, 3, 5
; CHECK32_64-NEXT:    slw 6, 7, 6
; CHECK32_64-NEXT:    or 3, 8, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 4, 4
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotr_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    neg 7, 7
; CHECK32_32-NEXT:    neg 8, 8
; CHECK32_32-NEXT:    neg 9, 9
; CHECK32_32-NEXT:    neg 10, 10
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vxor 4, 4, 4
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    xxlxor 36, 36, 36
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotr_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 29
; CHECK32_32-NEXT:    rotlwi 4, 4, 29
; CHECK32_32-NEXT:    rotlwi 5, 5, 29
; CHECK32_32-NEXT:    rotlwi 6, 6, 29
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, -16
; CHECK32_64-NEXT:    vspltisw 4, 13
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, -16
; CHECK64-NEXT:    vspltisw 4, 13
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}