; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT:    and r1, r2, #15
; CHECK-NEXT:    lsl r0, r0, r1
; CHECK-NEXT:    lsr r0, r0, #16
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r3, #31
; CHECK-NEXT:    lsr r1, r1, #1
; CHECK-NEXT:    bic r3, r3, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r0, r0, r2
; CHECK-NEXT:    orr r0, r0, r1, lsr r3
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshl_i37:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT:    push {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT:    mov r4, r1
; SCALAR-NEXT:    mov r8, r0
; SCALAR-NEXT:    ldr r0, [sp, #24]
; SCALAR-NEXT:    mov r5, r3
; SCALAR-NEXT:    ldr r1, [sp, #28]
; SCALAR-NEXT:    mov r6, r2
; SCALAR-NEXT:    mov r2, #37
; SCALAR-NEXT:    mov r3, #0
; SCALAR-NEXT:    bl __aeabi_uldivmod
; SCALAR-NEXT:    lsl r1, r5, #27
; SCALAR-NEXT:    ands r12, r2, #32
; SCALAR-NEXT:    orr r1, r1, r6, lsr #5
; SCALAR-NEXT:    mov r3, r8
; SCALAR-NEXT:    and r5, r2, #31
; SCALAR-NEXT:    mov r0, #31
; SCALAR-NEXT:    movne r3, r1
; SCALAR-NEXT:    cmp r12, #0
; SCALAR-NEXT:    bic r2, r0, r2
; SCALAR-NEXT:    lslne r1, r6, #27
; SCALAR-NEXT:    movne r4, r8
; SCALAR-NEXT:    lsl r7, r3, r5
; SCALAR-NEXT:    lsr r0, r1, #1
; SCALAR-NEXT:    lsl r1, r4, r5
; SCALAR-NEXT:    lsr r3, r3, #1
; SCALAR-NEXT:    orr r0, r7, r0, lsr r2
; SCALAR-NEXT:    orr r1, r1, r3, lsr r2
; SCALAR-NEXT:    pop {r4, r5, r6, r7, r8, pc}
;
; NEON-LABEL: fshl_i37:
; NEON:       @ %bb.0:
; NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
; NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
; NEON-NEXT:    mov r4, r1
; NEON-NEXT:    mov r5, r0
; NEON-NEXT:    ldr r0, [sp, #24]
; NEON-NEXT:    mov r7, r3
; NEON-NEXT:    ldr r1, [sp, #28]
; NEON-NEXT:    mov r6, r2
; NEON-NEXT:    mov r2, #37
; NEON-NEXT:    mov r3, #0
; NEON-NEXT:    bl __aeabi_uldivmod
; NEON-NEXT:    mov r0, #31
; NEON-NEXT:    bic r1, r0, r2
; NEON-NEXT:    lsl r0, r7, #27
; NEON-NEXT:    ands r12, r2, #32
; NEON-NEXT:    orr r0, r0, r6, lsr #5
; NEON-NEXT:    mov r7, r5
; NEON-NEXT:    and r2, r2, #31
; NEON-NEXT:    movne r7, r0
; NEON-NEXT:    lslne r0, r6, #27
; NEON-NEXT:    cmp r12, #0
; NEON-NEXT:    lsl r3, r7, r2
; NEON-NEXT:    lsr r0, r0, #1
; NEON-NEXT:    movne r4, r5
; NEON-NEXT:    orr r0, r3, r0, lsr r1
; NEON-NEXT:    lsr r3, r7, #1
; NEON-NEXT:    lsl r2, r4, r2
; NEON-NEXT:    orr r1, r2, r3, lsr r1
; NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}
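
; A sketch of the variable-amount fshl expansion checked above, read off the
; generated code (C-like pseudocode, illustration only, not part of the test):
;   fshl(x, y, z) = (x << (z & 31)) | ((y >> 1) >> (~z & 31))
; Pre-shifting y right by 1 keeps both shift amounts in [0, 31], so z == 0
; needs no special case. For i37, the amount must first be reduced modulo 37;
; since 37 is not a power of two, that urem becomes an __aeabi_uldivmod
; libcall rather than a simple mask.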

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #67
; CHECK-NEXT:    bx lr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #128
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #120
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r0, r0, r1, lsr #23
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on the shift amount: 41 % 32 == 9.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r0, r0, r1, lsr #23
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work: 105 % 64 == 41.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r1, r3, #9
; CHECK-NEXT:    orr r2, r1, r2, lsr #23
; CHECK-NEXT:    lsl r0, r0, #9
; CHECK-NEXT:    orr r1, r0, r3, lsr #23
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #128
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.
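
; A sketch of the corresponding fshr expansion, again read off the generated
; code (C-like pseudocode, illustration only):
;   fshr(x, y, z) = ((x << 1) << (~z & 31)) | (y >> (z & 31))
; Here x is pre-shifted left by 1 for the same reason: both shift amounts
; stay in [0, 31], and z == 0 correctly yields y.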

define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    pkhbt r0, r1, r0, lsl #16
; CHECK-NEXT:    and r1, r2, #15
; CHECK-NEXT:    lsr r0, r0, r1
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %f
}

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r3, #31
; CHECK-NEXT:    lsl r0, r0, #1
; CHECK-NEXT:    bic r3, r3, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r0, r0, r3
; CHECK-NEXT:    orr r0, r0, r1, lsr r2
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    mov r4, r1
; CHECK-NEXT:    mov r6, r0
; CHECK-NEXT:    ldr r0, [sp, #24]
; CHECK-NEXT:    mov r5, r3
; CHECK-NEXT:    ldr r1, [sp, #28]
; CHECK-NEXT:    mov r7, r2
; CHECK-NEXT:    mov r2, #37
; CHECK-NEXT:    mov r3, #0
; CHECK-NEXT:    bl __aeabi_uldivmod
; CHECK-NEXT:    lsl r3, r5, #27
; CHECK-NEXT:    add r0, r2, #27
; CHECK-NEXT:    orr r3, r3, r7, lsr #5
; CHECK-NEXT:    mov r1, #31
; CHECK-NEXT:    ands r12, r0, #32
; CHECK-NEXT:    mov r5, r6
; CHECK-NEXT:    moveq r5, r3
; CHECK-NEXT:    bic r1, r1, r0
; CHECK-NEXT:    lsl r2, r5, #1
; CHECK-NEXT:    lsleq r3, r7, #27
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    and r7, r0, #31
; CHECK-NEXT:    lsl r2, r2, r1
; CHECK-NEXT:    moveq r4, r6
; CHECK-NEXT:    orr r0, r2, r3, lsr r7
; CHECK-NEXT:    lsl r2, r4, #1
; CHECK-NEXT:    lsl r1, r2, r1
; CHECK-NEXT:    orr r1, r1, r5, lsr r7
; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, pc}
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #31
; CHECK-NEXT:    bx lr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #254
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #225
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #255
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #23
; CHECK-NEXT:    orr r0, r0, r1, lsr #9
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on the shift amount: 41 % 32 == 9.
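; With a constant amount c in [1, 31], fshr(x, y, c) lowers directly to
; (x << (32 - c)) | (y >> c) with no masking, so the output here should be
; identical to fshr_i32_const_shift above.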

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r0, r0, #23
; CHECK-NEXT:    orr r0, r0, r1, lsr #9
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work: 105 % 64 == 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r2, r0, #23
; CHECK-NEXT:    lsl r1, r1, #23
; CHECK-NEXT:    orr r2, r2, r3, lsr #9
; CHECK-NEXT:    orr r1, r1, r0, lsr #9
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #254
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; SCALAR-LABEL: fshr_v4i32_shift_by_bitwidth:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ldm sp, {r0, r1, r2, r3}
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: fshr_v4i32_shift_by_bitwidth:
; NEON:       @ %bb.0:
; NEON-NEXT:    mov r0, sp
; NEON-NEXT:    vld1.64 {d16, d17}, [r0]
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
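
; Note: in the four shift-by-bitwidth tests above, the shift amount reduces to
; 0 modulo the element width, so fshl returns x and fshr returns y. Nothing
; beyond register moves (or loads of the stack-passed vector operand) should
; be emitted.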