1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines an instruction selector for the ARM target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ARM.h" 15 #include "ARMBaseInstrInfo.h" 16 #include "ARMTargetMachine.h" 17 #include "MCTargetDesc/ARMAddressingModes.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/SelectionDAGISel.h" 25 #include "llvm/IR/CallingConv.h" 26 #include "llvm/IR/Constants.h" 27 #include "llvm/IR/DerivedTypes.h" 28 #include "llvm/IR/Function.h" 29 #include "llvm/IR/Intrinsics.h" 30 #include "llvm/IR/LLVMContext.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/Target/TargetLowering.h" 35 #include "llvm/Target/TargetOptions.h" 36 37 using namespace llvm; 38 39 #define DEBUG_TYPE "arm-isel" 40 41 static cl::opt<bool> 42 DisableShifterOp("disable-shifter-op", cl::Hidden, 43 cl::desc("Disable isel of shifter-op"), 44 cl::init(false)); 45 46 //===--------------------------------------------------------------------===// 47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 48 /// instructions for SelectionDAG operations. 
///
namespace {

// Classification returned by the unified addr-mode-2 matcher below: either a
// simple base+imm12 form, or a base+shifted-register (shifter-op) form.
enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through; the subtarget can change
    // per-function (e.g. with target attributes).
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
                                      SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
  }

  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
  }

  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc) {
    SelectAddrMode2Worker(N, Base, Offset, Opc);
    //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    // This always matches one way or another.
    return true;
  }

  // Lower a condition-code operand (a constant node) into the (pred, CPSR)
  // operand pair used by conditional-move patterns.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode5(SDValue N, SDValue &Base,
                       SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // True if Imm is encodable as an ARM so_imm (modified immediate).
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  // True if ~Imm is encodable as an ARM so_imm (usable via MVN forms).
  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  // True if Imm is encodable as a Thumb-2 modified immediate.
  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  // True if ~Imm is encodable as a Thumb-2 modified immediate.
  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.  (Q registers are not supported.)
  void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes = nullptr);

  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
  /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
  /// generated to force the table registers to be consecutive.
  void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectConcatVector(SDNode *N);
  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  bool trySMLAWSMULW(SDNode *N);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant.  Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// \brief Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  // Report the pre-scaled value and check it against the half-open range.
  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The (and (srl ...)) -> UBFX rewrite below only pays off when bitfield
  // extract instructions are available (v6T2 and later).
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // The AND may be on either side of the ADD; normalize it into N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros the mask must be contiguous ones.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // Only worth reasoning about the hazard when N feeds exactly one user.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd
    // This adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

// Decide whether folding Shift as a shifter operand is profitable on this
// subtarget; conservative subtargets (A9-like, Swift) only fold cheap shifts.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
      return 1;                                             // MOVW
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt(*MF)) return 2;                    // MOVW + MOVT
  return 3;                                                 // Literal pool load
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Reposition M before N in the topological order so N's replacement is
  // visited by the selector.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  CurDAG->ReplaceAllUsesWith(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
                                                          PowerOfTwo),
                                      SDLoc(N), MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Immediate-shift form requires a constant shift amount.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Do not check the profitability here; a constant shift amount belongs to
  // the immediate-shift form instead, so reject it.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}


bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}



bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
             dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap the operands: the shifted value becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}


//-----

AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      SDLoc(N), MVT::i32);
      return AM2_BASE;
    }
  }

  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
             dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap the operands: the shifted value becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return AM2_SHOP;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  // Immediate offsets in range are handled by the imm forms; reject here.
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Shift not worth folding on this subtarget; use the unshifted value.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Match a pre-indexed addrmode2 immediate offset (separate-immediate PRE_IMM
// form): a constant in [0, 0x1000), negated when the indexed mode subtracts.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    // The immediate is emitted directly (signed), not AM2-encoded.
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


// Match a post-indexed addrmode2 immediate offset; the add/sub direction and
// magnitude are packed into an AM2 opcode immediate.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

// Trivial matcher: any address is acceptable as a bare base register.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

// Match addrmode3 (halfword/signed-byte loads etc.): base register plus
// either a second register or a +/- 8-bit immediate.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No foldable offset: use N itself as the base (as a TargetFrameIndex if
    // it is a frame slot) and a zero offset.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Split the sign into the AM3 add/sub field.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Offset out of immediate range: fall back to register + register.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

// Match a pre/post-indexed addrmode3 offset: an 8-bit immediate if possible,
// otherwise a register offset.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  // Non-constant (or out-of-range) offset: use the register form.
  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

// Match addrmode5 (VFP load/store): base register plus a +/- imm8 offset
// scaled by 4.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper for operands that can be used directly as a base.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // Offset out of range: keep the whole expression as the base.
  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     SDLoc(N), MVT::i32);
  return true;
}

// Match addrmode6 (NEON load/store): address register plus an alignment
// operand derived from the memory operand.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

// Match an addrmode6 post-increment offset; an increment equal to the access
// size is implicit (register 0), anything else stays as a register.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

// Match a PIC_ADD with a single use, splitting it into its base operand and
// the constant PC-label id.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

// Match a Thumb1 register+register address; a lone zero constant is matched
// as reg+reg of itself.
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                            SDValue &Base, SDValue &Offset){
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

// Match a Thumb1 base + unsigned 5-bit immediate scaled by Scale (1, 2 or 4
// via the wrappers below).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() ==
               ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper for operands usable directly as a base.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

// Word-sized variant: imm5 scaled by 4.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

// Halfword-sized variant: imm5 scaled by 2.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

// Byte-sized variant: imm5 scaled by 1.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

// Match a Thumb1 SP-relative address: SP/frame-index plus an unsigned imm8
// scaled by 4.  Frame objects are forced to 4-byte alignment so the scaled
// offset stays valid.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


// Match a Thumb2 base + unsigned imm12 address (t2LDRi12 and friends).
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;   // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

// Match a Thumb2 base with a negative imm8 offset (t2LDRi8 and friends).
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

// Match a Thumb2 pre/post-indexed imm8 offset; the sign is taken from the
// memory node's indexed addressing mode.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

// Match a Thumb2 register + (register << [0,3]) address (t2 shifted-register
// offset form).
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

// Match the ldrex/strex address form: base plus a word-aligned offset in
// [0, 1020], emitted divided by 4.  Always succeeds (falls back to offset 0).
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The instruction encodes the offset in words.
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

// Copy the memory operand from the source memory node onto the newly created
// machine node so alias analysis and scheduling keep working.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
}

// Try to select an ARM-mode pre/post-indexed load; returns true and replaces
// N on success.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3; pick signed/unsigned extension variant.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads only exist in addrmode3 form.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The separate-immediate PRE_IMM forms take no offset register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

// Try to select a Thumb1 post-incremented i32 load (offset must be exactly 4).
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

// Try to select a Thumb2 pre/post-indexed load; returns true and replaces N
// on success.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1641 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1642 SDLoc dl(V0.getNode()); 1643 SDValue RegClass = 1644 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1645 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1646 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1647 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1648 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1649 } 1650 1651 /// \brief Form a D register from a pair of S registers. 1652 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1653 SDLoc dl(V0.getNode()); 1654 SDValue RegClass = 1655 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1656 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1657 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1658 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1659 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1660 } 1661 1662 /// \brief Form a quad register from a pair of D registers. 1663 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1664 SDLoc dl(V0.getNode()); 1665 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1666 MVT::i32); 1667 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1668 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1669 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1670 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1671 } 1672 1673 /// \brief Form 4 consecutive D registers from a pair of Q registers. 
1674 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1675 SDLoc dl(V0.getNode()); 1676 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1677 MVT::i32); 1678 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1679 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1680 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1681 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1682 } 1683 1684 /// \brief Form 4 consecutive S registers. 1685 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1686 SDValue V2, SDValue V3) { 1687 SDLoc dl(V0.getNode()); 1688 SDValue RegClass = 1689 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1690 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1691 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1692 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1693 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1694 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1695 V2, SubReg2, V3, SubReg3 }; 1696 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1697 } 1698 1699 /// \brief Form 4 consecutive D registers. 
1700 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1701 SDValue V2, SDValue V3) { 1702 SDLoc dl(V0.getNode()); 1703 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1704 MVT::i32); 1705 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1706 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1707 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1708 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1709 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1710 V2, SubReg2, V3, SubReg3 }; 1711 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1712 } 1713 1714 /// \brief Form 4 consecutive Q registers. 1715 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1716 SDValue V2, SDValue V3) { 1717 SDLoc dl(V0.getNode()); 1718 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1719 MVT::i32); 1720 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1721 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1722 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1723 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1724 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1725 V2, SubReg2, V3, SubReg3 }; 1726 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1727 } 1728 1729 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1730 /// of a NEON VLD or VST instruction. The supported values depend on the 1731 /// number of registers being loaded. 
1732 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1733 unsigned NumVecs, bool is64BitVector) { 1734 unsigned NumRegs = NumVecs; 1735 if (!is64BitVector && NumVecs < 3) 1736 NumRegs *= 2; 1737 1738 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1739 if (Alignment >= 32 && NumRegs == 4) 1740 Alignment = 32; 1741 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1742 Alignment = 16; 1743 else if (Alignment >= 8) 1744 Alignment = 8; 1745 else 1746 Alignment = 0; 1747 1748 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1749 } 1750 1751 static bool isVLDfixed(unsigned Opc) 1752 { 1753 switch (Opc) { 1754 default: return false; 1755 case ARM::VLD1d8wb_fixed : return true; 1756 case ARM::VLD1d16wb_fixed : return true; 1757 case ARM::VLD1d64Qwb_fixed : return true; 1758 case ARM::VLD1d32wb_fixed : return true; 1759 case ARM::VLD1d64wb_fixed : return true; 1760 case ARM::VLD1d64TPseudoWB_fixed : return true; 1761 case ARM::VLD1d64QPseudoWB_fixed : return true; 1762 case ARM::VLD1q8wb_fixed : return true; 1763 case ARM::VLD1q16wb_fixed : return true; 1764 case ARM::VLD1q32wb_fixed : return true; 1765 case ARM::VLD1q64wb_fixed : return true; 1766 case ARM::VLD1DUPd8wb_fixed : return true; 1767 case ARM::VLD1DUPd16wb_fixed : return true; 1768 case ARM::VLD1DUPd32wb_fixed : return true; 1769 case ARM::VLD1DUPq8wb_fixed : return true; 1770 case ARM::VLD1DUPq16wb_fixed : return true; 1771 case ARM::VLD1DUPq32wb_fixed : return true; 1772 case ARM::VLD2d8wb_fixed : return true; 1773 case ARM::VLD2d16wb_fixed : return true; 1774 case ARM::VLD2d32wb_fixed : return true; 1775 case ARM::VLD2q8PseudoWB_fixed : return true; 1776 case ARM::VLD2q16PseudoWB_fixed : return true; 1777 case ARM::VLD2q32PseudoWB_fixed : return true; 1778 case ARM::VLD2DUPd8wb_fixed : return true; 1779 case ARM::VLD2DUPd16wb_fixed : return true; 1780 case ARM::VLD2DUPd32wb_fixed : return true; 1781 } 1782 } 1783 1784 static bool 
isVSTfixed(unsigned Opc) 1785 { 1786 switch (Opc) { 1787 default: return false; 1788 case ARM::VST1d8wb_fixed : return true; 1789 case ARM::VST1d16wb_fixed : return true; 1790 case ARM::VST1d32wb_fixed : return true; 1791 case ARM::VST1d64wb_fixed : return true; 1792 case ARM::VST1q8wb_fixed : return true; 1793 case ARM::VST1q16wb_fixed : return true; 1794 case ARM::VST1q32wb_fixed : return true; 1795 case ARM::VST1q64wb_fixed : return true; 1796 case ARM::VST1d64TPseudoWB_fixed : return true; 1797 case ARM::VST1d64QPseudoWB_fixed : return true; 1798 case ARM::VST2d8wb_fixed : return true; 1799 case ARM::VST2d16wb_fixed : return true; 1800 case ARM::VST2d32wb_fixed : return true; 1801 case ARM::VST2q8PseudoWB_fixed : return true; 1802 case ARM::VST2q16PseudoWB_fixed : return true; 1803 case ARM::VST2q32PseudoWB_fixed : return true; 1804 } 1805 } 1806 1807 // Get the register stride update opcode of a VLD/VST instruction that 1808 // is otherwise equivalent to the given fixed stride updating instruction. 
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// SelectVLD - Select a NEON structured-load intrinsic (VLD1-VLD4, optionally
/// with address writeback) to a machine node.  The opcode tables are indexed
/// by element size (see the OpcodeIndex switch): DOpcodes for D-register
/// results, QOpcodes0/QOpcodes1 for Q-register results; for VLD3/VLD4 of Q
/// types the load is emitted as two instructions (even then odd D subregs),
/// using QOpcodes0 for the first and QOpcodes1 for the second.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  // Operand layout of the intrinsic node: chain is operand 0; the address is
  // operand 1 for the updating form (which also carries the increment at
  // AddrOpIdx + 1) and operand 2 otherwise.
  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
    break;
  }

  // Multi-vector results are produced as one wide i64 vector value and
  // split into subregisters afterwards.  VLD3 rounds up to 4 registers
  // (the extra one is undef).
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // A non-constant increment selects the register-stride variant; a
      // constant increment (the access size) keeps the fixed-stride opcode.
      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs, starting at the address produced by the first
    // load (result 1 of VLdA) and merging into its partial result (result 0).
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

/// SelectVST - Select a NEON structured-store intrinsic (VST1-VST4, optionally
/// with address writeback).  Mirrors SelectVLD: DOpcodes/QOpcodes0/QOpcodes1
/// are indexed by element size, and VST3/VST4 of Q types are emitted as two
/// instructions (even then odd D subregs).
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
    break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      if (!isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(Inc);
      else if (NumVecs > 2 && !isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
  ReplaceNode(N, VStB);
}

/// SelectVLDSTLane - Select a NEON single-lane load or store intrinsic
/// (VLD2-4/VST2-4 lane variants, optionally with address writeback).  The
/// vectors are bundled into one super-register operand; for loads the lanes
/// are extracted back out of the result afterwards.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to the total number of bytes accessed, round it
  // down to a power of two, and drop anything below 8 bits (encoded as 0).
  // NumVecs == 3 never takes an alignment operand.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
  }

  // Bundle the input vectors into one super-register (REG_SEQUENCE); for
  // NumVecs == 3 the fourth slot is filled with undef.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

/// SelectVLDDup - Select a NEON load-and-duplicate intrinsic (VLD1DUP-VLD4DUP,
/// optionally with address writeback).  DOpcodes/QOpcodes are indexed by
/// element size (0 = i8, 1 = i16, 2 = 32-bit).
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
    return;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Same alignment clamping as SelectVLDSTLane: cap at the bytes accessed,
  // round down to a power of two, drop sub-8-bit alignments.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned Opc;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:  Opc = DOpcodes[0]; break;
  case MVT::v16i8: Opc = QOpcodes[0]; break;
  case MVT::v4i16: Opc = DOpcodes[1]; break;
  case MVT::v8i16: Opc = QOpcodes[1]; break;
  case MVT::v2f32:
  case MVT::v2i32: Opc = DOpcodes[2]; break;
  case MVT::v4f32:
  case MVT::v4i32: Opc = QOpcodes[2]; break;
  }

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // fixed-stride update instructions don't have an explicit writeback
    // operand. It's implicit in the opcode itself.
    SDValue Inc = N->getOperand(2);
    if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    if (!isa<ConstantSDNode>(Inc.getNode()))
      Ops.push_back(Inc);
    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
    else if (NumVecs > 2)
      Ops.push_back(Reg0);
  }
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  std::vector<EVT> ResTys;
  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);
  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = ARM::dsub_0;
    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

/// SelectVTBL - Select a NEON table lookup intrinsic (VTBL / VTBX when IsExt)
/// with NumVecs table registers, bundling the table into one super-register.
void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
                                 unsigned Opc) {
  assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // VTBX (extension) carries the fall-back vector as operand 1, shifting the
  // table registers up by one.
  unsigned FirstTblReg = IsExt ? 2 : 1;

  // Form a REG_SEQUENCE to force register allocation.
  SDValue RegSeq;
  SDValue V0 = N->getOperand(FirstTblReg + 0);
  SDValue V1 = N->getOperand(FirstTblReg + 1);
  if (NumVecs == 2)
    RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
  else {
    SDValue V2 = N->getOperand(FirstTblReg + 2);
    // If it's a vtbl3, form a quad D-register and leave the last part as
    // an undef.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(FirstTblReg + 3);
    RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
  }

  SmallVector<SDValue, 6> Ops;
  if (IsExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
  Ops.push_back(getAL(CurDAG, dl)); // predicate
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

/// tryV6T2BitfieldExtractOp - Try to match N (an AND, a shift-of-shift /
/// shift-of-and SRL/SRA, or a SIGN_EXTEND_INREG) to a v6T2 bitfield extract
/// (SBFX/UBFX or their Thumb-2 forms), or to a plain right shift when the
/// field reaches the top bit.  Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X,  0, X, -X
/// select_cc setgt    X, -1, X, -X
/// select_cc setl[te] X,  0, -X, X
/// select_cc setlt    X,  1, -X, X
/// which represent Integer ABS into:
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  // N is an ISD::XOR; match the (xor (add X, Y), Y) with Y = (sra X, size-1)
  // pattern described in the comment above and replace it with ABS/t2ABS.
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  // The shift amount must be the sign bit, i.e. type width - 1.
  unsigned Size = XType.getSizeInBits() - 1;

  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// Match the sign-extended 16-bit operand of an SMULW/SMLAW candidate.
/// Sets *Opc to the B (bottom-half) or T (top-half) variant and Src1 to the
/// value being extended; returns false if no form matches.
static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
                                 bool Accumulate) {
  // For SM*WB, we need to some form of sext.
  // For SM*WT, we need to search for (sra X, 16)
  // Src1 then gets set to X.
  if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
       SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
       SignExt.getOpcode() == ISD::AssertSext) &&
      SignExt.getValueType() == MVT::i32) {

    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
    Src1 = SignExt.getOperand(0);
    return true;
  }

  if (SignExt.getOpcode() != ISD::SRA)
    return false;

  ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
  if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
    return false;

  SDValue Op0 = SignExt.getOperand(0);

  // The sign extend operand for SM*WB could be generated by a shl and ashr.
  if (Op0.getOpcode() == ISD::SHL) {
    SDValue SHL = Op0;
    ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
    if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
      return false;

    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
    Src1 = Op0.getOperand(0);
    return true;
  }
  // Bare (sra X, 16): the top half is used, so select the T variant.
  *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
  Src1 = SignExt.getOperand(0);
  return true;
}

/// Match the (or (srl (smul_lohi ...), 16), (shl (smul_lohi ...), 16))
/// pattern that reconstructs the middle 32 bits of a 48-bit signed product,
/// as produced for SMULW[B|T]/SMLAW[B|T].  On success sets *Opc and the two
/// multiplicands Src0/Src1.
static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
                                SDValue &Src1, bool Accumulate) {
  // First we look for:
  // (add (or (srl ?, 16), (shl ?, 16)))
  if (OR.getOpcode() != ISD::OR)
    return false;

  SDValue SRL = OR.getOperand(0);
  SDValue SHL = OR.getOperand(1);

  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
    // OR is commutative: try the operands the other way around.
    SRL = OR.getOperand(1);
    SHL = OR.getOperand(0);
    if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
      return false;
  }

  ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
  ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
  if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
      SHLSrc1->getZExtValue() != 16)
    return false;

  // The first operands to the shifts need to be the two results from the
  // same smul_lohi node.
  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
      SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
    return false;

  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
    return false;

  // Now we have:
  // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
  // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
  // For SMLAWB the 16-bit value will signed extended somehow.
  // For SMLAWT only the SRA is required.

  // Check both sides of SMUL_LOHI
  if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
    Src0 = SMULLOHI->getOperand(1);
  } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
                                  Accumulate)) {
    Src0 = SMULLOHI->getOperand(0);
  } else {
    return false;
  }
  return true;
}

/// trySMLAWSMULW - Try to match an ADD (accumulating) or OR node to the
/// SMLAW[B|T] / SMULW[B|T] signed multiply-word instructions.  Returns true
/// and replaces N on success.
bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
  if (!Subtarget->hasV6Ops() ||
      (Subtarget->isThumb() && !Subtarget->hasThumb2()))
    return false;

  SDLoc dl(N);
  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue A, B;
  unsigned Opc = 0;

  if (N->getOpcode() == ISD::ADD) {
    if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
      return false;

    // Either operand of the ADD may carry the multiply; the other is the
    // accumulator.
    SDValue Acc;
    if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
      Acc = Src1;
    } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
      Acc = Src0;
    } else {
      return false;
    }
    if (Opc == 0)
      return false;

    SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
    return true;
  } else if (N->getOpcode() == ISD::OR &&
             SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
    if (Opc == 0)
      return false;

    SDValue Ops[] = { A, B, getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32)};
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }
  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  // Pick the CMP_SWAP pseudo matching the memory width of the cmpxchg.
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Operands: address, expected value, new value, chain.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);

  // Result 1 of the pseudo is skipped; the chain is result 2.
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

/// SelectConcatVector - Select a CONCAT_VECTORS of two 64-bit vectors into a
/// D-register pair REG_SEQUENCE.
void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
  // The only time a CONCAT_VECTORS operation can have legal types is when
  // two 64-bit vectors are concatenated to a 128-bit vector.
  EVT VT = N->getValueType(0);
  if (!VT.is128BitVector() || N->getNumOperands() != 2)
    llvm_unreachable("unexpected CONCAT_VECTORS");
  ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
}

/// If all set bits of A are contiguous, return the (most-significant,
/// least-significant) set-bit positions; otherwise return None.
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
2750 return; 2751 2752 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 2753 SDValue And = N->getOperand(0); 2754 if (!And->hasOneUse()) 2755 return; 2756 2757 SDValue Zero = N->getOperand(1); 2758 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 2759 And->getOpcode() != ISD::AND) 2760 return; 2761 SDValue X = And.getOperand(0); 2762 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 2763 2764 if (!C || !X->hasOneUse()) 2765 return; 2766 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 2767 if (!Range) 2768 return; 2769 2770 // There are several ways to lower this: 2771 SDNode *NewN; 2772 SDLoc dl(N); 2773 2774 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 2775 if (Subtarget->isThumb2()) { 2776 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 2777 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2778 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2779 CurDAG->getRegister(0, MVT::i32) }; 2780 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2781 } else { 2782 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 2783 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2784 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 2785 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2786 } 2787 }; 2788 2789 if (Range->second == 0) { 2790 // 1. Mask includes the LSB -> Simply shift the top N bits off 2791 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2792 ReplaceNode(And.getNode(), NewN); 2793 } else if (Range->first == 31) { 2794 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 2795 NewN = EmitShift(ARM::tLSRri, X, Range->second); 2796 ReplaceNode(And.getNode(), NewN); 2797 } else if (Range->first == Range->second) { 2798 // 3. Only one bit is set. We can shift this into the sign bit and use a 2799 // PL/MI comparison. 
2800 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2801 ReplaceNode(And.getNode(), NewN); 2802 2803 SwitchEQNEToPLMI = true; 2804 } else if (!Subtarget->hasV6T2Ops()) { 2805 // 4. Do a double shift to clear bottom and top bits, but only in 2806 // thumb-1 mode as in thumb-2 we can use UBFX. 2807 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2808 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 2809 Range->second + (31 - Range->first)); 2810 ReplaceNode(And.getNode(), NewN); 2811 } 2812 2813 } 2814 2815 void ARMDAGToDAGISel::Select(SDNode *N) { 2816 SDLoc dl(N); 2817 2818 if (N->isMachineOpcode()) { 2819 N->setNodeId(-1); 2820 return; // Already selected. 2821 } 2822 2823 switch (N->getOpcode()) { 2824 default: break; 2825 case ISD::ADD: 2826 case ISD::OR: 2827 if (trySMLAWSMULW(N)) 2828 return; 2829 break; 2830 case ISD::WRITE_REGISTER: 2831 if (tryWriteRegister(N)) 2832 return; 2833 break; 2834 case ISD::READ_REGISTER: 2835 if (tryReadRegister(N)) 2836 return; 2837 break; 2838 case ISD::INLINEASM: 2839 if (tryInlineAsm(N)) 2840 return; 2841 break; 2842 case ISD::XOR: 2843 // Select special operations if XOR node forms integer ABS pattern 2844 if (tryABSOp(N)) 2845 return; 2846 // Other cases are autogenerated. 
2847 break; 2848 case ISD::Constant: { 2849 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2850 // If we can't materialize the constant we need to use a literal pool 2851 if (ConstantMaterializationCost(Val) > 2) { 2852 SDValue CPIdx = CurDAG->getTargetConstantPool( 2853 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2854 TLI->getPointerTy(CurDAG->getDataLayout())); 2855 2856 SDNode *ResNode; 2857 if (Subtarget->isThumb()) { 2858 SDValue Pred = getAL(CurDAG, dl); 2859 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2860 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; 2861 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2862 Ops); 2863 } else { 2864 SDValue Ops[] = { 2865 CPIdx, 2866 CurDAG->getTargetConstant(0, dl, MVT::i32), 2867 getAL(CurDAG, dl), 2868 CurDAG->getRegister(0, MVT::i32), 2869 CurDAG->getEntryNode() 2870 }; 2871 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2872 Ops); 2873 } 2874 ReplaceNode(N, ResNode); 2875 return; 2876 } 2877 2878 // Other cases are autogenerated. 2879 break; 2880 } 2881 case ISD::FrameIndex: { 2882 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 2883 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2884 SDValue TFI = CurDAG->getTargetFrameIndex( 2885 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2886 if (Subtarget->isThumb1Only()) { 2887 // Set the alignment of the frame object to 4, to avoid having to generate 2888 // more than one ADD 2889 MachineFrameInfo &MFI = MF->getFrameInfo(); 2890 if (MFI.getObjectAlignment(FI) < 4) 2891 MFI.setObjectAlignment(FI, 4); 2892 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2893 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2894 return; 2895 } else { 2896 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 
2897 ARM::t2ADDri : ARM::ADDri); 2898 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2899 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2900 CurDAG->getRegister(0, MVT::i32) }; 2901 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2902 return; 2903 } 2904 } 2905 case ISD::SRL: 2906 if (tryV6T2BitfieldExtractOp(N, false)) 2907 return; 2908 break; 2909 case ISD::SIGN_EXTEND_INREG: 2910 case ISD::SRA: 2911 if (tryV6T2BitfieldExtractOp(N, true)) 2912 return; 2913 break; 2914 case ISD::MUL: 2915 if (Subtarget->isThumb1Only()) 2916 break; 2917 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2918 unsigned RHSV = C->getZExtValue(); 2919 if (!RHSV) break; 2920 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 2921 unsigned ShImm = Log2_32(RHSV-1); 2922 if (ShImm >= 32) 2923 break; 2924 SDValue V = N->getOperand(0); 2925 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2926 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2927 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2928 if (Subtarget->isThumb()) { 2929 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2930 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2931 return; 2932 } else { 2933 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2934 Reg0 }; 2935 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2936 return; 2937 } 2938 } 2939 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
2940 unsigned ShImm = Log2_32(RHSV+1); 2941 if (ShImm >= 32) 2942 break; 2943 SDValue V = N->getOperand(0); 2944 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2945 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2946 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2947 if (Subtarget->isThumb()) { 2948 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2949 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2950 return; 2951 } else { 2952 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2953 Reg0 }; 2954 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2955 return; 2956 } 2957 } 2958 } 2959 break; 2960 case ISD::AND: { 2961 // Check for unsigned bitfield extract 2962 if (tryV6T2BitfieldExtractOp(N, false)) 2963 return; 2964 2965 // If an immediate is used in an AND node, it is possible that the immediate 2966 // can be more optimally materialized when negated. If this is the case we 2967 // can negate the immediate and use a BIC instead. 2968 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2969 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 2970 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 2971 2972 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 2973 // immediate can be negated and fit in the immediate operand of 2974 // a t2BIC, don't do any manual transform here as this can be 2975 // handled by the generic ISel machinery. 2976 bool PreferImmediateEncoding = 2977 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 2978 if (!PreferImmediateEncoding && 2979 ConstantMaterializationCost(Imm) > 2980 ConstantMaterializationCost(~Imm)) { 2981 // The current immediate costs more to materialize than a negated 2982 // immediate, so negate the immediate and use a BIC. 
2983 SDValue NewImm = 2984 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 2985 // If the new constant didn't exist before, reposition it in the topological 2986 // ordering so it is just before N. Otherwise, don't touch its location. 2987 if (NewImm->getNodeId() == -1) 2988 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 2989 2990 if (!Subtarget->hasThumb2()) { 2991 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 2992 N->getOperand(0), NewImm, getAL(CurDAG, dl), 2993 CurDAG->getRegister(0, MVT::i32)}; 2994 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 2995 return; 2996 } else { 2997 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 2998 CurDAG->getRegister(0, MVT::i32), 2999 CurDAG->getRegister(0, MVT::i32)}; 3000 ReplaceNode(N, 3001 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3002 return; 3003 } 3004 } 3005 } 3006 3007 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3008 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3009 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3010 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3011 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3012 EVT VT = N->getValueType(0); 3013 if (VT != MVT::i32) 3014 break; 3015 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3016 ? ARM::t2MOVTi16 3017 : (Subtarget->hasV6T2Ops() ? 
ARM::MOVTi16 : 0); 3018 if (!Opc) 3019 break; 3020 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3021 N1C = dyn_cast<ConstantSDNode>(N1); 3022 if (!N1C) 3023 break; 3024 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3025 SDValue N2 = N0.getOperand(1); 3026 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3027 if (!N2C) 3028 break; 3029 unsigned N1CVal = N1C->getZExtValue(); 3030 unsigned N2CVal = N2C->getZExtValue(); 3031 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3032 (N1CVal & 0xffffU) == 0xffffU && 3033 (N2CVal & 0xffffU) == 0x0U) { 3034 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3035 dl, MVT::i32); 3036 SDValue Ops[] = { N0.getOperand(0), Imm16, 3037 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3038 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3039 return; 3040 } 3041 } 3042 3043 break; 3044 } 3045 case ARMISD::VMOVRRD: 3046 ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, 3047 N->getOperand(0), getAL(CurDAG, dl), 3048 CurDAG->getRegister(0, MVT::i32))); 3049 return; 3050 case ISD::UMUL_LOHI: { 3051 if (Subtarget->isThumb1Only()) 3052 break; 3053 if (Subtarget->isThumb()) { 3054 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3055 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3056 ReplaceNode( 3057 N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops)); 3058 return; 3059 } else { 3060 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3061 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3062 CurDAG->getRegister(0, MVT::i32) }; 3063 ReplaceNode(N, CurDAG->getMachineNode( 3064 Subtarget->hasV6Ops() ? 
ARM::UMULL : ARM::UMULLv5, dl, 3065 MVT::i32, MVT::i32, Ops)); 3066 return; 3067 } 3068 } 3069 case ISD::SMUL_LOHI: { 3070 if (Subtarget->isThumb1Only()) 3071 break; 3072 if (Subtarget->isThumb()) { 3073 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3074 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3075 ReplaceNode( 3076 N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops)); 3077 return; 3078 } else { 3079 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3080 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3081 CurDAG->getRegister(0, MVT::i32) }; 3082 ReplaceNode(N, CurDAG->getMachineNode( 3083 Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl, 3084 MVT::i32, MVT::i32, Ops)); 3085 return; 3086 } 3087 } 3088 case ARMISD::UMAAL: { 3089 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3090 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3091 N->getOperand(2), N->getOperand(3), 3092 getAL(CurDAG, dl), 3093 CurDAG->getRegister(0, MVT::i32) }; 3094 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3095 return; 3096 } 3097 case ARMISD::UMLAL:{ 3098 // UMAAL is similar to UMLAL but it adds two 32-bit values to the 3099 // 64-bit multiplication result. 
3100 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && 3101 N->getOperand(2).getOpcode() == ARMISD::ADDC && 3102 N->getOperand(3).getOpcode() == ARMISD::ADDE) { 3103 3104 SDValue Addc = N->getOperand(2); 3105 SDValue Adde = N->getOperand(3); 3106 3107 if (Adde.getOperand(2).getNode() == Addc.getNode()) { 3108 3109 ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0)); 3110 ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1)); 3111 3112 if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) 3113 { 3114 // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm 3115 // RdLo = one operand to be added, lower 32-bits of res 3116 // RdHi = other operand to be added, upper 32-bits of res 3117 // Rn = first multiply operand 3118 // Rm = second multiply operand 3119 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3120 Addc.getOperand(0), Addc.getOperand(1), 3121 getAL(CurDAG, dl), 3122 CurDAG->getRegister(0, MVT::i32) }; 3123 unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3124 CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops); 3125 return; 3126 } 3127 } 3128 } 3129 3130 if (Subtarget->isThumb()) { 3131 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3132 N->getOperand(3), getAL(CurDAG, dl), 3133 CurDAG->getRegister(0, MVT::i32)}; 3134 ReplaceNode( 3135 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3136 return; 3137 }else{ 3138 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3139 N->getOperand(3), getAL(CurDAG, dl), 3140 CurDAG->getRegister(0, MVT::i32), 3141 CurDAG->getRegister(0, MVT::i32) }; 3142 ReplaceNode(N, CurDAG->getMachineNode( 3143 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3144 MVT::i32, MVT::i32, Ops)); 3145 return; 3146 } 3147 } 3148 case ARMISD::SMLAL:{ 3149 if (Subtarget->isThumb()) { 3150 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3151 N->getOperand(3), getAL(CurDAG, dl), 3152 CurDAG->getRegister(0, MVT::i32)}; 3153 ReplaceNode( 3154 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3155 return; 3156 }else{ 3157 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3158 N->getOperand(3), getAL(CurDAG, dl), 3159 CurDAG->getRegister(0, MVT::i32), 3160 CurDAG->getRegister(0, MVT::i32) }; 3161 ReplaceNode(N, CurDAG->getMachineNode( 3162 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3163 MVT::i32, MVT::i32, Ops)); 3164 return; 3165 } 3166 } 3167 case ARMISD::SUBE: { 3168 if (!Subtarget->hasV6Ops()) 3169 break; 3170 // Look for a pattern to match SMMLS 3171 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3172 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3173 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3174 !SDValue(N, 1).use_empty()) 3175 break; 3176 3177 if (Subtarget->isThumb()) 3178 assert(Subtarget->hasThumb2() && 3179 "This pattern should not be generated for Thumb"); 3180 3181 SDValue SmulLoHi = N->getOperand(1); 3182 SDValue Subc = N->getOperand(2); 3183 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3184 3185 if (!Zero || Zero->getZExtValue() != 0 || 3186 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3187 N->getOperand(1) != SmulLoHi.getValue(1) || 3188 N->getOperand(2) != Subc.getValue(1)) 3189 break; 3190 3191 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3192 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3193 N->getOperand(0), getAL(CurDAG, dl), 3194 CurDAG->getRegister(0, MVT::i32) }; 3195 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3196 return; 3197 } 3198 case ISD::LOAD: { 3199 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3200 if (tryT2IndexedLoad(N)) 3201 return; 3202 } else if (Subtarget->isThumb()) { 3203 if (tryT1IndexedLoad(N)) 3204 return; 3205 } else if (tryARMIndexedLoad(N)) 3206 return; 3207 // Other cases are autogenerated. 3208 break; 3209 } 3210 case ARMISD::BRCOND: { 3211 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3212 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3213 // Pattern complexity = 6 cost = 1 size = 0 3214 3215 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3216 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3217 // Pattern complexity = 6 cost = 1 size = 0 3218 3219 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3220 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3221 // Pattern complexity = 6 cost = 1 size = 0 3222 3223 unsigned Opc = Subtarget->isThumb() ? 3224 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3225 SDValue Chain = N->getOperand(0); 3226 SDValue N1 = N->getOperand(1); 3227 SDValue N2 = N->getOperand(2); 3228 SDValue N3 = N->getOperand(3); 3229 SDValue InFlag = N->getOperand(4); 3230 assert(N1.getOpcode() == ISD::BasicBlock); 3231 assert(N2.getOpcode() == ISD::Constant); 3232 assert(N3.getOpcode() == ISD::Register); 3233 3234 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3235 3236 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3237 bool SwitchEQNEToPLMI; 3238 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3239 InFlag = N->getOperand(4); 3240 3241 if (SwitchEQNEToPLMI) { 3242 switch ((ARMCC::CondCodes)CC) { 3243 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3244 case ARMCC::NE: 3245 CC = (unsigned)ARMCC::MI; 3246 break; 3247 case ARMCC::EQ: 3248 CC = (unsigned)ARMCC::PL; 3249 break; 3250 } 3251 } 3252 } 3253 3254 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3255 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3256 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3257 MVT::Glue, Ops); 3258 Chain = SDValue(ResNode, 0); 3259 if (N->getNumValues() == 2) { 3260 InFlag = SDValue(ResNode, 1); 3261 ReplaceUses(SDValue(N, 1), InFlag); 3262 } 3263 ReplaceUses(SDValue(N, 0), 3264 SDValue(Chain.getNode(), Chain.getResNo())); 3265 CurDAG->RemoveDeadNode(N); 3266 return; 3267 } 3268 3269 case ARMISD::CMPZ: { 3270 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3271 // This allows us to avoid materializing the expensive negative constant. 3272 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3273 // for its glue output. 
3274 SDValue X = N->getOperand(0); 3275 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3276 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3277 int64_t Addend = -C->getSExtValue(); 3278 3279 SDNode *Add = nullptr; 3280 // In T2 mode, ADDS can be better than CMN if the immediate fits in a 3281 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3282 // Outside that range we can just use a CMN which is 32-bit but has a 3283 // 12-bit immediate range. 3284 if (Subtarget->isThumb2() && Addend < 1<<8) { 3285 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3286 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3287 CurDAG->getRegister(0, MVT::i32) }; 3288 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3289 } else if (!Subtarget->isThumb2() && Addend < 1<<8) { 3290 // FIXME: Add T1 tADDi8 code. 3291 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3292 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3293 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3294 Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops); 3295 } else if (!Subtarget->isThumb2() && Addend < 1<<3) { 3296 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3297 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3298 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3299 Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops); 3300 } 3301 if (Add) { 3302 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3303 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3304 } 3305 } 3306 // Other cases are autogenerated. 
3307 break; 3308 } 3309 3310 case ARMISD::CMOV: { 3311 SDValue InFlag = N->getOperand(4); 3312 3313 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3314 bool SwitchEQNEToPLMI; 3315 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3316 3317 if (SwitchEQNEToPLMI) { 3318 SDValue ARMcc = N->getOperand(2); 3319 ARMCC::CondCodes CC = 3320 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3321 3322 switch (CC) { 3323 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3324 case ARMCC::NE: 3325 CC = ARMCC::MI; 3326 break; 3327 case ARMCC::EQ: 3328 CC = ARMCC::PL; 3329 break; 3330 } 3331 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3332 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3333 N->getOperand(3), N->getOperand(4)}; 3334 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3335 } 3336 3337 } 3338 // Other cases are autogenerated. 3339 break; 3340 } 3341 3342 case ARMISD::VZIP: { 3343 unsigned Opc = 0; 3344 EVT VT = N->getValueType(0); 3345 switch (VT.getSimpleVT().SimpleTy) { 3346 default: return; 3347 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3348 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3349 case MVT::v2f32: 3350 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3351 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3352 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3353 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3354 case MVT::v4f32: 3355 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3356 } 3357 SDValue Pred = getAL(CurDAG, dl); 3358 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3359 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3360 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3361 return; 3362 } 3363 case ARMISD::VUZP: { 3364 unsigned Opc = 0; 3365 EVT VT = N->getValueType(0); 3366 switch (VT.getSimpleVT().SimpleTy) { 3367 default: return; 3368 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3369 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3370 case MVT::v2f32: 3371 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3372 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3373 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3374 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3375 case MVT::v4f32: 3376 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3377 } 3378 SDValue Pred = getAL(CurDAG, dl); 3379 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3380 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3381 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3382 return; 3383 } 3384 case ARMISD::VTRN: { 3385 unsigned Opc = 0; 3386 EVT VT = N->getValueType(0); 3387 switch (VT.getSimpleVT().SimpleTy) { 3388 default: return; 3389 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3390 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3391 case MVT::v2f32: 3392 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3393 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3394 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3395 case MVT::v4f32: 3396 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3397 } 3398 SDValue Pred = getAL(CurDAG, dl); 3399 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3400 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3401 ReplaceNode(N, 
CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3402 return; 3403 } 3404 case ARMISD::BUILD_VECTOR: { 3405 EVT VecVT = N->getValueType(0); 3406 EVT EltVT = VecVT.getVectorElementType(); 3407 unsigned NumElts = VecVT.getVectorNumElements(); 3408 if (EltVT == MVT::f64) { 3409 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3410 ReplaceNode( 3411 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3412 return; 3413 } 3414 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3415 if (NumElts == 2) { 3416 ReplaceNode( 3417 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3418 return; 3419 } 3420 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3421 ReplaceNode(N, 3422 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3423 N->getOperand(2), N->getOperand(3))); 3424 return; 3425 } 3426 3427 case ARMISD::VLD1DUP: { 3428 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 3429 ARM::VLD1DUPd32 }; 3430 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 3431 ARM::VLD1DUPq32 }; 3432 SelectVLDDup(N, false, 1, DOpcodes, QOpcodes); 3433 return; 3434 } 3435 3436 case ARMISD::VLD2DUP: { 3437 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3438 ARM::VLD2DUPd32 }; 3439 SelectVLDDup(N, false, 2, Opcodes); 3440 return; 3441 } 3442 3443 case ARMISD::VLD3DUP: { 3444 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 3445 ARM::VLD3DUPd16Pseudo, 3446 ARM::VLD3DUPd32Pseudo }; 3447 SelectVLDDup(N, false, 3, Opcodes); 3448 return; 3449 } 3450 3451 case ARMISD::VLD4DUP: { 3452 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 3453 ARM::VLD4DUPd16Pseudo, 3454 ARM::VLD4DUPd32Pseudo }; 3455 SelectVLDDup(N, false, 4, Opcodes); 3456 return; 3457 } 3458 3459 case ARMISD::VLD1DUP_UPD: { 3460 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 3461 ARM::VLD1DUPd16wb_fixed, 3462 ARM::VLD1DUPd32wb_fixed }; 3463 static const uint16_t QOpcodes[] = 
{ ARM::VLD1DUPq8wb_fixed, 3464 ARM::VLD1DUPq16wb_fixed, 3465 ARM::VLD1DUPq32wb_fixed }; 3466 SelectVLDDup(N, true, 1, DOpcodes, QOpcodes); 3467 return; 3468 } 3469 3470 case ARMISD::VLD2DUP_UPD: { 3471 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 3472 ARM::VLD2DUPd16wb_fixed, 3473 ARM::VLD2DUPd32wb_fixed }; 3474 SelectVLDDup(N, true, 2, Opcodes); 3475 return; 3476 } 3477 3478 case ARMISD::VLD3DUP_UPD: { 3479 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 3480 ARM::VLD3DUPd16Pseudo_UPD, 3481 ARM::VLD3DUPd32Pseudo_UPD }; 3482 SelectVLDDup(N, true, 3, Opcodes); 3483 return; 3484 } 3485 3486 case ARMISD::VLD4DUP_UPD: { 3487 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 3488 ARM::VLD4DUPd16Pseudo_UPD, 3489 ARM::VLD4DUPd32Pseudo_UPD }; 3490 SelectVLDDup(N, true, 4, Opcodes); 3491 return; 3492 } 3493 3494 case ARMISD::VLD1_UPD: { 3495 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 3496 ARM::VLD1d16wb_fixed, 3497 ARM::VLD1d32wb_fixed, 3498 ARM::VLD1d64wb_fixed }; 3499 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 3500 ARM::VLD1q16wb_fixed, 3501 ARM::VLD1q32wb_fixed, 3502 ARM::VLD1q64wb_fixed }; 3503 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 3504 return; 3505 } 3506 3507 case ARMISD::VLD2_UPD: { 3508 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 3509 ARM::VLD2d16wb_fixed, 3510 ARM::VLD2d32wb_fixed, 3511 ARM::VLD1q64wb_fixed}; 3512 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 3513 ARM::VLD2q16PseudoWB_fixed, 3514 ARM::VLD2q32PseudoWB_fixed }; 3515 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 3516 return; 3517 } 3518 3519 case ARMISD::VLD3_UPD: { 3520 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 3521 ARM::VLD3d16Pseudo_UPD, 3522 ARM::VLD3d32Pseudo_UPD, 3523 ARM::VLD1d64TPseudoWB_fixed}; 3524 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3525 ARM::VLD3q16Pseudo_UPD, 3526 ARM::VLD3q32Pseudo_UPD }; 3527 static const uint16_t 
QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 3528 ARM::VLD3q16oddPseudo_UPD, 3529 ARM::VLD3q32oddPseudo_UPD }; 3530 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3531 return; 3532 } 3533 3534 case ARMISD::VLD4_UPD: { 3535 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3536 ARM::VLD4d16Pseudo_UPD, 3537 ARM::VLD4d32Pseudo_UPD, 3538 ARM::VLD1d64QPseudoWB_fixed}; 3539 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3540 ARM::VLD4q16Pseudo_UPD, 3541 ARM::VLD4q32Pseudo_UPD }; 3542 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3543 ARM::VLD4q16oddPseudo_UPD, 3544 ARM::VLD4q32oddPseudo_UPD }; 3545 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3546 return; 3547 } 3548 3549 case ARMISD::VLD2LN_UPD: { 3550 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3551 ARM::VLD2LNd16Pseudo_UPD, 3552 ARM::VLD2LNd32Pseudo_UPD }; 3553 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3554 ARM::VLD2LNq32Pseudo_UPD }; 3555 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3556 return; 3557 } 3558 3559 case ARMISD::VLD3LN_UPD: { 3560 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3561 ARM::VLD3LNd16Pseudo_UPD, 3562 ARM::VLD3LNd32Pseudo_UPD }; 3563 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3564 ARM::VLD3LNq32Pseudo_UPD }; 3565 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3566 return; 3567 } 3568 3569 case ARMISD::VLD4LN_UPD: { 3570 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3571 ARM::VLD4LNd16Pseudo_UPD, 3572 ARM::VLD4LNd32Pseudo_UPD }; 3573 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3574 ARM::VLD4LNq32Pseudo_UPD }; 3575 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3576 return; 3577 } 3578 3579 case ARMISD::VST1_UPD: { 3580 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3581 ARM::VST1d16wb_fixed, 3582 ARM::VST1d32wb_fixed, 3583 ARM::VST1d64wb_fixed }; 3584 static const uint16_t QOpcodes[] = { 
ARM::VST1q8wb_fixed, 3585 ARM::VST1q16wb_fixed, 3586 ARM::VST1q32wb_fixed, 3587 ARM::VST1q64wb_fixed }; 3588 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 3589 return; 3590 } 3591 3592 case ARMISD::VST2_UPD: { 3593 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3594 ARM::VST2d16wb_fixed, 3595 ARM::VST2d32wb_fixed, 3596 ARM::VST1q64wb_fixed}; 3597 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3598 ARM::VST2q16PseudoWB_fixed, 3599 ARM::VST2q32PseudoWB_fixed }; 3600 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 3601 return; 3602 } 3603 3604 case ARMISD::VST3_UPD: { 3605 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 3606 ARM::VST3d16Pseudo_UPD, 3607 ARM::VST3d32Pseudo_UPD, 3608 ARM::VST1d64TPseudoWB_fixed}; 3609 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3610 ARM::VST3q16Pseudo_UPD, 3611 ARM::VST3q32Pseudo_UPD }; 3612 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 3613 ARM::VST3q16oddPseudo_UPD, 3614 ARM::VST3q32oddPseudo_UPD }; 3615 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3616 return; 3617 } 3618 3619 case ARMISD::VST4_UPD: { 3620 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3621 ARM::VST4d16Pseudo_UPD, 3622 ARM::VST4d32Pseudo_UPD, 3623 ARM::VST1d64QPseudoWB_fixed}; 3624 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3625 ARM::VST4q16Pseudo_UPD, 3626 ARM::VST4q32Pseudo_UPD }; 3627 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3628 ARM::VST4q16oddPseudo_UPD, 3629 ARM::VST4q32oddPseudo_UPD }; 3630 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3631 return; 3632 } 3633 3634 case ARMISD::VST2LN_UPD: { 3635 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3636 ARM::VST2LNd16Pseudo_UPD, 3637 ARM::VST2LNd32Pseudo_UPD }; 3638 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3639 ARM::VST2LNq32Pseudo_UPD }; 3640 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3641 return; 3642 } 3643 
3644 case ARMISD::VST3LN_UPD: { 3645 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3646 ARM::VST3LNd16Pseudo_UPD, 3647 ARM::VST3LNd32Pseudo_UPD }; 3648 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3649 ARM::VST3LNq32Pseudo_UPD }; 3650 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3651 return; 3652 } 3653 3654 case ARMISD::VST4LN_UPD: { 3655 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3656 ARM::VST4LNd16Pseudo_UPD, 3657 ARM::VST4LNd32Pseudo_UPD }; 3658 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3659 ARM::VST4LNq32Pseudo_UPD }; 3660 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3661 return; 3662 } 3663 3664 case ISD::INTRINSIC_VOID: 3665 case ISD::INTRINSIC_W_CHAIN: { 3666 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3667 switch (IntNo) { 3668 default: 3669 break; 3670 3671 case Intrinsic::arm_mrrc: 3672 case Intrinsic::arm_mrrc2: { 3673 SDLoc dl(N); 3674 SDValue Chain = N->getOperand(0); 3675 unsigned Opc; 3676 3677 if (Subtarget->isThumb()) 3678 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 3679 else 3680 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 3681 3682 SmallVector<SDValue, 5> Ops; 3683 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 3684 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 3685 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 3686 3687 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 3688 // instruction will always be '1111' but it is possible in assembly language to specify 3689 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 
3690 if (Opc != ARM::MRRC2) { 3691 Ops.push_back(getAL(CurDAG, dl)); 3692 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3693 } 3694 3695 Ops.push_back(Chain); 3696 3697 // Writes to two registers. 3698 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 3699 3700 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 3701 return; 3702 } 3703 case Intrinsic::arm_ldaexd: 3704 case Intrinsic::arm_ldrexd: { 3705 SDLoc dl(N); 3706 SDValue Chain = N->getOperand(0); 3707 SDValue MemAddr = N->getOperand(2); 3708 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 3709 3710 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3711 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3712 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 3713 3714 // arm_ldrexd returns a i64 value in {i32, i32} 3715 std::vector<EVT> ResTys; 3716 if (isThumb) { 3717 ResTys.push_back(MVT::i32); 3718 ResTys.push_back(MVT::i32); 3719 } else 3720 ResTys.push_back(MVT::Untyped); 3721 ResTys.push_back(MVT::Other); 3722 3723 // Place arguments in the right order. 3724 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 3725 CurDAG->getRegister(0, MVT::i32), Chain}; 3726 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3727 // Transfer memoperands. 3728 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3729 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3730 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 3731 3732 // Remap uses. 3733 SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); 3734 if (!SDValue(N, 0).use_empty()) { 3735 SDValue Result; 3736 if (isThumb) 3737 Result = SDValue(Ld, 0); 3738 else { 3739 SDValue SubRegIdx = 3740 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3741 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3742 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3743 Result = SDValue(ResNode,0); 3744 } 3745 ReplaceUses(SDValue(N, 0), Result); 3746 } 3747 if (!SDValue(N, 1).use_empty()) { 3748 SDValue Result; 3749 if (isThumb) 3750 Result = SDValue(Ld, 1); 3751 else { 3752 SDValue SubRegIdx = 3753 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3754 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3755 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3756 Result = SDValue(ResNode,0); 3757 } 3758 ReplaceUses(SDValue(N, 1), Result); 3759 } 3760 ReplaceUses(SDValue(N, 2), OutChain); 3761 CurDAG->RemoveDeadNode(N); 3762 return; 3763 } 3764 case Intrinsic::arm_stlexd: 3765 case Intrinsic::arm_strexd: { 3766 SDLoc dl(N); 3767 SDValue Chain = N->getOperand(0); 3768 SDValue Val0 = N->getOperand(2); 3769 SDValue Val1 = N->getOperand(3); 3770 SDValue MemAddr = N->getOperand(4); 3771 3772 // Store exclusive double return a i32 value which is the return status 3773 // of the issued store. 3774 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3775 3776 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3777 // Place arguments in the right order. 3778 SmallVector<SDValue, 7> Ops; 3779 if (isThumb) { 3780 Ops.push_back(Val0); 3781 Ops.push_back(Val1); 3782 } else 3783 // arm_strexd uses GPRPair. 3784 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3785 Ops.push_back(MemAddr); 3786 Ops.push_back(getAL(CurDAG, dl)); 3787 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3788 Ops.push_back(Chain); 3789 3790 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3791 unsigned NewOpc = isThumb ? (IsRelease ? 
ARM::t2STLEXD : ARM::t2STREXD) 3792 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 3793 3794 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3795 // Transfer memoperands. 3796 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3797 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3798 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 3799 3800 ReplaceNode(N, St); 3801 return; 3802 } 3803 3804 case Intrinsic::arm_neon_vld1: { 3805 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3806 ARM::VLD1d32, ARM::VLD1d64 }; 3807 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3808 ARM::VLD1q32, ARM::VLD1q64}; 3809 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 3810 return; 3811 } 3812 3813 case Intrinsic::arm_neon_vld2: { 3814 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3815 ARM::VLD2d32, ARM::VLD1q64 }; 3816 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3817 ARM::VLD2q32Pseudo }; 3818 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3819 return; 3820 } 3821 3822 case Intrinsic::arm_neon_vld3: { 3823 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3824 ARM::VLD3d16Pseudo, 3825 ARM::VLD3d32Pseudo, 3826 ARM::VLD1d64TPseudo }; 3827 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3828 ARM::VLD3q16Pseudo_UPD, 3829 ARM::VLD3q32Pseudo_UPD }; 3830 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3831 ARM::VLD3q16oddPseudo, 3832 ARM::VLD3q32oddPseudo }; 3833 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3834 return; 3835 } 3836 3837 case Intrinsic::arm_neon_vld4: { 3838 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3839 ARM::VLD4d16Pseudo, 3840 ARM::VLD4d32Pseudo, 3841 ARM::VLD1d64QPseudo }; 3842 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3843 ARM::VLD4q16Pseudo_UPD, 3844 ARM::VLD4q32Pseudo_UPD }; 3845 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3846 ARM::VLD4q16oddPseudo, 
3847 ARM::VLD4q32oddPseudo }; 3848 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3849 return; 3850 } 3851 3852 case Intrinsic::arm_neon_vld2lane: { 3853 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3854 ARM::VLD2LNd16Pseudo, 3855 ARM::VLD2LNd32Pseudo }; 3856 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3857 ARM::VLD2LNq32Pseudo }; 3858 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3859 return; 3860 } 3861 3862 case Intrinsic::arm_neon_vld3lane: { 3863 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3864 ARM::VLD3LNd16Pseudo, 3865 ARM::VLD3LNd32Pseudo }; 3866 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3867 ARM::VLD3LNq32Pseudo }; 3868 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3869 return; 3870 } 3871 3872 case Intrinsic::arm_neon_vld4lane: { 3873 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3874 ARM::VLD4LNd16Pseudo, 3875 ARM::VLD4LNd32Pseudo }; 3876 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3877 ARM::VLD4LNq32Pseudo }; 3878 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3879 return; 3880 } 3881 3882 case Intrinsic::arm_neon_vst1: { 3883 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 3884 ARM::VST1d32, ARM::VST1d64 }; 3885 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3886 ARM::VST1q32, ARM::VST1q64 }; 3887 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 3888 return; 3889 } 3890 3891 case Intrinsic::arm_neon_vst2: { 3892 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3893 ARM::VST2d32, ARM::VST1q64 }; 3894 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3895 ARM::VST2q32Pseudo }; 3896 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3897 return; 3898 } 3899 3900 case Intrinsic::arm_neon_vst3: { 3901 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3902 ARM::VST3d16Pseudo, 3903 ARM::VST3d32Pseudo, 3904 ARM::VST1d64TPseudo }; 3905 static const 
uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3906 ARM::VST3q16Pseudo_UPD, 3907 ARM::VST3q32Pseudo_UPD }; 3908 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 3909 ARM::VST3q16oddPseudo, 3910 ARM::VST3q32oddPseudo }; 3911 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3912 return; 3913 } 3914 3915 case Intrinsic::arm_neon_vst4: { 3916 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3917 ARM::VST4d16Pseudo, 3918 ARM::VST4d32Pseudo, 3919 ARM::VST1d64QPseudo }; 3920 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3921 ARM::VST4q16Pseudo_UPD, 3922 ARM::VST4q32Pseudo_UPD }; 3923 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3924 ARM::VST4q16oddPseudo, 3925 ARM::VST4q32oddPseudo }; 3926 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3927 return; 3928 } 3929 3930 case Intrinsic::arm_neon_vst2lane: { 3931 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 3932 ARM::VST2LNd16Pseudo, 3933 ARM::VST2LNd32Pseudo }; 3934 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 3935 ARM::VST2LNq32Pseudo }; 3936 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 3937 return; 3938 } 3939 3940 case Intrinsic::arm_neon_vst3lane: { 3941 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 3942 ARM::VST3LNd16Pseudo, 3943 ARM::VST3LNd32Pseudo }; 3944 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 3945 ARM::VST3LNq32Pseudo }; 3946 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 3947 return; 3948 } 3949 3950 case Intrinsic::arm_neon_vst4lane: { 3951 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 3952 ARM::VST4LNd16Pseudo, 3953 ARM::VST4LNd32Pseudo }; 3954 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 3955 ARM::VST4LNq32Pseudo }; 3956 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 3957 return; 3958 } 3959 } 3960 break; 3961 } 3962 3963 case ISD::INTRINSIC_WO_CHAIN: { 3964 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3965 
switch (IntNo) { 3966 default: 3967 break; 3968 3969 case Intrinsic::arm_neon_vtbl2: 3970 SelectVTBL(N, false, 2, ARM::VTBL2); 3971 return; 3972 case Intrinsic::arm_neon_vtbl3: 3973 SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); 3974 return; 3975 case Intrinsic::arm_neon_vtbl4: 3976 SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); 3977 return; 3978 3979 case Intrinsic::arm_neon_vtbx2: 3980 SelectVTBL(N, true, 2, ARM::VTBX2); 3981 return; 3982 case Intrinsic::arm_neon_vtbx3: 3983 SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); 3984 return; 3985 case Intrinsic::arm_neon_vtbx4: 3986 SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); 3987 return; 3988 } 3989 break; 3990 } 3991 3992 case ARMISD::VTBL1: { 3993 SDLoc dl(N); 3994 EVT VT = N->getValueType(0); 3995 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), 3996 getAL(CurDAG, dl), // Predicate 3997 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register 3998 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops)); 3999 return; 4000 } 4001 case ARMISD::VTBL2: { 4002 SDLoc dl(N); 4003 EVT VT = N->getValueType(0); 4004 4005 // Form a REG_SEQUENCE to force register allocation. 4006 SDValue V0 = N->getOperand(0); 4007 SDValue V1 = N->getOperand(1); 4008 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 4009 4010 SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate 4011 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register 4012 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops)); 4013 return; 4014 } 4015 4016 case ISD::CONCAT_VECTORS: 4017 SelectConcatVector(N); 4018 return; 4019 4020 case ISD::ATOMIC_CMP_SWAP: 4021 SelectCMP_SWAP(N); 4022 return; 4023 } 4024 4025 SelectCode(N); 4026 } 4027 4028 // Inspect a register string of the form 4029 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 4030 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 4031 // and obtain the integer operands from them, adding these operands to the 4032 // provided vector. 
4033 static void getIntOperandsFromRegisterString(StringRef RegString, 4034 SelectionDAG *CurDAG, 4035 const SDLoc &DL, 4036 std::vector<SDValue> &Ops) { 4037 SmallVector<StringRef, 5> Fields; 4038 RegString.split(Fields, ':'); 4039 4040 if (Fields.size() > 1) { 4041 bool AllIntFields = true; 4042 4043 for (StringRef Field : Fields) { 4044 // Need to trim out leading 'cp' characters and get the integer field. 4045 unsigned IntField; 4046 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 4047 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 4048 } 4049 4050 assert(AllIntFields && 4051 "Unexpected non-integer value in special register string."); 4052 } 4053 } 4054 4055 // Maps a Banked Register string to its mask value. The mask value returned is 4056 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 4057 // mask operand, which expresses which register is to be used, e.g. r8, and in 4058 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 4059 // was invalid. 
static inline int getBankedRegisterMask(StringRef RegString) {
  // Matching is case-insensitive; an unrecognized name yields -1.
  return StringSwitch<int>(RegString.lower())
      // Banked user-mode registers.
      .Case("r8_usr", 0x00)
      .Case("r9_usr", 0x01)
      .Case("r10_usr", 0x02)
      .Case("r11_usr", 0x03)
      .Case("r12_usr", 0x04)
      .Case("sp_usr", 0x05)
      .Case("lr_usr", 0x06)
      // Banked FIQ-mode registers.
      .Case("r8_fiq", 0x08)
      .Case("r9_fiq", 0x09)
      .Case("r10_fiq", 0x0a)
      .Case("r11_fiq", 0x0b)
      .Case("r12_fiq", 0x0c)
      .Case("sp_fiq", 0x0d)
      .Case("lr_fiq", 0x0e)
      // Banked LR/SP of the other exception modes.
      .Case("lr_irq", 0x10)
      .Case("sp_irq", 0x11)
      .Case("lr_svc", 0x12)
      .Case("sp_svc", 0x13)
      .Case("lr_abt", 0x14)
      .Case("sp_abt", 0x15)
      .Case("lr_und", 0x16)
      .Case("sp_und", 0x17)
      .Case("lr_mon", 0x1c)
      .Case("sp_mon", 0x1d)
      .Case("elr_hyp", 0x1e)
      .Case("sp_hyp", 0x1f)
      // Banked SPSRs.
      .Case("spsr_fiq", 0x2e)
      .Case("spsr_irq", 0x30)
      .Case("spsr_svc", 0x32)
      .Case("spsr_abt", 0x34)
      .Case("spsr_und", 0x36)
      .Case("spsr_mon", 0x3c)
      .Case("spsr_hyp", 0x3e)
      .Default(-1);
}

// Maps a MClass special register string to its value for use in the
// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
  // Matching is case-insensitive; an unrecognized name yields -1.
  return StringSwitch<int>(RegString.lower())
      .Case("apsr", 0x0)
      .Case("iapsr", 0x1)
      .Case("eapsr", 0x2)
      .Case("xpsr", 0x3)
      .Case("ipsr", 0x5)
      .Case("epsr", 0x6)
      .Case("iepsr", 0x7)
      .Case("msp", 0x8)
      .Case("psp", 0x9)
      .Case("primask", 0x10)
      .Case("basepri", 0x11)
      .Case("basepri_max", 0x12)
      .Case("faultmask", 0x13)
      .Case("control", 0x14)
      .Case("msplim", 0x0a)
      .Case("psplim", 0x0b)
      .Case("sp", 0x18)
      .Default(-1);
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
  // No flags defaults to "nzcvq", plus "g" when the DSP extension is present.
  if (Flags.empty())
    return 0x2 | (int)hasDSP;

  return StringSwitch<int>(Flags)
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Validates an M-class special register name (plus optional flags) against the
// subtarget's features and returns the operand value for t2MRS_M / t2MSR_M,
// or -1 if the combination is invalid.
static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
                                 const ARMSubtarget *Subtarget) {
  // Ensure that the register (without flags) was a valid M Class special
  // register.
  int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
  if (SYSmvalue == -1)
    return -1;

  // basepri, basepri_max and faultmask are only valid for V7m.
  if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
    return -1;

  // An "_ns" suffix selects the non-secure alias: consume the flag and set
  // bit 7. This must happen before the feature checks below so the modified
  // SYSm value is what gets validated.
  if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
    Flags = "";
    SYSmvalue |= 0x80;
  }

  // msplim, psplim and sp require the 8-M security extension.
  if (!Subtarget->has8MSecExt() &&
      (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
    return -1;

  // Some non-secure aliases additionally require 8-M mainline.
  if (!Subtarget->hasV8MMainlineOps() &&
      (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
       SYSmvalue == 0x93))
    return -1;

  // If it was a read then we won't be expecting flags and so at this point
  // we can return the mask.
  if (IsRead) {
    if (Flags.empty())
      return SYSmvalue;
    else
      return -1;
  }

  // We know we are now handling a write so need to get the mask for the flags.
  int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());

  // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
  // shouldn't have flags present.
  if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
    return -1;

  // The _g and _nzcvqg versions are only valid if the DSP extension is
  // available.
  if (!Subtarget->hasDSP() && (Mask & 0x1))
    return -1;

  // The register was valid so need to put the mask in the correct place
  // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
  // construct the operand for the instruction node.
  if (SYSmvalue < 0x4)
    return SYSmvalue | Mask << 10;

  return SYSmvalue;
}

// Builds the mask operand for MRS/MSR on A/R-class cores from a register name
// (apsr, cpsr or spsr) and its flags string, or returns -1 if invalid.
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contains the fields to be accessed in the special register, set by
  // the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags, true);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the standard predicate operands and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  // A banked register name lowers to MRSbanked / t2MRSbanked.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
      .Case("fpscr", ARM::VMRS)
      .Case("fpexc", ARM::VMRS_FPEXC)
      .Case("fpsid", ARM::VMRS_FPSID)
      .Case("mvfr0", ARM::VMRS_MVFR0)
      .Case("mvfr1", ARM::VMRS_MVFR1)
      .Case("mvfr2", ARM::VMRS_MVFR2)
      .Case("fpinst", ARM::VMRS_FPINST)
      .Case("fpinst2", ARM::VMRS_FPINST2)
      .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return false;
    // mvfr2 only exists from FP-ARMv8 onwards.
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    // Split off a trailing "_ns" (non-secure alias) suffix if present.
    StringRef Flags = "", Reg = SpecialReg;
    if (Reg.endswith("_ns")) {
      Flags = "ns";
      Reg = Reg.drop_back(3);
    }

    int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: fall back to default lowering.
  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // The value to write goes after the coprocessor and opc1 fields.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // MCRR writes a 64-bit value as two 32-bit halves.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the standard predicate operands and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  // A banked register name lowers to MSRbanked / t2MSRbanked.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
      .Case("fpscr", ARM::VMSR)
      .Case("fpexc", ARM::VMSR_FPEXC)
      .Case("fpsid", ARM::VMSR_FPSID)
      .Case("fpinst", ARM::VMSR_FPINST)
      .Case("fpinst2", ARM::VMSR_FPINST2)
      .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split "<reg>_<flags>" on the last underscore.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    // basepri_max gets split so need to correct Reg and Flags.
    if (SpecialReg == "basepri_max") {
      Reg = SpecialReg;
      Flags = "";
    }
    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: fall back to default lowering.
  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
4589 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 4590 Ops.push_back(T1.getValue(1)); 4591 CurDAG->UpdateNodeOperands(GU, Ops); 4592 } 4593 else { 4594 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 4595 // GPRPair and then pass the GPRPair to the inline asm. 4596 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 4597 4598 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 4599 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 4600 Chain.getValue(1)); 4601 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 4602 T0.getValue(1)); 4603 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 4604 4605 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 4606 // i32 VRs of inline asm with it. 4607 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4608 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4609 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 4610 4611 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 4612 Glue = Chain.getValue(1); 4613 } 4614 4615 Changed = true; 4616 4617 if(PairedReg.getNode()) { 4618 OpChanged[OpChanged.size() -1 ] = true; 4619 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 4620 if (IsTiedToChangedOp) 4621 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 4622 else 4623 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 4624 // Replace the current flag. 4625 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 4626 Flag, dl, MVT::i32); 4627 // Add the new register node and skip the original two GPRs. 4628 AsmNodeOperands.push_back(PairedReg); 4629 // Skip the next two GPRs. 
4630 i += 2; 4631 } 4632 } 4633 4634 if (Glue.getNode()) 4635 AsmNodeOperands.push_back(Glue); 4636 if (!Changed) 4637 return false; 4638 4639 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), 4640 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 4641 New->setNodeId(-1); 4642 ReplaceNode(N, New.getNode()); 4643 return true; 4644 } 4645 4646 4647 bool ARMDAGToDAGISel:: 4648 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 4649 std::vector<SDValue> &OutOps) { 4650 switch(ConstraintID) { 4651 default: 4652 llvm_unreachable("Unexpected asm memory constraint"); 4653 case InlineAsm::Constraint_i: 4654 // FIXME: It seems strange that 'i' is needed here since it's supposed to 4655 // be an immediate and not a memory constraint. 4656 LLVM_FALLTHROUGH; 4657 case InlineAsm::Constraint_m: 4658 case InlineAsm::Constraint_o: 4659 case InlineAsm::Constraint_Q: 4660 case InlineAsm::Constraint_Um: 4661 case InlineAsm::Constraint_Un: 4662 case InlineAsm::Constraint_Uq: 4663 case InlineAsm::Constraint_Us: 4664 case InlineAsm::Constraint_Ut: 4665 case InlineAsm::Constraint_Uv: 4666 case InlineAsm::Constraint_Uy: 4667 // Require the address to be in a register. That is safe for all ARM 4668 // variants and it is hard to do anything much smarter without knowing 4669 // how the operand is used. 4670 OutOps.push_back(Op); 4671 return false; 4672 } 4673 return true; 4674 } 4675 4676 /// createARMISelDag - This pass converts a legalized DAG into a 4677 /// ARM-specific DAG, ready for instruction scheduling. 4678 /// 4679 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 4680 CodeGenOpt::Level OptLevel) { 4681 return new ARMDAGToDAGISel(TM, OptLevel); 4682 } 4683