1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines an instruction selector for the ARM target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ARM.h" 15 #include "ARMBaseInstrInfo.h" 16 #include "ARMTargetMachine.h" 17 #include "MCTargetDesc/ARMAddressingModes.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/SelectionDAGISel.h" 25 #include "llvm/IR/CallingConv.h" 26 #include "llvm/IR/Constants.h" 27 #include "llvm/IR/DerivedTypes.h" 28 #include "llvm/IR/Function.h" 29 #include "llvm/IR/Intrinsics.h" 30 #include "llvm/IR/LLVMContext.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/Target/TargetLowering.h" 35 #include "llvm/Target/TargetOptions.h" 36 37 using namespace llvm; 38 39 #define DEBUG_TYPE "arm-isel" 40 41 static cl::opt<bool> 42 DisableShifterOp("disable-shifter-op", cl::Hidden, 43 cl::desc("Disable isel of shifter-op"), 44 cl::init(false)); 45 46 //===--------------------------------------------------------------------===// 47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 48 /// instructions for SelectionDAG operations. 49 /// 50 namespace { 51 52 enum AddrMode2Type { 53 AM2_BASE, // Simple AM2 (+-imm12) 54 AM2_SHOP // Shifter-op AM2 55 }; 56 57 class ARMDAGToDAGISel : public SelectionDAGISel { 58 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 59 /// make the right decision when generating code for different targets. 60 const ARMSubtarget *Subtarget; 61 62 public: 63 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) 64 : SelectionDAGISel(tm, OptLevel) {} 65 66 bool runOnMachineFunction(MachineFunction &MF) override { 67 // Reset the subtarget each time through. 68 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 69 SelectionDAGISel::runOnMachineFunction(MF); 70 return true; 71 } 72 73 StringRef getPassName() const override { return "ARM Instruction Selection"; } 74 75 void PreprocessISelDAG() override; 76 77 /// getI32Imm - Return a target constant of type i32 with the specified 78 /// value. 79 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 80 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 81 } 82 83 void Select(SDNode *N) override; 84 85 bool hasNoVMLxHazardUse(SDNode *N) const; 86 bool isShifterOpProfitable(const SDValue &Shift, 87 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 88 bool SelectRegShifterOperand(SDValue N, SDValue &A, 89 SDValue &B, SDValue &C, 90 bool CheckProfitability = true); 91 bool SelectImmShifterOperand(SDValue N, SDValue &A, 92 SDValue &B, bool CheckProfitability = true); 93 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, 94 SDValue &B, SDValue &C) { 95 // Don't apply the profitability check 96 return SelectRegShifterOperand(N, A, B, C, false); 97 } 98 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, 99 SDValue &B) { 100 // Don't apply the profitability check 101 return SelectImmShifterOperand(N, A, B, false); 102 } 103 104 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 105 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 106 107 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, 108 SDValue &Offset, SDValue &Opc); 109 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, 110 SDValue &Opc) { 111 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; 112 } 113 114 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, 115 SDValue &Opc) { 116 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP; 117 } 118 119 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, 120 SDValue &Opc) { 121 SelectAddrMode2Worker(N, Base, Offset, Opc); 122 // return SelectAddrMode2ShOp(N, Base, Offset, Opc); 123 // This always matches one way or another. 124 return true; 125 } 126 127 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 128 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 129 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 130 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 131 return true; 132 } 133 134 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 135 SDValue &Offset, SDValue &Opc); 136 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 137 SDValue &Offset, SDValue &Opc); 138 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 139 SDValue &Offset, SDValue &Opc); 140 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 141 bool SelectAddrMode3(SDValue N, SDValue &Base, 142 SDValue &Offset, SDValue &Opc); 143 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 144 SDValue &Offset, SDValue &Opc); 145 bool SelectAddrMode5(SDValue N, SDValue &Base, 146 SDValue &Offset); 147 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 148 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 149 150 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 151 152 // Thumb Addressing Modes: 153 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 154 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 155 SDValue &OffImm); 156 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 157 SDValue &OffImm); 158 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 159 SDValue &OffImm); 160 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 161 SDValue &OffImm); 162 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 163 164 // Thumb 2 Addressing Modes: 165 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 166 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 167 SDValue &OffImm); 168 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 169 SDValue &OffImm); 170 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 171 SDValue &OffReg, SDValue &ShImm); 172 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 173 174 inline bool is_so_imm(unsigned Imm) const { 175 return ARM_AM::getSOImmVal(Imm) != -1; 176 } 177 178 inline bool is_so_imm_not(unsigned Imm) const { 179 return ARM_AM::getSOImmVal(~Imm) != -1; 180 } 181 182 inline bool is_t2_so_imm(unsigned Imm) const { 183 return ARM_AM::getT2SOImmVal(Imm) != -1; 184 } 185 186 inline bool is_t2_so_imm_not(unsigned Imm) const { 187 return ARM_AM::getT2SOImmVal(~Imm) != -1; 188 } 189 190 // Include the pieces autogenerated from the target description. 191 #include "ARMGenDAGISel.inc" 192 193 private: 194 void transferMemOperands(SDNode *Src, SDNode *Dst); 195 196 /// Indexed (pre/post inc/dec) load matching code for ARM. 197 bool tryARMIndexedLoad(SDNode *N); 198 bool tryT1IndexedLoad(SDNode *N); 199 bool tryT2IndexedLoad(SDNode *N); 200 201 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 202 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 203 /// loads of D registers and even subregs and odd subregs of Q registers. 204 /// For NumVecs <= 2, QOpcodes1 is not used. 205 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 206 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 207 const uint16_t *QOpcodes1); 208 209 /// SelectVST - Select NEON store intrinsics. NumVecs should 210 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 211 /// stores of D registers and even subregs and odd subregs of Q registers. 212 /// For NumVecs <= 2, QOpcodes1 is not used. 213 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 214 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 215 const uint16_t *QOpcodes1); 216 217 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 218 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 219 /// load/store of D registers and Q registers. 220 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 221 unsigned NumVecs, const uint16_t *DOpcodes, 222 const uint16_t *QOpcodes); 223 224 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 225 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 226 /// for loading D registers. (Q registers are not supported.) 227 void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, 228 const uint16_t *DOpcodes, 229 const uint16_t *QOpcodes = nullptr); 230 231 /// Try to select SBFX/UBFX instructions for ARM. 232 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 233 234 // Select special operations if node forms integer ABS pattern 235 bool tryABSOp(SDNode *N); 236 237 bool tryReadRegister(SDNode *N); 238 bool tryWriteRegister(SDNode *N); 239 240 bool tryInlineAsm(SDNode *N); 241 242 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 243 244 void SelectCMP_SWAP(SDNode *N); 245 246 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 247 /// inline asm expressions. 248 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 249 std::vector<SDValue> &OutOps) override; 250 251 // Form pairs of consecutive R, S, D, or Q registers. 252 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 253 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 254 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 255 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 256 257 // Form sequences of 4 consecutive S, D, or Q registers. 258 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 259 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 260 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 261 262 // Get the alignment operand for a NEON VLD or VST instruction. 263 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 264 bool is64BitVector); 265 266 /// Returns the number of instructions required to materialize the given 267 /// constant in a register, or 3 if a literal pool load is needed. 268 unsigned ConstantMaterializationCost(unsigned Val) const; 269 270 /// Checks if N is a multiplication by a constant where we can extract out a 271 /// power of two from the constant so that it can be used in a shift, but only 272 /// if it simplifies the materialization of the constant. Returns true if it 273 /// is, and assigns to PowerOfTwo the power of two that should be extracted 274 /// out and to NewMulConst the new constant to be multiplied by. 275 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 276 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 277 278 /// Replace N with M in CurDAG, in a way that also ensures that M gets 279 /// selected when N would have been selected. 280 void replaceDAGValue(const SDValue &N, SDValue M); 281 }; 282 } 283 284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 285 /// operand. If so Imm will receive the 32-bit value. 286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 287 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 288 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 289 return true; 290 } 291 return false; 292 } 293 294 // isInt32Immediate - This method tests to see if a constant operand. 295 // If so Imm will receive the 32 bit value. 296 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 297 return isInt32Immediate(N.getNode(), Imm); 298 } 299 300 // isOpcWithIntImmediate - This method tests to see if the node is a specific 301 // opcode and that it has a immediate integer right operand. 302 // If so Imm will receive the 32 bit value. 303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 304 return N->getOpcode() == Opc && 305 isInt32Immediate(N->getOperand(1).getNode(), Imm); 306 } 307 308 /// \brief Check whether a particular node is a constant value representable as 309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 310 /// 311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 312 static bool isScaledConstantInRange(SDValue Node, int Scale, 313 int RangeMin, int RangeMax, 314 int &ScaledConstant) { 315 assert(Scale > 0 && "Invalid scale!"); 316 317 // Check that this is a constant. 318 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 319 if (!C) 320 return false; 321 322 ScaledConstant = (int) C->getZExtValue(); 323 if ((ScaledConstant % Scale) != 0) 324 return false; 325 326 ScaledConstant /= Scale; 327 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 328 } 329 330 void ARMDAGToDAGISel::PreprocessISelDAG() { 331 if (!Subtarget->hasV6T2Ops()) 332 return; 333 334 bool isThumb2 = Subtarget->isThumb(); 335 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 336 E = CurDAG->allnodes_end(); I != E; ) { 337 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 338 339 if (N->getOpcode() != ISD::ADD) 340 continue; 341 342 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 343 // leading zeros, followed by consecutive set bits, followed by 1 or 2 344 // trailing zeros, e.g. 1020. 345 // Transform the expression to 346 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 347 // of trailing zeros of c2. The left shift would be folded as an shifter 348 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 349 // node (UBFX). 350 351 SDValue N0 = N->getOperand(0); 352 SDValue N1 = N->getOperand(1); 353 unsigned And_imm = 0; 354 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 355 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 356 std::swap(N0, N1); 357 } 358 if (!And_imm) 359 continue; 360 361 // Check if the AND mask is an immediate of the form: 000.....1111111100 362 unsigned TZ = countTrailingZeros(And_imm); 363 if (TZ != 1 && TZ != 2) 364 // Be conservative here. Shifter operands aren't always free. e.g. On 365 // Swift, left shifter operand of 1 / 2 for free but others are not. 366 // e.g. 367 // ubfx r3, r1, #16, #8 368 // ldr.w r3, [r0, r3, lsl #2] 369 // vs. 370 // mov.w r9, #1020 371 // and.w r2, r9, r1, lsr #14 372 // ldr r2, [r0, r2] 373 continue; 374 And_imm >>= TZ; 375 if (And_imm & (And_imm + 1)) 376 continue; 377 378 // Look for (and (srl X, c1), c2). 379 SDValue Srl = N1.getOperand(0); 380 unsigned Srl_imm = 0; 381 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 382 (Srl_imm <= 2)) 383 continue; 384 385 // Make sure first operand is not a shifter operand which would prevent 386 // folding of the left shift. 387 SDValue CPTmp0; 388 SDValue CPTmp1; 389 SDValue CPTmp2; 390 if (isThumb2) { 391 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 392 continue; 393 } else { 394 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 395 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 396 continue; 397 } 398 399 // Now make the transformation. 400 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 401 Srl.getOperand(0), 402 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 403 MVT::i32)); 404 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 405 Srl, 406 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 407 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 408 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 409 CurDAG->UpdateNodeOperands(N, N0, N1); 410 } 411 } 412 413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 415 /// least on current ARM implementations) which should be avoidded. 416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 417 if (OptLevel == CodeGenOpt::None) 418 return true; 419 420 if (!Subtarget->hasVMLxHazards()) 421 return true; 422 423 if (!N->hasOneUse()) 424 return false; 425 426 SDNode *Use = *N->use_begin(); 427 if (Use->getOpcode() == ISD::CopyToReg) 428 return true; 429 if (Use->isMachineOpcode()) { 430 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 431 CurDAG->getSubtarget().getInstrInfo()); 432 433 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 434 if (MCID.mayStore()) 435 return true; 436 unsigned Opcode = MCID.getOpcode(); 437 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 438 return true; 439 // vmlx feeding into another vmlx. We actually want to unfold 440 // the use later in the MLxExpansion pass. e.g. 441 // vmla 442 // vmla (stall 8 cycles) 443 // 444 // vmul (5 cycles) 445 // vadd (5 cycles) 446 // vmla 447 // This adds up to about 18 - 19 cycles. 448 // 449 // vmla 450 // vmul (stall 4 cycles) 451 // vadd adds up to about 14 cycles. 452 return TII->isFpMLxInstruction(Opcode); 453 } 454 455 return false; 456 } 457 458 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 459 ARM_AM::ShiftOpc ShOpcVal, 460 unsigned ShAmt) { 461 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 462 return true; 463 if (Shift.hasOneUse()) 464 return true; 465 // R << 2 is free. 466 return ShOpcVal == ARM_AM::lsl && 467 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 468 } 469 470 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { 471 if (Subtarget->isThumb()) { 472 if (Val <= 255) return 1; // MOV 473 if (Subtarget->hasV6T2Ops() && 474 (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1)) 475 return 1; // MOVW 476 if (Val <= 510) return 2; // MOV + ADDi8 477 if (~Val <= 255) return 2; // MOV + MVN 478 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL 479 } else { 480 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV 481 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN 482 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW 483 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs 484 } 485 if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT 486 return 3; // Literal pool load 487 } 488 489 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 490 unsigned MaxShift, 491 unsigned &PowerOfTwo, 492 SDValue &NewMulConst) const { 493 assert(N.getOpcode() == ISD::MUL); 494 assert(MaxShift > 0); 495 496 // If the multiply is used in more than one place then changing the constant 497 // will make other uses incorrect, so don't. 498 if (!N.hasOneUse()) return false; 499 // Check if the multiply is by a constant 500 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 501 if (!MulConst) return false; 502 // If the constant is used in more than one place then modifying it will mean 503 // we need to materialize two constants instead of one, which is a bad idea. 504 if (!MulConst->hasOneUse()) return false; 505 unsigned MulConstVal = MulConst->getZExtValue(); 506 if (MulConstVal == 0) return false; 507 508 // Find the largest power of 2 that MulConstVal is a multiple of 509 PowerOfTwo = MaxShift; 510 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 511 --PowerOfTwo; 512 if (PowerOfTwo == 0) return false; 513 } 514 515 // Only optimise if the new cost is better 516 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 517 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 518 unsigned OldCost = ConstantMaterializationCost(MulConstVal); 519 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal); 520 return NewCost < OldCost; 521 } 522 523 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 524 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 525 CurDAG->ReplaceAllUsesWith(N, M); 526 } 527 528 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 529 SDValue &BaseReg, 530 SDValue &Opc, 531 bool CheckProfitability) { 532 if (DisableShifterOp) 533 return false; 534 535 // If N is a multiply-by-constant and it's profitable to extract a shift and 536 // use it in a shifted operand do so. 537 if (N.getOpcode() == ISD::MUL) { 538 unsigned PowerOfTwo = 0; 539 SDValue NewMulConst; 540 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 541 HandleSDNode Handle(N); 542 SDLoc Loc(N); 543 replaceDAGValue(N.getOperand(1), NewMulConst); 544 BaseReg = Handle.getValue(); 545 Opc = CurDAG->getTargetConstant( 546 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 547 return true; 548 } 549 } 550 551 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 552 553 // Don't match base register only case. That is matched to a separate 554 // lower complexity pattern with explicit register operand. 555 if (ShOpcVal == ARM_AM::no_shift) return false; 556 557 BaseReg = N.getOperand(0); 558 unsigned ShImmVal = 0; 559 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 560 if (!RHS) return false; 561 ShImmVal = RHS->getZExtValue() & 31; 562 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 563 SDLoc(N), MVT::i32); 564 return true; 565 } 566 567 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 568 SDValue &BaseReg, 569 SDValue &ShReg, 570 SDValue &Opc, 571 bool CheckProfitability) { 572 if (DisableShifterOp) 573 return false; 574 575 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 576 577 // Don't match base register only case. That is matched to a separate 578 // lower complexity pattern with explicit register operand. 579 if (ShOpcVal == ARM_AM::no_shift) return false; 580 581 BaseReg = N.getOperand(0); 582 unsigned ShImmVal = 0; 583 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 584 if (RHS) return false; 585 586 ShReg = N.getOperand(1); 587 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 588 return false; 589 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 590 SDLoc(N), MVT::i32); 591 return true; 592 } 593 594 595 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 596 SDValue &Base, 597 SDValue &OffImm) { 598 // Match simple R + imm12 operands. 599 600 // Base only. 601 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 602 !CurDAG->isBaseWithConstantOffset(N)) { 603 if (N.getOpcode() == ISD::FrameIndex) { 604 // Match frame index. 605 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 606 Base = CurDAG->getTargetFrameIndex( 607 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 608 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 609 return true; 610 } 611 612 if (N.getOpcode() == ARMISD::Wrapper && 613 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 614 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 615 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 616 Base = N.getOperand(0); 617 } else 618 Base = N; 619 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 620 return true; 621 } 622 623 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 624 int RHSC = (int)RHS->getSExtValue(); 625 if (N.getOpcode() == ISD::SUB) 626 RHSC = -RHSC; 627 628 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 629 Base = N.getOperand(0); 630 if (Base.getOpcode() == ISD::FrameIndex) { 631 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 632 Base = CurDAG->getTargetFrameIndex( 633 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 634 } 635 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 636 return true; 637 } 638 } 639 640 // Base only. 641 Base = N; 642 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 643 return true; 644 } 645 646 647 648 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 649 SDValue &Opc) { 650 if (N.getOpcode() == ISD::MUL && 651 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 652 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 653 // X * [3,5,9] -> X + X * [2,4,8] etc. 654 int RHSC = (int)RHS->getZExtValue(); 655 if (RHSC & 1) { 656 RHSC = RHSC & ~1; 657 ARM_AM::AddrOpc AddSub = ARM_AM::add; 658 if (RHSC < 0) { 659 AddSub = ARM_AM::sub; 660 RHSC = - RHSC; 661 } 662 if (isPowerOf2_32(RHSC)) { 663 unsigned ShAmt = Log2_32(RHSC); 664 Base = Offset = N.getOperand(0); 665 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 666 ARM_AM::lsl), 667 SDLoc(N), MVT::i32); 668 return true; 669 } 670 } 671 } 672 } 673 674 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 675 // ISD::OR that is equivalent to an ISD::ADD. 676 !CurDAG->isBaseWithConstantOffset(N)) 677 return false; 678 679 // Leave simple R +/- imm12 operands for LDRi12 680 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 681 int RHSC; 682 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 683 -0x1000+1, 0x1000, RHSC)) // 12 bits. 684 return false; 685 } 686 687 // Otherwise this is R +/- [possibly shifted] R. 688 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 689 ARM_AM::ShiftOpc ShOpcVal = 690 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 691 unsigned ShAmt = 0; 692 693 Base = N.getOperand(0); 694 Offset = N.getOperand(1); 695 696 if (ShOpcVal != ARM_AM::no_shift) { 697 // Check to see if the RHS of the shift is a constant, if not, we can't fold 698 // it. 699 if (ConstantSDNode *Sh = 700 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 701 ShAmt = Sh->getZExtValue(); 702 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 703 Offset = N.getOperand(1).getOperand(0); 704 else { 705 ShAmt = 0; 706 ShOpcVal = ARM_AM::no_shift; 707 } 708 } else { 709 ShOpcVal = ARM_AM::no_shift; 710 } 711 } 712 713 // Try matching (R shl C) + (R). 714 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 715 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 716 N.getOperand(0).hasOneUse())) { 717 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 718 if (ShOpcVal != ARM_AM::no_shift) { 719 // Check to see if the RHS of the shift is a constant, if not, we can't 720 // fold it. 721 if (ConstantSDNode *Sh = 722 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 723 ShAmt = Sh->getZExtValue(); 724 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 725 Offset = N.getOperand(0).getOperand(0); 726 Base = N.getOperand(1); 727 } else { 728 ShAmt = 0; 729 ShOpcVal = ARM_AM::no_shift; 730 } 731 } else { 732 ShOpcVal = ARM_AM::no_shift; 733 } 734 } 735 } 736 737 // If Offset is a multiply-by-constant and it's profitable to extract a shift 738 // and use it in a shifted operand do so. 739 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 740 unsigned PowerOfTwo = 0; 741 SDValue NewMulConst; 742 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 743 replaceDAGValue(Offset.getOperand(1), NewMulConst); 744 ShAmt = PowerOfTwo; 745 ShOpcVal = ARM_AM::lsl; 746 } 747 } 748 749 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 750 SDLoc(N), MVT::i32); 751 return true; 752 } 753 754 755 //----- 756 757 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, 758 SDValue &Base, 759 SDValue &Offset, 760 SDValue &Opc) { 761 if (N.getOpcode() == ISD::MUL && 762 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { 763 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 764 // X * [3,5,9] -> X + X * [2,4,8] etc. 765 int RHSC = (int)RHS->getZExtValue(); 766 if (RHSC & 1) { 767 RHSC = RHSC & ~1; 768 ARM_AM::AddrOpc AddSub = ARM_AM::add; 769 if (RHSC < 0) { 770 AddSub = ARM_AM::sub; 771 RHSC = - RHSC; 772 } 773 if (isPowerOf2_32(RHSC)) { 774 unsigned ShAmt = Log2_32(RHSC); 775 Base = Offset = N.getOperand(0); 776 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 777 ARM_AM::lsl), 778 SDLoc(N), MVT::i32); 779 return AM2_SHOP; 780 } 781 } 782 } 783 } 784 785 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 786 // ISD::OR that is equivalent to an ADD. 787 !CurDAG->isBaseWithConstantOffset(N)) { 788 Base = N; 789 if (N.getOpcode() == ISD::FrameIndex) { 790 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 791 Base = CurDAG->getTargetFrameIndex( 792 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 793 } else if (N.getOpcode() == ARMISD::Wrapper && 794 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 795 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 796 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 797 Base = N.getOperand(0); 798 } 799 Offset = CurDAG->getRegister(0, MVT::i32); 800 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, 801 ARM_AM::no_shift), 802 SDLoc(N), MVT::i32); 803 return AM2_BASE; 804 } 805 806 // Match simple R +/- imm12 operands. 807 if (N.getOpcode() != ISD::SUB) { 808 int RHSC; 809 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 810 -0x1000+1, 0x1000, RHSC)) { // 12 bits. 811 Base = N.getOperand(0); 812 if (Base.getOpcode() == ISD::FrameIndex) { 813 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 814 Base = CurDAG->getTargetFrameIndex( 815 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 816 } 817 Offset = CurDAG->getRegister(0, MVT::i32); 818 819 ARM_AM::AddrOpc AddSub = ARM_AM::add; 820 if (RHSC < 0) { 821 AddSub = ARM_AM::sub; 822 RHSC = - RHSC; 823 } 824 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, 825 ARM_AM::no_shift), 826 SDLoc(N), MVT::i32); 827 return AM2_BASE; 828 } 829 } 830 831 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { 832 // Compute R +/- (R << N) and reuse it. 833 Base = N; 834 Offset = CurDAG->getRegister(0, MVT::i32); 835 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, 836 ARM_AM::no_shift), 837 SDLoc(N), MVT::i32); 838 return AM2_BASE; 839 } 840 841 // Otherwise this is R +/- [possibly shifted] R. 842 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; 843 ARM_AM::ShiftOpc ShOpcVal = 844 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 845 unsigned ShAmt = 0; 846 847 Base = N.getOperand(0); 848 Offset = N.getOperand(1); 849 850 if (ShOpcVal != ARM_AM::no_shift) { 851 // Check to see if the RHS of the shift is a constant, if not, we can't fold 852 // it. 853 if (ConstantSDNode *Sh = 854 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 855 ShAmt = Sh->getZExtValue(); 856 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 857 Offset = N.getOperand(1).getOperand(0); 858 else { 859 ShAmt = 0; 860 ShOpcVal = ARM_AM::no_shift; 861 } 862 } else { 863 ShOpcVal = ARM_AM::no_shift; 864 } 865 } 866 867 // Try matching (R shl C) + (R). 868 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 869 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 870 N.getOperand(0).hasOneUse())) { 871 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 872 if (ShOpcVal != ARM_AM::no_shift) { 873 // Check to see if the RHS of the shift is a constant, if not, we can't 874 // fold it. 875 if (ConstantSDNode *Sh = 876 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 877 ShAmt = Sh->getZExtValue(); 878 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 879 Offset = N.getOperand(0).getOperand(0); 880 Base = N.getOperand(1); 881 } else { 882 ShAmt = 0; 883 ShOpcVal = ARM_AM::no_shift; 884 } 885 } else { 886 ShOpcVal = ARM_AM::no_shift; 887 } 888 } 889 } 890 891 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 892 SDLoc(N), MVT::i32); 893 return AM2_SHOP; 894 } 895 896 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 897 SDValue &Offset, SDValue &Opc) { 898 unsigned Opcode = Op->getOpcode(); 899 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 900 ? cast<LoadSDNode>(Op)->getAddressingMode() 901 : cast<StoreSDNode>(Op)->getAddressingMode(); 902 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 903 ? ARM_AM::add : ARM_AM::sub; 904 int Val; 905 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 906 return false; 907 908 Offset = N; 909 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 910 unsigned ShAmt = 0; 911 if (ShOpcVal != ARM_AM::no_shift) { 912 // Check to see if the RHS of the shift is a constant, if not, we can't fold 913 // it. 914 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 915 ShAmt = Sh->getZExtValue(); 916 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 917 Offset = N.getOperand(0); 918 else { 919 ShAmt = 0; 920 ShOpcVal = ARM_AM::no_shift; 921 } 922 } else { 923 ShOpcVal = ARM_AM::no_shift; 924 } 925 } 926 927 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 928 SDLoc(N), MVT::i32); 929 return true; 930 } 931 932 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 933 SDValue &Offset, SDValue &Opc) { 934 unsigned Opcode = Op->getOpcode(); 935 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 936 ? cast<LoadSDNode>(Op)->getAddressingMode() 937 : cast<StoreSDNode>(Op)->getAddressingMode(); 938 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 939 ? ARM_AM::add : ARM_AM::sub; 940 int Val; 941 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 942 if (AddSub == ARM_AM::sub) Val *= -1; 943 Offset = CurDAG->getRegister(0, MVT::i32); 944 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 945 return true; 946 } 947 948 return false; 949 } 950 951 952 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 953 SDValue &Offset, SDValue &Opc) { 954 unsigned Opcode = Op->getOpcode(); 955 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 956 ? cast<LoadSDNode>(Op)->getAddressingMode() 957 : cast<StoreSDNode>(Op)->getAddressingMode(); 958 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 959 ? ARM_AM::add : ARM_AM::sub; 960 int Val; 961 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 962 Offset = CurDAG->getRegister(0, MVT::i32); 963 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 964 ARM_AM::no_shift), 965 SDLoc(Op), MVT::i32); 966 return true; 967 } 968 969 return false; 970 } 971 972 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 973 Base = N; 974 return true; 975 } 976 977 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 978 SDValue &Base, SDValue &Offset, 979 SDValue &Opc) { 980 if (N.getOpcode() == ISD::SUB) { 981 // X - C is canonicalize to X + -C, no need to handle it here. 982 Base = N.getOperand(0); 983 Offset = N.getOperand(1); 984 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 985 MVT::i32); 986 return true; 987 } 988 989 if (!CurDAG->isBaseWithConstantOffset(N)) { 990 Base = N; 991 if (N.getOpcode() == ISD::FrameIndex) { 992 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 993 Base = CurDAG->getTargetFrameIndex( 994 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 995 } 996 Offset = CurDAG->getRegister(0, MVT::i32); 997 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 998 MVT::i32); 999 return true; 1000 } 1001 1002 // If the RHS is +/- imm8, fold into addr mode. 1003 int RHSC; 1004 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 1005 -256 + 1, 256, RHSC)) { // 8 bits. 1006 Base = N.getOperand(0); 1007 if (Base.getOpcode() == ISD::FrameIndex) { 1008 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1009 Base = CurDAG->getTargetFrameIndex( 1010 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1011 } 1012 Offset = CurDAG->getRegister(0, MVT::i32); 1013 1014 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1015 if (RHSC < 0) { 1016 AddSub = ARM_AM::sub; 1017 RHSC = -RHSC; 1018 } 1019 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), 1020 MVT::i32); 1021 return true; 1022 } 1023 1024 Base = N.getOperand(0); 1025 Offset = N.getOperand(1); 1026 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 1027 MVT::i32); 1028 return true; 1029 } 1030 1031 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, 1032 SDValue &Offset, SDValue &Opc) { 1033 unsigned Opcode = Op->getOpcode(); 1034 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1035 ? cast<LoadSDNode>(Op)->getAddressingMode() 1036 : cast<StoreSDNode>(Op)->getAddressingMode(); 1037 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 1038 ? ARM_AM::add : ARM_AM::sub; 1039 int Val; 1040 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits. 1041 Offset = CurDAG->getRegister(0, MVT::i32); 1042 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 1043 MVT::i32); 1044 return true; 1045 } 1046 1047 Offset = N; 1048 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 1049 MVT::i32); 1050 return true; 1051 } 1052 1053 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1054 SDValue &Base, SDValue &Offset) { 1055 if (!CurDAG->isBaseWithConstantOffset(N)) { 1056 Base = N; 1057 if (N.getOpcode() == ISD::FrameIndex) { 1058 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1059 Base = CurDAG->getTargetFrameIndex( 1060 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1061 } else if (N.getOpcode() == ARMISD::Wrapper && 1062 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1063 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1064 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1065 Base = N.getOperand(0); 1066 } 1067 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1068 SDLoc(N), MVT::i32); 1069 return true; 1070 } 1071 1072 // If the RHS is +/- imm8, fold into addr mode. 1073 int RHSC; 1074 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 1075 -256 + 1, 256, RHSC)) { 1076 Base = N.getOperand(0); 1077 if (Base.getOpcode() == ISD::FrameIndex) { 1078 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1079 Base = CurDAG->getTargetFrameIndex( 1080 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1081 } 1082 1083 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1084 if (RHSC < 0) { 1085 AddSub = ARM_AM::sub; 1086 RHSC = -RHSC; 1087 } 1088 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1089 SDLoc(N), MVT::i32); 1090 return true; 1091 } 1092 1093 Base = N; 1094 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1095 SDLoc(N), MVT::i32); 1096 return true; 1097 } 1098 1099 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1100 SDValue &Align) { 1101 Addr = N; 1102 1103 unsigned Alignment = 0; 1104 1105 MemSDNode *MemN = cast<MemSDNode>(Parent); 1106 1107 if (isa<LSBaseSDNode>(MemN) || 1108 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1109 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1110 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1111 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1112 // The maximum alignment is equal to the memory size being referenced. 1113 unsigned MMOAlign = MemN->getAlignment(); 1114 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1115 if (MMOAlign >= MemSize && MemSize > 1) 1116 Alignment = MemSize; 1117 } else { 1118 // All other uses of addrmode6 are for intrinsics. For now just record 1119 // the raw alignment value; it will be refined later based on the legal 1120 // alignment operands for the intrinsic. 1121 Alignment = MemN->getAlignment(); 1122 } 1123 1124 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1125 return true; 1126 } 1127 1128 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1129 SDValue &Offset) { 1130 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1131 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1132 if (AM != ISD::POST_INC) 1133 return false; 1134 Offset = N; 1135 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1136 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1137 Offset = CurDAG->getRegister(0, MVT::i32); 1138 } 1139 return true; 1140 } 1141 1142 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1143 SDValue &Offset, SDValue &Label) { 1144 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1145 Offset = N.getOperand(0); 1146 SDValue N1 = N.getOperand(1); 1147 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1148 SDLoc(N), MVT::i32); 1149 return true; 1150 } 1151 1152 return false; 1153 } 1154 1155 1156 //===----------------------------------------------------------------------===// 1157 // Thumb Addressing Modes 1158 //===----------------------------------------------------------------------===// 1159 1160 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, 1161 SDValue &Base, SDValue &Offset){ 1162 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1163 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1164 if (!NC || !NC->isNullValue()) 1165 return false; 1166 1167 Base = Offset = N; 1168 return true; 1169 } 1170 1171 Base = N.getOperand(0); 1172 Offset = N.getOperand(1); 1173 return true; 1174 } 1175 1176 bool 1177 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1178 SDValue &Base, SDValue &OffImm) { 1179 if (!CurDAG->isBaseWithConstantOffset(N)) { 1180 if (N.getOpcode() == ISD::ADD) { 1181 return false; // We want to select register offset instead 1182 } else if (N.getOpcode() == ARMISD::Wrapper && 1183 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1184 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1185 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1186 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1187 Base = N.getOperand(0); 1188 } else { 1189 Base = N; 1190 } 1191 1192 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1193 return true; 1194 } 1195 1196 // If the RHS is + imm5 * scale, fold into addr mode. 1197 int RHSC; 1198 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1199 Base = N.getOperand(0); 1200 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1201 return true; 1202 } 1203 1204 // Offset is too large, so use register offset instead. 1205 return false; 1206 } 1207 1208 bool 1209 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1210 SDValue &OffImm) { 1211 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1212 } 1213 1214 bool 1215 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1216 SDValue &OffImm) { 1217 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1218 } 1219 1220 bool 1221 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1222 SDValue &OffImm) { 1223 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1224 } 1225 1226 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1227 SDValue &Base, SDValue &OffImm) { 1228 if (N.getOpcode() == ISD::FrameIndex) { 1229 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1230 // Only multiples of 4 are allowed for the offset, so the frame object 1231 // alignment must be at least 4. 1232 MachineFrameInfo &MFI = MF->getFrameInfo(); 1233 if (MFI.getObjectAlignment(FI) < 4) 1234 MFI.setObjectAlignment(FI, 4); 1235 Base = CurDAG->getTargetFrameIndex( 1236 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1237 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1238 return true; 1239 } 1240 1241 if (!CurDAG->isBaseWithConstantOffset(N)) 1242 return false; 1243 1244 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); 1245 if (N.getOperand(0).getOpcode() == ISD::FrameIndex || 1246 (LHSR && LHSR->getReg() == ARM::SP)) { 1247 // If the RHS is + imm8 * scale, fold into addr mode. 1248 int RHSC; 1249 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1250 Base = N.getOperand(0); 1251 if (Base.getOpcode() == ISD::FrameIndex) { 1252 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1253 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1254 // indexed by the LHS must be 4-byte aligned. 1255 MachineFrameInfo &MFI = MF->getFrameInfo(); 1256 if (MFI.getObjectAlignment(FI) < 4) 1257 MFI.setObjectAlignment(FI, 4); 1258 Base = CurDAG->getTargetFrameIndex( 1259 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1260 } 1261 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1262 return true; 1263 } 1264 } 1265 1266 return false; 1267 } 1268 1269 1270 //===----------------------------------------------------------------------===// 1271 // Thumb 2 Addressing Modes 1272 //===----------------------------------------------------------------------===// 1273 1274 1275 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1276 SDValue &Base, SDValue &OffImm) { 1277 // Match simple R + imm12 operands. 1278 1279 // Base only. 1280 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1281 !CurDAG->isBaseWithConstantOffset(N)) { 1282 if (N.getOpcode() == ISD::FrameIndex) { 1283 // Match frame index. 1284 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1285 Base = CurDAG->getTargetFrameIndex( 1286 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1287 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1288 return true; 1289 } 1290 1291 if (N.getOpcode() == ARMISD::Wrapper && 1292 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1293 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1294 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1295 Base = N.getOperand(0); 1296 if (Base.getOpcode() == ISD::TargetConstantPool) 1297 return false; // We want to select t2LDRpci instead. 1298 } else 1299 Base = N; 1300 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1301 return true; 1302 } 1303 1304 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1305 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1306 // Let t2LDRi8 handle (R - imm8). 1307 return false; 1308 1309 int RHSC = (int)RHS->getZExtValue(); 1310 if (N.getOpcode() == ISD::SUB) 1311 RHSC = -RHSC; 1312 1313 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1314 Base = N.getOperand(0); 1315 if (Base.getOpcode() == ISD::FrameIndex) { 1316 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1317 Base = CurDAG->getTargetFrameIndex( 1318 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1319 } 1320 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1321 return true; 1322 } 1323 } 1324 1325 // Base only. 1326 Base = N; 1327 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1328 return true; 1329 } 1330 1331 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1332 SDValue &Base, SDValue &OffImm) { 1333 // Match simple R - imm8 operands. 1334 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1335 !CurDAG->isBaseWithConstantOffset(N)) 1336 return false; 1337 1338 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1339 int RHSC = (int)RHS->getSExtValue(); 1340 if (N.getOpcode() == ISD::SUB) 1341 RHSC = -RHSC; 1342 1343 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1344 Base = N.getOperand(0); 1345 if (Base.getOpcode() == ISD::FrameIndex) { 1346 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1347 Base = CurDAG->getTargetFrameIndex( 1348 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1349 } 1350 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1351 return true; 1352 } 1353 } 1354 1355 return false; 1356 } 1357 1358 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1359 SDValue &OffImm){ 1360 unsigned Opcode = Op->getOpcode(); 1361 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1362 ? cast<LoadSDNode>(Op)->getAddressingMode() 1363 : cast<StoreSDNode>(Op)->getAddressingMode(); 1364 int RHSC; 1365 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1366 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1367 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1368 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1369 return true; 1370 } 1371 1372 return false; 1373 } 1374 1375 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1376 SDValue &Base, 1377 SDValue &OffReg, SDValue &ShImm) { 1378 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1379 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1380 return false; 1381 1382 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1383 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1384 int RHSC = (int)RHS->getZExtValue(); 1385 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1386 return false; 1387 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1388 return false; 1389 } 1390 1391 // Look for (R + R) or (R + (R << [1,2,3])). 1392 unsigned ShAmt = 0; 1393 Base = N.getOperand(0); 1394 OffReg = N.getOperand(1); 1395 1396 // Swap if it is ((R << c) + R). 1397 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1398 if (ShOpcVal != ARM_AM::lsl) { 1399 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1400 if (ShOpcVal == ARM_AM::lsl) 1401 std::swap(Base, OffReg); 1402 } 1403 1404 if (ShOpcVal == ARM_AM::lsl) { 1405 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1406 // it. 1407 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1408 ShAmt = Sh->getZExtValue(); 1409 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1410 OffReg = OffReg.getOperand(0); 1411 else { 1412 ShAmt = 0; 1413 } 1414 } 1415 } 1416 1417 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1418 // and use it in a shifted operand do so. 1419 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1420 unsigned PowerOfTwo = 0; 1421 SDValue NewMulConst; 1422 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1423 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1424 ShAmt = PowerOfTwo; 1425 } 1426 } 1427 1428 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1429 1430 return true; 1431 } 1432 1433 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1434 SDValue &OffImm) { 1435 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1436 // instructions. 1437 Base = N; 1438 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1439 1440 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1441 return true; 1442 1443 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1444 if (!RHS) 1445 return true; 1446 1447 uint32_t RHSC = (int)RHS->getZExtValue(); 1448 if (RHSC > 1020 || RHSC % 4 != 0) 1449 return true; 1450 1451 Base = N.getOperand(0); 1452 if (Base.getOpcode() == ISD::FrameIndex) { 1453 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1454 Base = CurDAG->getTargetFrameIndex( 1455 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1456 } 1457 1458 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1459 return true; 1460 } 1461 1462 //===--------------------------------------------------------------------===// 1463 1464 /// getAL - Returns a ARMCC::AL immediate node. 1465 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1466 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1467 } 1468 1469 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1470 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1471 MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); 1472 cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); 1473 } 1474 1475 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1476 LoadSDNode *LD = cast<LoadSDNode>(N); 1477 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1478 if (AM == ISD::UNINDEXED) 1479 return false; 1480 1481 EVT LoadedVT = LD->getMemoryVT(); 1482 SDValue Offset, AMOpc; 1483 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1484 unsigned Opcode = 0; 1485 bool Match = false; 1486 if (LoadedVT == MVT::i32 && isPre && 1487 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1488 Opcode = ARM::LDR_PRE_IMM; 1489 Match = true; 1490 } else if (LoadedVT == MVT::i32 && !isPre && 1491 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1492 Opcode = ARM::LDR_POST_IMM; 1493 Match = true; 1494 } else if (LoadedVT == MVT::i32 && 1495 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1496 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1497 Match = true; 1498 1499 } else if (LoadedVT == MVT::i16 && 1500 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1501 Match = true; 1502 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1503 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1504 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1505 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1506 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1507 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1508 Match = true; 1509 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; 1510 } 1511 } else { 1512 if (isPre && 1513 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1514 Match = true; 1515 Opcode = ARM::LDRB_PRE_IMM; 1516 } else if (!isPre && 1517 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1518 Match = true; 1519 Opcode = ARM::LDRB_POST_IMM; 1520 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1521 Match = true; 1522 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1523 } 1524 } 1525 } 1526 1527 if (Match) { 1528 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1529 SDValue Chain = LD->getChain(); 1530 SDValue Base = LD->getBasePtr(); 1531 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1532 CurDAG->getRegister(0, MVT::i32), Chain }; 1533 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1534 MVT::Other, Ops); 1535 transferMemOperands(N, New); 1536 ReplaceNode(N, New); 1537 return true; 1538 } else { 1539 SDValue Chain = LD->getChain(); 1540 SDValue Base = LD->getBasePtr(); 1541 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1542 CurDAG->getRegister(0, MVT::i32), Chain }; 1543 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1544 MVT::Other, Ops); 1545 transferMemOperands(N, New); 1546 ReplaceNode(N, New); 1547 return true; 1548 } 1549 } 1550 1551 return false; 1552 } 1553 1554 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1555 LoadSDNode *LD = cast<LoadSDNode>(N); 1556 EVT LoadedVT = LD->getMemoryVT(); 1557 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1558 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1559 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1560 return false; 1561 1562 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1563 if (!COffs || COffs->getZExtValue() != 4) 1564 return false; 1565 1566 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 1567 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1568 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1569 // ISel. 1570 SDValue Chain = LD->getChain(); 1571 SDValue Base = LD->getBasePtr(); 1572 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1573 CurDAG->getRegister(0, MVT::i32), Chain }; 1574 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1575 MVT::i32, MVT::Other, Ops); 1576 transferMemOperands(N, New); 1577 ReplaceNode(N, New); 1578 return true; 1579 } 1580 1581 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1582 LoadSDNode *LD = cast<LoadSDNode>(N); 1583 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1584 if (AM == ISD::UNINDEXED) 1585 return false; 1586 1587 EVT LoadedVT = LD->getMemoryVT(); 1588 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1589 SDValue Offset; 1590 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1591 unsigned Opcode = 0; 1592 bool Match = false; 1593 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1594 switch (LoadedVT.getSimpleVT().SimpleTy) { 1595 case MVT::i32: 1596 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1597 break; 1598 case MVT::i16: 1599 if (isSExtLd) 1600 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1601 else 1602 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1603 break; 1604 case MVT::i8: 1605 case MVT::i1: 1606 if (isSExtLd) 1607 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1608 else 1609 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1610 break; 1611 default: 1612 return false; 1613 } 1614 Match = true; 1615 } 1616 1617 if (Match) { 1618 SDValue Chain = LD->getChain(); 1619 SDValue Base = LD->getBasePtr(); 1620 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), 1621 CurDAG->getRegister(0, MVT::i32), Chain }; 1622 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1623 MVT::Other, Ops); 1624 transferMemOperands(N, New); 1625 ReplaceNode(N, New); 1626 return true; 1627 } 1628 1629 return false; 1630 } 1631 1632 /// \brief Form a GPRPair pseudo register from a pair of GPR regs. 1633 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1634 SDLoc dl(V0.getNode()); 1635 SDValue RegClass = 1636 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1637 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1638 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1639 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1640 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1641 } 1642 1643 /// \brief Form a D register from a pair of S registers. 1644 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1645 SDLoc dl(V0.getNode()); 1646 SDValue RegClass = 1647 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1648 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1649 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1650 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1651 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1652 } 1653 1654 /// \brief Form a quad register from a pair of D registers. 1655 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1656 SDLoc dl(V0.getNode()); 1657 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1658 MVT::i32); 1659 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1660 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1661 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1662 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1663 } 1664 1665 /// \brief Form 4 consecutive D registers from a pair of Q registers. 1666 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1667 SDLoc dl(V0.getNode()); 1668 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1669 MVT::i32); 1670 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1671 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1672 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1673 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1674 } 1675 1676 /// \brief Form 4 consecutive S registers. 1677 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1678 SDValue V2, SDValue V3) { 1679 SDLoc dl(V0.getNode()); 1680 SDValue RegClass = 1681 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1682 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1683 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1684 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1685 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1686 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1687 V2, SubReg2, V3, SubReg3 }; 1688 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1689 } 1690 1691 /// \brief Form 4 consecutive D registers. 1692 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1693 SDValue V2, SDValue V3) { 1694 SDLoc dl(V0.getNode()); 1695 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1696 MVT::i32); 1697 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1698 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1699 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1700 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1701 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1702 V2, SubReg2, V3, SubReg3 }; 1703 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1704 } 1705 1706 /// \brief Form 4 consecutive Q registers. 1707 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1708 SDValue V2, SDValue V3) { 1709 SDLoc dl(V0.getNode()); 1710 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1711 MVT::i32); 1712 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1713 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1714 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1715 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1716 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1717 V2, SubReg2, V3, SubReg3 }; 1718 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1719 } 1720 1721 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1722 /// of a NEON VLD or VST instruction. The supported values depend on the 1723 /// number of registers being loaded. 1724 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1725 unsigned NumVecs, bool is64BitVector) { 1726 unsigned NumRegs = NumVecs; 1727 if (!is64BitVector && NumVecs < 3) 1728 NumRegs *= 2; 1729 1730 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1731 if (Alignment >= 32 && NumRegs == 4) 1732 Alignment = 32; 1733 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1734 Alignment = 16; 1735 else if (Alignment >= 8) 1736 Alignment = 8; 1737 else 1738 Alignment = 0; 1739 1740 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1741 } 1742 1743 static bool isVLDfixed(unsigned Opc) 1744 { 1745 switch (Opc) { 1746 default: return false; 1747 case ARM::VLD1d8wb_fixed : return true; 1748 case ARM::VLD1d16wb_fixed : return true; 1749 case ARM::VLD1d64Qwb_fixed : return true; 1750 case ARM::VLD1d32wb_fixed : return true; 1751 case ARM::VLD1d64wb_fixed : return true; 1752 case ARM::VLD1d64TPseudoWB_fixed : return true; 1753 case ARM::VLD1d64QPseudoWB_fixed : return true; 1754 case ARM::VLD1q8wb_fixed : return true; 1755 case ARM::VLD1q16wb_fixed : return true; 1756 case ARM::VLD1q32wb_fixed : return true; 1757 case ARM::VLD1q64wb_fixed : return true; 1758 case ARM::VLD1DUPd8wb_fixed : return true; 1759 case ARM::VLD1DUPd16wb_fixed : return true; 1760 case ARM::VLD1DUPd32wb_fixed : return true; 1761 case ARM::VLD1DUPq8wb_fixed : return true; 1762 case ARM::VLD1DUPq16wb_fixed : return true; 1763 case ARM::VLD1DUPq32wb_fixed : return true; 1764 case ARM::VLD2d8wb_fixed : return true; 1765 case ARM::VLD2d16wb_fixed : return true; 1766 case ARM::VLD2d32wb_fixed : return true; 1767 case ARM::VLD2q8PseudoWB_fixed : return true; 1768 case ARM::VLD2q16PseudoWB_fixed : return true; 1769 case ARM::VLD2q32PseudoWB_fixed : return true; 1770 case ARM::VLD2DUPd8wb_fixed : return true; 1771 case ARM::VLD2DUPd16wb_fixed : return true; 1772 case ARM::VLD2DUPd32wb_fixed : return true; 1773 } 1774 } 1775 1776 static bool isVSTfixed(unsigned Opc) 1777 { 1778 switch (Opc) { 1779 default: return false; 1780 case ARM::VST1d8wb_fixed : return true; 1781 case ARM::VST1d16wb_fixed : return true; 1782 case ARM::VST1d32wb_fixed : return true; 1783 case ARM::VST1d64wb_fixed : return true; 1784 case ARM::VST1q8wb_fixed : return true; 1785 case ARM::VST1q16wb_fixed : return true; 1786 case ARM::VST1q32wb_fixed : return true; 1787 case ARM::VST1q64wb_fixed : return true; 1788 case ARM::VST1d64TPseudoWB_fixed : return true; 1789 case ARM::VST1d64QPseudoWB_fixed : return true; 1790 case ARM::VST2d8wb_fixed : return true; 1791 case ARM::VST2d16wb_fixed : return true; 1792 case ARM::VST2d32wb_fixed : return true; 1793 case ARM::VST2q8PseudoWB_fixed : return true; 1794 case ARM::VST2q16PseudoWB_fixed : return true; 1795 case ARM::VST2q32PseudoWB_fixed : return true; 1796 } 1797 } 1798 1799 // Get the register stride update opcode of a VLD/VST instruction that 1800 // is otherwise equivalent to the given fixed stride updating instruction. 1801 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1802 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1803 && "Incorrect fixed stride updating instruction."); 1804 switch (Opc) { 1805 default: break; 1806 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1807 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1808 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1809 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 1810 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 1811 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 1812 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 1813 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 1814 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 1815 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 1816 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 1817 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 1818 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 1819 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 1820 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 1821 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 1822 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 1823 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 1824 1825 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 1826 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 1827 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 1828 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 1829 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 1830 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 1831 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 1832 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 1833 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 1834 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 1835 1836 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 1837 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 1838 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 1839 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 1840 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 1841 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 1842 1843 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 1844 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 1845 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 1846 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 1847 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 1848 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 1849 1850 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 1851 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 1852 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 1853 } 1854 return Opc; // If not one we handle, return it unchanged. 1855 } 1856 1857 /// Returns true if the given increment is a Constant known to be equal to the 1858 /// access size performed by a NEON load/store. This means the "[rN]!" form can 1859 /// be used. 1860 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 1861 auto C = dyn_cast<ConstantSDNode>(Inc); 1862 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 1863 } 1864 1865 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 1866 const uint16_t *DOpcodes, 1867 const uint16_t *QOpcodes0, 1868 const uint16_t *QOpcodes1) { 1869 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 1870 SDLoc dl(N); 1871 1872 SDValue MemAddr, Align; 1873 unsigned AddrOpIdx = isUpdating ? 1 : 2; 1874 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1875 return; 1876 1877 SDValue Chain = N->getOperand(0); 1878 EVT VT = N->getValueType(0); 1879 bool is64BitVector = VT.is64BitVector(); 1880 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1881 1882 unsigned OpcodeIndex; 1883 switch (VT.getSimpleVT().SimpleTy) { 1884 default: llvm_unreachable("unhandled vld type"); 1885 // Double-register operations: 1886 case MVT::v8i8: OpcodeIndex = 0; break; 1887 case MVT::v4i16: OpcodeIndex = 1; break; 1888 case MVT::v2f32: 1889 case MVT::v2i32: OpcodeIndex = 2; break; 1890 case MVT::v1i64: OpcodeIndex = 3; break; 1891 // Quad-register operations: 1892 case MVT::v16i8: OpcodeIndex = 0; break; 1893 case MVT::v8i16: OpcodeIndex = 1; break; 1894 case MVT::v4f32: 1895 case MVT::v4i32: OpcodeIndex = 2; break; 1896 case MVT::v2f64: 1897 case MVT::v2i64: OpcodeIndex = 3; 1898 assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); 1899 break; 1900 } 1901 1902 EVT ResTy; 1903 if (NumVecs == 1) 1904 ResTy = VT; 1905 else { 1906 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 1907 if (!is64BitVector) 1908 ResTyElts *= 2; 1909 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 1910 } 1911 std::vector<EVT> ResTys; 1912 ResTys.push_back(ResTy); 1913 if (isUpdating) 1914 ResTys.push_back(MVT::i32); 1915 ResTys.push_back(MVT::Other); 1916 1917 SDValue Pred = getAL(CurDAG, dl); 1918 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1919 SDNode *VLd; 1920 SmallVector<SDValue, 7> Ops; 1921 1922 // Double registers and VLD1/VLD2 quad registers are directly supported. 1923 if (is64BitVector || NumVecs <= 2) { 1924 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1925 QOpcodes0[OpcodeIndex]); 1926 Ops.push_back(MemAddr); 1927 Ops.push_back(Align); 1928 if (isUpdating) { 1929 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1930 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 1931 // case entirely when the rest are updated to that form, too. 1932 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 1933 if ((NumVecs <= 2) && !IsImmUpdate) 1934 Opc = getVLDSTRegisterUpdateOpcode(Opc); 1935 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 1936 // check for that explicitly too. Horribly hacky, but temporary. 1937 if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) 1938 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 1939 } 1940 Ops.push_back(Pred); 1941 Ops.push_back(Reg0); 1942 Ops.push_back(Chain); 1943 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1944 1945 } else { 1946 // Otherwise, quad registers are loaded with two separate instructions, 1947 // where one loads the even registers and the other loads the odd registers. 1948 EVT AddrTy = MemAddr.getValueType(); 1949 1950 // Load the even subregs. This is always an updating load, so that it 1951 // provides the address to the second load for the odd subregs. 1952 SDValue ImplDef = 1953 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 1954 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 1955 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 1956 ResTy, AddrTy, MVT::Other, OpsA); 1957 Chain = SDValue(VLdA, 2); 1958 1959 // Load the odd subregs. 1960 Ops.push_back(SDValue(VLdA, 1)); 1961 Ops.push_back(Align); 1962 if (isUpdating) { 1963 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1964 assert(isa<ConstantSDNode>(Inc.getNode()) && 1965 "only constant post-increment update allowed for VLD3/4"); 1966 (void)Inc; 1967 Ops.push_back(Reg0); 1968 } 1969 Ops.push_back(SDValue(VLdA, 0)); 1970 Ops.push_back(Pred); 1971 Ops.push_back(Reg0); 1972 Ops.push_back(Chain); 1973 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 1974 } 1975 1976 // Transfer memoperands. 1977 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1978 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1979 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); 1980 1981 if (NumVecs == 1) { 1982 ReplaceNode(N, VLd); 1983 return; 1984 } 1985 1986 // Extract out the subregisters. 1987 SDValue SuperReg = SDValue(VLd, 0); 1988 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 1989 ARM::qsub_3 == ARM::qsub_0 + 3, 1990 "Unexpected subreg numbering"); 1991 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); 1992 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 1993 ReplaceUses(SDValue(N, Vec), 1994 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 1995 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 1996 if (isUpdating) 1997 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 1998 CurDAG->RemoveDeadNode(N); 1999 } 2000 2001 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2002 const uint16_t *DOpcodes, 2003 const uint16_t *QOpcodes0, 2004 const uint16_t *QOpcodes1) { 2005 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2006 SDLoc dl(N); 2007 2008 SDValue MemAddr, Align; 2009 unsigned AddrOpIdx = isUpdating ? 1 : 2; 2010 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2011 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2012 return; 2013 2014 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2015 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2016 2017 SDValue Chain = N->getOperand(0); 2018 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2019 bool is64BitVector = VT.is64BitVector(); 2020 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2021 2022 unsigned OpcodeIndex; 2023 switch (VT.getSimpleVT().SimpleTy) { 2024 default: llvm_unreachable("unhandled vst type"); 2025 // Double-register operations: 2026 case MVT::v8i8: OpcodeIndex = 0; break; 2027 case MVT::v4i16: OpcodeIndex = 1; break; 2028 case MVT::v2f32: 2029 case MVT::v2i32: OpcodeIndex = 2; break; 2030 case MVT::v1i64: OpcodeIndex = 3; break; 2031 // Quad-register operations: 2032 case MVT::v16i8: OpcodeIndex = 0; break; 2033 case MVT::v8i16: OpcodeIndex = 1; break; 2034 case MVT::v4f32: 2035 case MVT::v4i32: OpcodeIndex = 2; break; 2036 case MVT::v2f64: 2037 case MVT::v2i64: OpcodeIndex = 3; 2038 assert(NumVecs == 1 && "v2i64 type only supported for VST1"); 2039 break; 2040 } 2041 2042 std::vector<EVT> ResTys; 2043 if (isUpdating) 2044 ResTys.push_back(MVT::i32); 2045 ResTys.push_back(MVT::Other); 2046 2047 SDValue Pred = getAL(CurDAG, dl); 2048 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2049 SmallVector<SDValue, 7> Ops; 2050 2051 // Double registers and VST1/VST2 quad registers are directly supported. 2052 if (is64BitVector || NumVecs <= 2) { 2053 SDValue SrcReg; 2054 if (NumVecs == 1) { 2055 SrcReg = N->getOperand(Vec0Idx); 2056 } else if (is64BitVector) { 2057 // Form a REG_SEQUENCE to force register allocation. 2058 SDValue V0 = N->getOperand(Vec0Idx + 0); 2059 SDValue V1 = N->getOperand(Vec0Idx + 1); 2060 if (NumVecs == 2) 2061 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2062 else { 2063 SDValue V2 = N->getOperand(Vec0Idx + 2); 2064 // If it's a vst3, form a quad D-register and leave the last part as 2065 // an undef. 2066 SDValue V3 = (NumVecs == 3) 2067 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2068 : N->getOperand(Vec0Idx + 3); 2069 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2070 } 2071 } else { 2072 // Form a QQ register. 2073 SDValue Q0 = N->getOperand(Vec0Idx); 2074 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2075 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2076 } 2077 2078 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2079 QOpcodes0[OpcodeIndex]); 2080 Ops.push_back(MemAddr); 2081 Ops.push_back(Align); 2082 if (isUpdating) { 2083 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2084 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 2085 // case entirely when the rest are updated to that form, too. 2086 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2087 if (NumVecs <= 2 && !IsImmUpdate) 2088 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2089 // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so 2090 // check for that explicitly too. Horribly hacky, but temporary. 2091 if (!IsImmUpdate) 2092 Ops.push_back(Inc); 2093 else if (NumVecs > 2 && !isVSTfixed(Opc)) 2094 Ops.push_back(Reg0); 2095 } 2096 Ops.push_back(SrcReg); 2097 Ops.push_back(Pred); 2098 Ops.push_back(Reg0); 2099 Ops.push_back(Chain); 2100 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2101 2102 // Transfer memoperands. 2103 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); 2104 2105 ReplaceNode(N, VSt); 2106 return; 2107 } 2108 2109 // Otherwise, quad registers are stored with two separate instructions, 2110 // where one stores the even registers and the other stores the odd registers. 2111 2112 // Form the QQQQ REG_SEQUENCE. 2113 SDValue V0 = N->getOperand(Vec0Idx + 0); 2114 SDValue V1 = N->getOperand(Vec0Idx + 1); 2115 SDValue V2 = N->getOperand(Vec0Idx + 2); 2116 SDValue V3 = (NumVecs == 3) 2117 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2118 : N->getOperand(Vec0Idx + 3); 2119 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2120 2121 // Store the even D registers. This is always an updating store, so that it 2122 // provides the address to the second store for the odd subregs. 2123 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2124 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2125 MemAddr.getValueType(), 2126 MVT::Other, OpsA); 2127 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); 2128 Chain = SDValue(VStA, 1); 2129 2130 // Store the odd D registers. 2131 Ops.push_back(SDValue(VStA, 0)); 2132 Ops.push_back(Align); 2133 if (isUpdating) { 2134 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2135 assert(isa<ConstantSDNode>(Inc.getNode()) && 2136 "only constant post-increment update allowed for VST3/4"); 2137 (void)Inc; 2138 Ops.push_back(Reg0); 2139 } 2140 Ops.push_back(RegSeq); 2141 Ops.push_back(Pred); 2142 Ops.push_back(Reg0); 2143 Ops.push_back(Chain); 2144 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2145 Ops); 2146 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); 2147 ReplaceNode(N, VStB); 2148 } 2149 2150 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2151 unsigned NumVecs, 2152 const uint16_t *DOpcodes, 2153 const uint16_t *QOpcodes) { 2154 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2155 SDLoc dl(N); 2156 2157 SDValue MemAddr, Align; 2158 unsigned AddrOpIdx = isUpdating ? 1 : 2; 2159 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2160 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2161 return; 2162 2163 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2164 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2165 2166 SDValue Chain = N->getOperand(0); 2167 unsigned Lane = 2168 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2169 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2170 bool is64BitVector = VT.is64BitVector(); 2171 2172 unsigned Alignment = 0; 2173 if (NumVecs != 3) { 2174 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2175 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2176 if (Alignment > NumBytes) 2177 Alignment = NumBytes; 2178 if (Alignment < 8 && Alignment < NumBytes) 2179 Alignment = 0; 2180 // Alignment must be a power of two; make sure of that. 2181 Alignment = (Alignment & -Alignment); 2182 if (Alignment == 1) 2183 Alignment = 0; 2184 } 2185 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2186 2187 unsigned OpcodeIndex; 2188 switch (VT.getSimpleVT().SimpleTy) { 2189 default: llvm_unreachable("unhandled vld/vst lane type"); 2190 // Double-register operations: 2191 case MVT::v8i8: OpcodeIndex = 0; break; 2192 case MVT::v4i16: OpcodeIndex = 1; break; 2193 case MVT::v2f32: 2194 case MVT::v2i32: OpcodeIndex = 2; break; 2195 // Quad-register operations: 2196 case MVT::v8i16: OpcodeIndex = 0; break; 2197 case MVT::v4f32: 2198 case MVT::v4i32: OpcodeIndex = 1; break; 2199 } 2200 2201 std::vector<EVT> ResTys; 2202 if (IsLoad) { 2203 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2204 if (!is64BitVector) 2205 ResTyElts *= 2; 2206 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2207 MVT::i64, ResTyElts)); 2208 } 2209 if (isUpdating) 2210 ResTys.push_back(MVT::i32); 2211 ResTys.push_back(MVT::Other); 2212 2213 SDValue Pred = getAL(CurDAG, dl); 2214 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2215 2216 SmallVector<SDValue, 8> Ops; 2217 Ops.push_back(MemAddr); 2218 Ops.push_back(Align); 2219 if (isUpdating) { 2220 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2221 bool IsImmUpdate = 2222 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2223 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2224 } 2225 2226 SDValue SuperReg; 2227 SDValue V0 = N->getOperand(Vec0Idx + 0); 2228 SDValue V1 = N->getOperand(Vec0Idx + 1); 2229 if (NumVecs == 2) { 2230 if (is64BitVector) 2231 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2232 else 2233 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2234 } else { 2235 SDValue V2 = N->getOperand(Vec0Idx + 2); 2236 SDValue V3 = (NumVecs == 3) 2237 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2238 : N->getOperand(Vec0Idx + 3); 2239 if (is64BitVector) 2240 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2241 else 2242 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2243 } 2244 Ops.push_back(SuperReg); 2245 Ops.push_back(getI32Imm(Lane, dl)); 2246 Ops.push_back(Pred); 2247 Ops.push_back(Reg0); 2248 Ops.push_back(Chain); 2249 2250 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2251 QOpcodes[OpcodeIndex]); 2252 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2253 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); 2254 if (!IsLoad) { 2255 ReplaceNode(N, VLdLn); 2256 return; 2257 } 2258 2259 // Extract the subregisters. 2260 SuperReg = SDValue(VLdLn, 0); 2261 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2262 ARM::qsub_3 == ARM::qsub_0 + 3, 2263 "Unexpected subreg numbering"); 2264 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2265 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2266 ReplaceUses(SDValue(N, Vec), 2267 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2268 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2269 if (isUpdating) 2270 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2271 CurDAG->RemoveDeadNode(N); 2272 } 2273 2274 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, 2275 const uint16_t *DOpcodes, 2276 const uint16_t *QOpcodes) { 2277 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2278 SDLoc dl(N); 2279 2280 SDValue MemAddr, Align; 2281 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) 2282 return; 2283 2284 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2285 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2286 2287 SDValue Chain = N->getOperand(0); 2288 EVT VT = N->getValueType(0); 2289 2290 unsigned Alignment = 0; 2291 if (NumVecs != 3) { 2292 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2293 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2294 if (Alignment > NumBytes) 2295 Alignment = NumBytes; 2296 if (Alignment < 8 && Alignment < NumBytes) 2297 Alignment = 0; 2298 // Alignment must be a power of two; make sure of that. 2299 Alignment = (Alignment & -Alignment); 2300 if (Alignment == 1) 2301 Alignment = 0; 2302 } 2303 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2304 2305 unsigned Opc; 2306 switch (VT.getSimpleVT().SimpleTy) { 2307 default: llvm_unreachable("unhandled vld-dup type"); 2308 case MVT::v8i8: Opc = DOpcodes[0]; break; 2309 case MVT::v16i8: Opc = QOpcodes[0]; break; 2310 case MVT::v4i16: Opc = DOpcodes[1]; break; 2311 case MVT::v8i16: Opc = QOpcodes[1]; break; 2312 case MVT::v2f32: 2313 case MVT::v2i32: Opc = DOpcodes[2]; break; 2314 case MVT::v4f32: 2315 case MVT::v4i32: Opc = QOpcodes[2]; break; 2316 } 2317 2318 SDValue Pred = getAL(CurDAG, dl); 2319 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2320 SmallVector<SDValue, 6> Ops; 2321 Ops.push_back(MemAddr); 2322 Ops.push_back(Align); 2323 if (isUpdating) { 2324 // fixed-stride update instructions don't have an explicit writeback 2325 // operand. It's implicit in the opcode itself. 2326 SDValue Inc = N->getOperand(2); 2327 bool IsImmUpdate = 2328 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2329 if (NumVecs <= 2 && !IsImmUpdate) 2330 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2331 if (!IsImmUpdate) 2332 Ops.push_back(Inc); 2333 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 2334 else if (NumVecs > 2) 2335 Ops.push_back(Reg0); 2336 } 2337 Ops.push_back(Pred); 2338 Ops.push_back(Reg0); 2339 Ops.push_back(Chain); 2340 2341 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2342 std::vector<EVT> ResTys; 2343 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); 2344 if (isUpdating) 2345 ResTys.push_back(MVT::i32); 2346 ResTys.push_back(MVT::Other); 2347 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2348 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); 2349 2350 // Extract the subregisters. 2351 if (NumVecs == 1) { 2352 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 2353 } else { 2354 SDValue SuperReg = SDValue(VLdDup, 0); 2355 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 2356 unsigned SubIdx = ARM::dsub_0; 2357 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2358 ReplaceUses(SDValue(N, Vec), 2359 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 2360 } 2361 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 2362 if (isUpdating) 2363 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 2364 CurDAG->RemoveDeadNode(N); 2365 } 2366 2367 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 2368 if (!Subtarget->hasV6T2Ops()) 2369 return false; 2370 2371 unsigned Opc = isSigned 2372 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 2373 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 2374 SDLoc dl(N); 2375 2376 // For unsigned extracts, check for a shift right and mask 2377 unsigned And_imm = 0; 2378 if (N->getOpcode() == ISD::AND) { 2379 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 2380 2381 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2382 if (And_imm & (And_imm + 1)) 2383 return false; 2384 2385 unsigned Srl_imm = 0; 2386 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 2387 Srl_imm)) { 2388 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2389 2390 // Note: The width operand is encoded as width-1. 2391 unsigned Width = countTrailingOnes(And_imm) - 1; 2392 unsigned LSB = Srl_imm; 2393 2394 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2395 2396 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 2397 // It's cheaper to use a right shift to extract the top bits. 2398 if (Subtarget->isThumb()) { 2399 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 2400 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2401 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2402 getAL(CurDAG, dl), Reg0, Reg0 }; 2403 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2404 return true; 2405 } 2406 2407 // ARM models shift instructions as MOVsi with shifter operand. 2408 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 2409 SDValue ShOpc = 2410 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 2411 MVT::i32); 2412 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 2413 getAL(CurDAG, dl), Reg0, Reg0 }; 2414 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 2415 return true; 2416 } 2417 2418 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2419 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2420 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2421 getAL(CurDAG, dl), Reg0 }; 2422 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2423 return true; 2424 } 2425 } 2426 return false; 2427 } 2428 2429 // Otherwise, we're looking for a shift of a shift 2430 unsigned Shl_imm = 0; 2431 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 2432 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 2433 unsigned Srl_imm = 0; 2434 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 2435 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2436 // Note: The width operand is encoded as width-1. 2437 unsigned Width = 32 - Srl_imm - 1; 2438 int LSB = Srl_imm - Shl_imm; 2439 if (LSB < 0) 2440 return false; 2441 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2442 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2443 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2444 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2445 getAL(CurDAG, dl), Reg0 }; 2446 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2447 return true; 2448 } 2449 } 2450 2451 // Or we are looking for a shift of an and, with a mask operand 2452 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 2453 isShiftedMask_32(And_imm)) { 2454 unsigned Srl_imm = 0; 2455 unsigned LSB = countTrailingZeros(And_imm); 2456 // Shift must be the same as the ands lsb 2457 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 2458 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2459 unsigned MSB = 31 - countLeadingZeros(And_imm); 2460 // Note: The width operand is encoded as width-1. 2461 unsigned Width = MSB - LSB; 2462 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2463 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2464 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 2465 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2466 getAL(CurDAG, dl), Reg0 }; 2467 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2468 return true; 2469 } 2470 } 2471 2472 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 2473 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2474 unsigned LSB = 0; 2475 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 2476 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 2477 return false; 2478 2479 if (LSB + Width > 32) 2480 return false; 2481 2482 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2483 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2484 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2485 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 2486 getAL(CurDAG, dl), Reg0 }; 2487 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2488 return true; 2489 } 2490 2491 return false; 2492 } 2493 2494 /// Target-specific DAG combining for ISD::XOR. 2495 /// Target-independent combining lowers SELECT_CC nodes of the form 2496 /// select_cc setg[ge] X, 0, X, -X 2497 /// select_cc setgt X, -1, X, -X 2498 /// select_cc setl[te] X, 0, -X, X 2499 /// select_cc setlt X, 1, -X, X 2500 /// which represent Integer ABS into: 2501 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 2502 /// ARM instruction selection detects the latter and matches it to 2503 /// ARM::ABS or ARM::t2ABS machine node. 2504 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 2505 SDValue XORSrc0 = N->getOperand(0); 2506 SDValue XORSrc1 = N->getOperand(1); 2507 EVT VT = N->getValueType(0); 2508 2509 if (Subtarget->isThumb1Only()) 2510 return false; 2511 2512 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 2513 return false; 2514 2515 SDValue ADDSrc0 = XORSrc0.getOperand(0); 2516 SDValue ADDSrc1 = XORSrc0.getOperand(1); 2517 SDValue SRASrc0 = XORSrc1.getOperand(0); 2518 SDValue SRASrc1 = XORSrc1.getOperand(1); 2519 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 2520 EVT XType = SRASrc0.getValueType(); 2521 unsigned Size = XType.getSizeInBits() - 1; 2522 2523 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 2524 XType.isInteger() && SRAConstant != nullptr && 2525 Size == SRAConstant->getZExtValue()) { 2526 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 2527 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 2528 return true; 2529 } 2530 2531 return false; 2532 } 2533 2534 /// We've got special pseudo-instructions for these 2535 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 2536 unsigned Opcode; 2537 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 2538 if (MemTy == MVT::i8) 2539 Opcode = ARM::CMP_SWAP_8; 2540 else if (MemTy == MVT::i16) 2541 Opcode = ARM::CMP_SWAP_16; 2542 else if (MemTy == MVT::i32) 2543 Opcode = ARM::CMP_SWAP_32; 2544 else 2545 llvm_unreachable("Unknown AtomicCmpSwap type"); 2546 2547 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 2548 N->getOperand(0)}; 2549 SDNode *CmpSwap = CurDAG->getMachineNode( 2550 Opcode, SDLoc(N), 2551 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 2552 2553 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2554 MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); 2555 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1); 2556 2557 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 2558 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 2559 CurDAG->RemoveDeadNode(N); 2560 } 2561 2562 static Optional<std::pair<unsigned, unsigned>> 2563 getContiguousRangeOfSetBits(const APInt &A) { 2564 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; 2565 unsigned LastOne = A.countTrailingZeros(); 2566 if (A.countPopulation() != (FirstOne - LastOne + 1)) 2567 return Optional<std::pair<unsigned,unsigned>>(); 2568 return std::make_pair(FirstOne, LastOne); 2569 } 2570 2571 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 2572 assert(N->getOpcode() == ARMISD::CMPZ); 2573 SwitchEQNEToPLMI = false; 2574 2575 if (!Subtarget->isThumb()) 2576 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 2577 // LSR don't exist as standalone instructions - they need the barrel shifter. 2578 return; 2579 2580 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 2581 SDValue And = N->getOperand(0); 2582 if (!And->hasOneUse()) 2583 return; 2584 2585 SDValue Zero = N->getOperand(1); 2586 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 2587 And->getOpcode() != ISD::AND) 2588 return; 2589 SDValue X = And.getOperand(0); 2590 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 2591 2592 if (!C || !X->hasOneUse()) 2593 return; 2594 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 2595 if (!Range) 2596 return; 2597 2598 // There are several ways to lower this: 2599 SDNode *NewN; 2600 SDLoc dl(N); 2601 2602 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 2603 if (Subtarget->isThumb2()) { 2604 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 2605 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2606 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2607 CurDAG->getRegister(0, MVT::i32) }; 2608 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2609 } else { 2610 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 2611 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2612 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 2613 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2614 } 2615 }; 2616 2617 if (Range->second == 0) { 2618 // 1. Mask includes the LSB -> Simply shift the top N bits off 2619 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2620 ReplaceNode(And.getNode(), NewN); 2621 } else if (Range->first == 31) { 2622 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 2623 NewN = EmitShift(ARM::tLSRri, X, Range->second); 2624 ReplaceNode(And.getNode(), NewN); 2625 } else if (Range->first == Range->second) { 2626 // 3. Only one bit is set. We can shift this into the sign bit and use a 2627 // PL/MI comparison. 2628 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2629 ReplaceNode(And.getNode(), NewN); 2630 2631 SwitchEQNEToPLMI = true; 2632 } else if (!Subtarget->hasV6T2Ops()) { 2633 // 4. Do a double shift to clear bottom and top bits, but only in 2634 // thumb-1 mode as in thumb-2 we can use UBFX. 2635 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2636 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 2637 Range->second + (31 - Range->first)); 2638 ReplaceNode(And.getNode(), NewN); 2639 } 2640 2641 } 2642 2643 void ARMDAGToDAGISel::Select(SDNode *N) { 2644 SDLoc dl(N); 2645 2646 if (N->isMachineOpcode()) { 2647 N->setNodeId(-1); 2648 return; // Already selected. 2649 } 2650 2651 switch (N->getOpcode()) { 2652 default: break; 2653 case ISD::WRITE_REGISTER: 2654 if (tryWriteRegister(N)) 2655 return; 2656 break; 2657 case ISD::READ_REGISTER: 2658 if (tryReadRegister(N)) 2659 return; 2660 break; 2661 case ISD::INLINEASM: 2662 if (tryInlineAsm(N)) 2663 return; 2664 break; 2665 case ISD::XOR: 2666 // Select special operations if XOR node forms integer ABS pattern 2667 if (tryABSOp(N)) 2668 return; 2669 // Other cases are autogenerated. 2670 break; 2671 case ISD::Constant: { 2672 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2673 // If we can't materialize the constant we need to use a literal pool 2674 if (ConstantMaterializationCost(Val) > 2) { 2675 SDValue CPIdx = CurDAG->getTargetConstantPool( 2676 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2677 TLI->getPointerTy(CurDAG->getDataLayout())); 2678 2679 SDNode *ResNode; 2680 if (Subtarget->isThumb()) { 2681 SDValue Pred = getAL(CurDAG, dl); 2682 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2683 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; 2684 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2685 Ops); 2686 } else { 2687 SDValue Ops[] = { 2688 CPIdx, 2689 CurDAG->getTargetConstant(0, dl, MVT::i32), 2690 getAL(CurDAG, dl), 2691 CurDAG->getRegister(0, MVT::i32), 2692 CurDAG->getEntryNode() 2693 }; 2694 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2695 Ops); 2696 } 2697 ReplaceNode(N, ResNode); 2698 return; 2699 } 2700 2701 // Other cases are autogenerated. 2702 break; 2703 } 2704 case ISD::FrameIndex: { 2705 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 2706 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2707 SDValue TFI = CurDAG->getTargetFrameIndex( 2708 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2709 if (Subtarget->isThumb1Only()) { 2710 // Set the alignment of the frame object to 4, to avoid having to generate 2711 // more than one ADD 2712 MachineFrameInfo &MFI = MF->getFrameInfo(); 2713 if (MFI.getObjectAlignment(FI) < 4) 2714 MFI.setObjectAlignment(FI, 4); 2715 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2716 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2717 return; 2718 } else { 2719 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 2720 ARM::t2ADDri : ARM::ADDri); 2721 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2722 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2723 CurDAG->getRegister(0, MVT::i32) }; 2724 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2725 return; 2726 } 2727 } 2728 case ISD::SRL: 2729 if (tryV6T2BitfieldExtractOp(N, false)) 2730 return; 2731 break; 2732 case ISD::SIGN_EXTEND_INREG: 2733 case ISD::SRA: 2734 if (tryV6T2BitfieldExtractOp(N, true)) 2735 return; 2736 break; 2737 case ISD::MUL: 2738 if (Subtarget->isThumb1Only()) 2739 break; 2740 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2741 unsigned RHSV = C->getZExtValue(); 2742 if (!RHSV) break; 2743 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 2744 unsigned ShImm = Log2_32(RHSV-1); 2745 if (ShImm >= 32) 2746 break; 2747 SDValue V = N->getOperand(0); 2748 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2749 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2750 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2751 if (Subtarget->isThumb()) { 2752 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2753 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2754 return; 2755 } else { 2756 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2757 Reg0 }; 2758 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2759 return; 2760 } 2761 } 2762 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 2763 unsigned ShImm = Log2_32(RHSV+1); 2764 if (ShImm >= 32) 2765 break; 2766 SDValue V = N->getOperand(0); 2767 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2768 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2769 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2770 if (Subtarget->isThumb()) { 2771 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2772 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2773 return; 2774 } else { 2775 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2776 Reg0 }; 2777 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2778 return; 2779 } 2780 } 2781 } 2782 break; 2783 case ISD::AND: { 2784 // Check for unsigned bitfield extract 2785 if (tryV6T2BitfieldExtractOp(N, false)) 2786 return; 2787 2788 // If an immediate is used in an AND node, it is possible that the immediate 2789 // can be more optimally materialized when negated. If this is the case we 2790 // can negate the immediate and use a BIC instead. 2791 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2792 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 2793 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 2794 2795 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 2796 // immediate can be negated and fit in the immediate operand of 2797 // a t2BIC, don't do any manual transform here as this can be 2798 // handled by the generic ISel machinery. 2799 bool PreferImmediateEncoding = 2800 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 2801 if (!PreferImmediateEncoding && 2802 ConstantMaterializationCost(Imm) > 2803 ConstantMaterializationCost(~Imm)) { 2804 // The current immediate costs more to materialize than a negated 2805 // immediate, so negate the immediate and use a BIC. 2806 SDValue NewImm = 2807 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 2808 // If the new constant didn't exist before, reposition it in the topological 2809 // ordering so it is just before N. Otherwise, don't touch its location. 2810 if (NewImm->getNodeId() == -1) 2811 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 2812 2813 if (!Subtarget->hasThumb2()) { 2814 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 2815 N->getOperand(0), NewImm, getAL(CurDAG, dl), 2816 CurDAG->getRegister(0, MVT::i32)}; 2817 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 2818 return; 2819 } else { 2820 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 2821 CurDAG->getRegister(0, MVT::i32), 2822 CurDAG->getRegister(0, MVT::i32)}; 2823 ReplaceNode(N, 2824 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 2825 return; 2826 } 2827 } 2828 } 2829 2830 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 2831 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 2832 // are entirely contributed by c2 and lower 16-bits are entirely contributed 2833 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 2834 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 2835 EVT VT = N->getValueType(0); 2836 if (VT != MVT::i32) 2837 break; 2838 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2839 ? ARM::t2MOVTi16 2840 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 2841 if (!Opc) 2842 break; 2843 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2844 N1C = dyn_cast<ConstantSDNode>(N1); 2845 if (!N1C) 2846 break; 2847 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2848 SDValue N2 = N0.getOperand(1); 2849 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2850 if (!N2C) 2851 break; 2852 unsigned N1CVal = N1C->getZExtValue(); 2853 unsigned N2CVal = N2C->getZExtValue(); 2854 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2855 (N1CVal & 0xffffU) == 0xffffU && 2856 (N2CVal & 0xffffU) == 0x0U) { 2857 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2858 dl, MVT::i32); 2859 SDValue Ops[] = { N0.getOperand(0), Imm16, 2860 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2861 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 2862 return; 2863 } 2864 } 2865 2866 break; 2867 } 2868 case ARMISD::UMAAL: { 2869 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 2870 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2871 N->getOperand(2), N->getOperand(3), 2872 getAL(CurDAG, dl), 2873 CurDAG->getRegister(0, MVT::i32) }; 2874 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 2875 return; 2876 } 2877 case ARMISD::UMLAL:{ 2878 if (Subtarget->isThumb()) { 2879 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2880 N->getOperand(3), getAL(CurDAG, dl), 2881 CurDAG->getRegister(0, MVT::i32)}; 2882 ReplaceNode( 2883 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 2884 return; 2885 }else{ 2886 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2887 N->getOperand(3), getAL(CurDAG, dl), 2888 CurDAG->getRegister(0, MVT::i32), 2889 CurDAG->getRegister(0, MVT::i32) }; 2890 ReplaceNode(N, CurDAG->getMachineNode( 2891 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl, 2892 MVT::i32, MVT::i32, Ops)); 2893 return; 2894 } 2895 } 2896 case ARMISD::SMLAL:{ 2897 if (Subtarget->isThumb()) { 2898 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2899 N->getOperand(3), getAL(CurDAG, dl), 2900 CurDAG->getRegister(0, MVT::i32)}; 2901 ReplaceNode( 2902 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 2903 return; 2904 }else{ 2905 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2906 N->getOperand(3), getAL(CurDAG, dl), 2907 CurDAG->getRegister(0, MVT::i32), 2908 CurDAG->getRegister(0, MVT::i32) }; 2909 ReplaceNode(N, CurDAG->getMachineNode( 2910 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 2911 MVT::i32, MVT::i32, Ops)); 2912 return; 2913 } 2914 } 2915 case ARMISD::SUBE: { 2916 if (!Subtarget->hasV6Ops()) 2917 break; 2918 // Look for a pattern to match SMMLS 2919 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 2920 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 2921 N->getOperand(2).getOpcode() != ARMISD::SUBC || 2922 !SDValue(N, 1).use_empty()) 2923 break; 2924 2925 if (Subtarget->isThumb()) 2926 assert(Subtarget->hasThumb2() && 2927 "This pattern should not be generated for Thumb"); 2928 2929 SDValue SmulLoHi = N->getOperand(1); 2930 SDValue Subc = N->getOperand(2); 2931 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 2932 2933 if (!Zero || Zero->getZExtValue() != 0 || 2934 Subc.getOperand(1) != SmulLoHi.getValue(0) || 2935 N->getOperand(1) != SmulLoHi.getValue(1) || 2936 N->getOperand(2) != Subc.getValue(1)) 2937 break; 2938 2939 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS; 2940 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 2941 N->getOperand(0), getAL(CurDAG, dl), 2942 CurDAG->getRegister(0, MVT::i32) }; 2943 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 2944 return; 2945 } 2946 case ISD::LOAD: { 2947 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 2948 if (tryT2IndexedLoad(N)) 2949 return; 2950 } else if (Subtarget->isThumb()) { 2951 if (tryT1IndexedLoad(N)) 2952 return; 2953 } else if (tryARMIndexedLoad(N)) 2954 return; 2955 // Other cases are autogenerated. 2956 break; 2957 } 2958 case ARMISD::BRCOND: { 2959 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2960 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2961 // Pattern complexity = 6 cost = 1 size = 0 2962 2963 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2964 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 2965 // Pattern complexity = 6 cost = 1 size = 0 2966 2967 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2968 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2969 // Pattern complexity = 6 cost = 1 size = 0 2970 2971 unsigned Opc = Subtarget->isThumb() ? 2972 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 2973 SDValue Chain = N->getOperand(0); 2974 SDValue N1 = N->getOperand(1); 2975 SDValue N2 = N->getOperand(2); 2976 SDValue N3 = N->getOperand(3); 2977 SDValue InFlag = N->getOperand(4); 2978 assert(N1.getOpcode() == ISD::BasicBlock); 2979 assert(N2.getOpcode() == ISD::Constant); 2980 assert(N3.getOpcode() == ISD::Register); 2981 2982 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 2983 2984 if (InFlag.getOpcode() == ARMISD::CMPZ) { 2985 bool SwitchEQNEToPLMI; 2986 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 2987 InFlag = N->getOperand(4); 2988 2989 if (SwitchEQNEToPLMI) { 2990 switch ((ARMCC::CondCodes)CC) { 2991 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 2992 case ARMCC::NE: 2993 CC = (unsigned)ARMCC::MI; 2994 break; 2995 case ARMCC::EQ: 2996 CC = (unsigned)ARMCC::PL; 2997 break; 2998 } 2999 } 3000 } 3001 3002 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3003 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3004 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3005 MVT::Glue, Ops); 3006 Chain = SDValue(ResNode, 0); 3007 if (N->getNumValues() == 2) { 3008 InFlag = SDValue(ResNode, 1); 3009 ReplaceUses(SDValue(N, 1), InFlag); 3010 } 3011 ReplaceUses(SDValue(N, 0), 3012 SDValue(Chain.getNode(), Chain.getResNo())); 3013 CurDAG->RemoveDeadNode(N); 3014 return; 3015 } 3016 3017 case ARMISD::CMPZ: { 3018 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3019 // This allows us to avoid materializing the expensive negative constant. 3020 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3021 // for its glue output. 3022 SDValue X = N->getOperand(0); 3023 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3024 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3025 int64_t Addend = -C->getSExtValue(); 3026 3027 SDNode *Add = nullptr; 3028 // ADDS can be better than CMN if the immediate fits in a 3029 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3030 // Outside that range we can just use a CMN which is 32-bit but has a 3031 // 12-bit immediate range. 3032 if (Addend < 1<<8) { 3033 if (Subtarget->isThumb2()) { 3034 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3035 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3036 CurDAG->getRegister(0, MVT::i32) }; 3037 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3038 } else { 3039 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; 3040 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3041 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3042 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3043 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3044 } 3045 } 3046 if (Add) { 3047 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3048 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3049 } 3050 } 3051 // Other cases are autogenerated. 3052 break; 3053 } 3054 3055 case ARMISD::CMOV: { 3056 SDValue InFlag = N->getOperand(4); 3057 3058 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3059 bool SwitchEQNEToPLMI; 3060 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3061 3062 if (SwitchEQNEToPLMI) { 3063 SDValue ARMcc = N->getOperand(2); 3064 ARMCC::CondCodes CC = 3065 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3066 3067 switch (CC) { 3068 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3069 case ARMCC::NE: 3070 CC = ARMCC::MI; 3071 break; 3072 case ARMCC::EQ: 3073 CC = ARMCC::PL; 3074 break; 3075 } 3076 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3077 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3078 N->getOperand(3), N->getOperand(4)}; 3079 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3080 } 3081 3082 } 3083 // Other cases are autogenerated. 3084 break; 3085 } 3086 3087 case ARMISD::VZIP: { 3088 unsigned Opc = 0; 3089 EVT VT = N->getValueType(0); 3090 switch (VT.getSimpleVT().SimpleTy) { 3091 default: return; 3092 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3093 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3094 case MVT::v2f32: 3095 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3096 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3097 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3098 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3099 case MVT::v4f32: 3100 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3101 } 3102 SDValue Pred = getAL(CurDAG, dl); 3103 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3104 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3105 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3106 return; 3107 } 3108 case ARMISD::VUZP: { 3109 unsigned Opc = 0; 3110 EVT VT = N->getValueType(0); 3111 switch (VT.getSimpleVT().SimpleTy) { 3112 default: return; 3113 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3114 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3115 case MVT::v2f32: 3116 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3117 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3118 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3119 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3120 case MVT::v4f32: 3121 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3122 } 3123 SDValue Pred = getAL(CurDAG, dl); 3124 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3125 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3126 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3127 return; 3128 } 3129 case ARMISD::VTRN: { 3130 unsigned Opc = 0; 3131 EVT VT = N->getValueType(0); 3132 switch (VT.getSimpleVT().SimpleTy) { 3133 default: return; 3134 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3135 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3136 case MVT::v2f32: 3137 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3138 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3139 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3140 case MVT::v4f32: 3141 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3142 } 3143 SDValue Pred = getAL(CurDAG, dl); 3144 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3145 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3146 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3147 return; 3148 } 3149 case ARMISD::BUILD_VECTOR: { 3150 EVT VecVT = N->getValueType(0); 3151 EVT EltVT = VecVT.getVectorElementType(); 3152 unsigned NumElts = VecVT.getVectorNumElements(); 3153 if (EltVT == MVT::f64) { 3154 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3155 ReplaceNode( 3156 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3157 return; 3158 } 3159 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3160 if (NumElts == 2) { 3161 ReplaceNode( 3162 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3163 return; 3164 } 3165 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3166 ReplaceNode(N, 3167 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3168 N->getOperand(2), N->getOperand(3))); 3169 return; 3170 } 3171 3172 case ARMISD::VLD1DUP: { 3173 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 3174 ARM::VLD1DUPd32 }; 3175 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 3176 ARM::VLD1DUPq32 }; 3177 SelectVLDDup(N, false, 1, DOpcodes, QOpcodes); 3178 return; 3179 } 3180 3181 case ARMISD::VLD2DUP: { 3182 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3183 ARM::VLD2DUPd32 }; 3184 SelectVLDDup(N, false, 2, Opcodes); 3185 return; 3186 } 3187 3188 case ARMISD::VLD3DUP: { 3189 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 3190 ARM::VLD3DUPd16Pseudo, 3191 ARM::VLD3DUPd32Pseudo }; 3192 SelectVLDDup(N, false, 3, Opcodes); 3193 return; 3194 } 3195 3196 case ARMISD::VLD4DUP: { 3197 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 3198 ARM::VLD4DUPd16Pseudo, 3199 ARM::VLD4DUPd32Pseudo }; 3200 SelectVLDDup(N, false, 4, Opcodes); 3201 return; 3202 } 3203 3204 case ARMISD::VLD1DUP_UPD: { 3205 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 3206 ARM::VLD1DUPd16wb_fixed, 3207 ARM::VLD1DUPd32wb_fixed }; 3208 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 3209 ARM::VLD1DUPq16wb_fixed, 3210 ARM::VLD1DUPq32wb_fixed }; 3211 SelectVLDDup(N, true, 1, DOpcodes, QOpcodes); 3212 return; 3213 } 3214 3215 case ARMISD::VLD2DUP_UPD: { 3216 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 3217 ARM::VLD2DUPd16wb_fixed, 3218 ARM::VLD2DUPd32wb_fixed }; 3219 SelectVLDDup(N, true, 2, Opcodes); 3220 return; 3221 } 3222 3223 case ARMISD::VLD3DUP_UPD: { 3224 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 3225 ARM::VLD3DUPd16Pseudo_UPD, 3226 ARM::VLD3DUPd32Pseudo_UPD }; 3227 SelectVLDDup(N, true, 3, Opcodes); 3228 return; 3229 } 3230 3231 case ARMISD::VLD4DUP_UPD: { 3232 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 3233 ARM::VLD4DUPd16Pseudo_UPD, 3234 ARM::VLD4DUPd32Pseudo_UPD }; 3235 SelectVLDDup(N, true, 4, Opcodes); 3236 return; 3237 } 3238 3239 case ARMISD::VLD1_UPD: { 3240 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 3241 ARM::VLD1d16wb_fixed, 3242 ARM::VLD1d32wb_fixed, 3243 ARM::VLD1d64wb_fixed }; 3244 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 3245 ARM::VLD1q16wb_fixed, 3246 ARM::VLD1q32wb_fixed, 3247 ARM::VLD1q64wb_fixed }; 3248 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 3249 return; 3250 } 3251 3252 case ARMISD::VLD2_UPD: { 3253 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 3254 ARM::VLD2d16wb_fixed, 3255 ARM::VLD2d32wb_fixed, 3256 ARM::VLD1q64wb_fixed}; 3257 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 3258 ARM::VLD2q16PseudoWB_fixed, 3259 ARM::VLD2q32PseudoWB_fixed }; 3260 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 3261 return; 3262 } 3263 3264 case ARMISD::VLD3_UPD: { 3265 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 3266 ARM::VLD3d16Pseudo_UPD, 3267 ARM::VLD3d32Pseudo_UPD, 3268 ARM::VLD1d64TPseudoWB_fixed}; 3269 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3270 ARM::VLD3q16Pseudo_UPD, 3271 ARM::VLD3q32Pseudo_UPD }; 3272 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 3273 ARM::VLD3q16oddPseudo_UPD, 3274 ARM::VLD3q32oddPseudo_UPD }; 3275 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3276 return; 3277 } 3278 3279 case ARMISD::VLD4_UPD: { 3280 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3281 ARM::VLD4d16Pseudo_UPD, 3282 ARM::VLD4d32Pseudo_UPD, 3283 ARM::VLD1d64QPseudoWB_fixed}; 3284 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3285 ARM::VLD4q16Pseudo_UPD, 3286 ARM::VLD4q32Pseudo_UPD }; 3287 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3288 ARM::VLD4q16oddPseudo_UPD, 3289 ARM::VLD4q32oddPseudo_UPD }; 3290 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3291 return; 3292 } 3293 3294 case ARMISD::VLD2LN_UPD: { 3295 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3296 ARM::VLD2LNd16Pseudo_UPD, 3297 ARM::VLD2LNd32Pseudo_UPD }; 3298 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3299 ARM::VLD2LNq32Pseudo_UPD }; 3300 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3301 return; 3302 } 3303 3304 case ARMISD::VLD3LN_UPD: { 3305 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3306 ARM::VLD3LNd16Pseudo_UPD, 3307 ARM::VLD3LNd32Pseudo_UPD }; 3308 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3309 ARM::VLD3LNq32Pseudo_UPD }; 3310 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3311 return; 3312 } 3313 3314 case ARMISD::VLD4LN_UPD: { 3315 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3316 ARM::VLD4LNd16Pseudo_UPD, 3317 ARM::VLD4LNd32Pseudo_UPD }; 3318 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3319 ARM::VLD4LNq32Pseudo_UPD }; 3320 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3321 return; 3322 } 3323 3324 case ARMISD::VST1_UPD: { 3325 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3326 ARM::VST1d16wb_fixed, 3327 ARM::VST1d32wb_fixed, 3328 ARM::VST1d64wb_fixed }; 3329 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 3330 ARM::VST1q16wb_fixed, 3331 ARM::VST1q32wb_fixed, 3332 ARM::VST1q64wb_fixed }; 3333 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 3334 return; 3335 } 3336 3337 case ARMISD::VST2_UPD: { 3338 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3339 ARM::VST2d16wb_fixed, 3340 ARM::VST2d32wb_fixed, 3341 ARM::VST1q64wb_fixed}; 3342 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3343 ARM::VST2q16PseudoWB_fixed, 3344 ARM::VST2q32PseudoWB_fixed }; 3345 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 3346 return; 3347 } 3348 3349 case ARMISD::VST3_UPD: { 3350 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 3351 ARM::VST3d16Pseudo_UPD, 3352 ARM::VST3d32Pseudo_UPD, 3353 ARM::VST1d64TPseudoWB_fixed}; 3354 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3355 ARM::VST3q16Pseudo_UPD, 3356 ARM::VST3q32Pseudo_UPD }; 3357 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 3358 ARM::VST3q16oddPseudo_UPD, 3359 ARM::VST3q32oddPseudo_UPD }; 3360 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3361 return; 3362 } 3363 3364 case ARMISD::VST4_UPD: { 3365 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3366 ARM::VST4d16Pseudo_UPD, 3367 ARM::VST4d32Pseudo_UPD, 3368 ARM::VST1d64QPseudoWB_fixed}; 3369 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3370 ARM::VST4q16Pseudo_UPD, 3371 ARM::VST4q32Pseudo_UPD }; 3372 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3373 ARM::VST4q16oddPseudo_UPD, 3374 ARM::VST4q32oddPseudo_UPD }; 3375 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3376 return; 3377 } 3378 3379 case ARMISD::VST2LN_UPD: { 3380 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3381 ARM::VST2LNd16Pseudo_UPD, 3382 ARM::VST2LNd32Pseudo_UPD }; 3383 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3384 ARM::VST2LNq32Pseudo_UPD }; 3385 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3386 return; 3387 } 3388 3389 case ARMISD::VST3LN_UPD: { 3390 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3391 ARM::VST3LNd16Pseudo_UPD, 3392 ARM::VST3LNd32Pseudo_UPD }; 3393 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3394 ARM::VST3LNq32Pseudo_UPD }; 3395 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3396 return; 3397 } 3398 3399 case ARMISD::VST4LN_UPD: { 3400 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3401 ARM::VST4LNd16Pseudo_UPD, 3402 ARM::VST4LNd32Pseudo_UPD }; 3403 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3404 ARM::VST4LNq32Pseudo_UPD }; 3405 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3406 return; 3407 } 3408 3409 case ISD::INTRINSIC_VOID: 3410 case ISD::INTRINSIC_W_CHAIN: { 3411 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3412 switch (IntNo) { 3413 default: 3414 break; 3415 3416 case Intrinsic::arm_mrrc: 3417 case Intrinsic::arm_mrrc2: { 3418 SDLoc dl(N); 3419 SDValue Chain = N->getOperand(0); 3420 unsigned Opc; 3421 3422 if (Subtarget->isThumb()) 3423 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 3424 else 3425 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 3426 3427 SmallVector<SDValue, 5> Ops; 3428 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 3429 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 3430 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 3431 3432 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 3433 // instruction will always be '1111' but it is possible in assembly language to specify 3434 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 3435 if (Opc != ARM::MRRC2) { 3436 Ops.push_back(getAL(CurDAG, dl)); 3437 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3438 } 3439 3440 Ops.push_back(Chain); 3441 3442 // Writes to two registers. 3443 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 3444 3445 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 3446 return; 3447 } 3448 case Intrinsic::arm_ldaexd: 3449 case Intrinsic::arm_ldrexd: { 3450 SDLoc dl(N); 3451 SDValue Chain = N->getOperand(0); 3452 SDValue MemAddr = N->getOperand(2); 3453 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 3454 3455 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3456 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3457 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 3458 3459 // arm_ldrexd returns a i64 value in {i32, i32} 3460 std::vector<EVT> ResTys; 3461 if (isThumb) { 3462 ResTys.push_back(MVT::i32); 3463 ResTys.push_back(MVT::i32); 3464 } else 3465 ResTys.push_back(MVT::Untyped); 3466 ResTys.push_back(MVT::Other); 3467 3468 // Place arguments in the right order. 3469 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 3470 CurDAG->getRegister(0, MVT::i32), Chain}; 3471 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3472 // Transfer memoperands. 3473 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3474 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3475 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 3476 3477 // Remap uses. 3478 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 3479 if (!SDValue(N, 0).use_empty()) { 3480 SDValue Result; 3481 if (isThumb) 3482 Result = SDValue(Ld, 0); 3483 else { 3484 SDValue SubRegIdx = 3485 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3486 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3487 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3488 Result = SDValue(ResNode,0); 3489 } 3490 ReplaceUses(SDValue(N, 0), Result); 3491 } 3492 if (!SDValue(N, 1).use_empty()) { 3493 SDValue Result; 3494 if (isThumb) 3495 Result = SDValue(Ld, 1); 3496 else { 3497 SDValue SubRegIdx = 3498 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3499 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3500 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3501 Result = SDValue(ResNode,0); 3502 } 3503 ReplaceUses(SDValue(N, 1), Result); 3504 } 3505 ReplaceUses(SDValue(N, 2), OutChain); 3506 CurDAG->RemoveDeadNode(N); 3507 return; 3508 } 3509 case Intrinsic::arm_stlexd: 3510 case Intrinsic::arm_strexd: { 3511 SDLoc dl(N); 3512 SDValue Chain = N->getOperand(0); 3513 SDValue Val0 = N->getOperand(2); 3514 SDValue Val1 = N->getOperand(3); 3515 SDValue MemAddr = N->getOperand(4); 3516 3517 // Store exclusive double return a i32 value which is the return status 3518 // of the issued store. 3519 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3520 3521 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3522 // Place arguments in the right order. 3523 SmallVector<SDValue, 7> Ops; 3524 if (isThumb) { 3525 Ops.push_back(Val0); 3526 Ops.push_back(Val1); 3527 } else 3528 // arm_strexd uses GPRPair. 3529 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3530 Ops.push_back(MemAddr); 3531 Ops.push_back(getAL(CurDAG, dl)); 3532 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3533 Ops.push_back(Chain); 3534 3535 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3536 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 3537 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 3538 3539 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3540 // Transfer memoperands. 3541 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3542 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3543 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 3544 3545 ReplaceNode(N, St); 3546 return; 3547 } 3548 3549 case Intrinsic::arm_neon_vld1: { 3550 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3551 ARM::VLD1d32, ARM::VLD1d64 }; 3552 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3553 ARM::VLD1q32, ARM::VLD1q64}; 3554 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 3555 return; 3556 } 3557 3558 case Intrinsic::arm_neon_vld2: { 3559 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3560 ARM::VLD2d32, ARM::VLD1q64 }; 3561 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3562 ARM::VLD2q32Pseudo }; 3563 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3564 return; 3565 } 3566 3567 case Intrinsic::arm_neon_vld3: { 3568 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3569 ARM::VLD3d16Pseudo, 3570 ARM::VLD3d32Pseudo, 3571 ARM::VLD1d64TPseudo }; 3572 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3573 ARM::VLD3q16Pseudo_UPD, 3574 ARM::VLD3q32Pseudo_UPD }; 3575 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3576 ARM::VLD3q16oddPseudo, 3577 ARM::VLD3q32oddPseudo }; 3578 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3579 return; 3580 } 3581 3582 case Intrinsic::arm_neon_vld4: { 3583 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3584 ARM::VLD4d16Pseudo, 3585 ARM::VLD4d32Pseudo, 3586 ARM::VLD1d64QPseudo }; 3587 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3588 ARM::VLD4q16Pseudo_UPD, 3589 ARM::VLD4q32Pseudo_UPD }; 3590 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3591 ARM::VLD4q16oddPseudo, 3592 ARM::VLD4q32oddPseudo }; 3593 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3594 return; 3595 } 3596 3597 case Intrinsic::arm_neon_vld2lane: { 3598 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3599 ARM::VLD2LNd16Pseudo, 3600 ARM::VLD2LNd32Pseudo }; 3601 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3602 ARM::VLD2LNq32Pseudo }; 3603 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3604 return; 3605 } 3606 3607 case Intrinsic::arm_neon_vld3lane: { 3608 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3609 ARM::VLD3LNd16Pseudo, 3610 ARM::VLD3LNd32Pseudo }; 3611 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3612 ARM::VLD3LNq32Pseudo }; 3613 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3614 return; 3615 } 3616 3617 case Intrinsic::arm_neon_vld4lane: { 3618 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3619 ARM::VLD4LNd16Pseudo, 3620 ARM::VLD4LNd32Pseudo }; 3621 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3622 ARM::VLD4LNq32Pseudo }; 3623 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3624 return; 3625 } 3626 3627 case Intrinsic::arm_neon_vst1: { 3628 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 3629 ARM::VST1d32, ARM::VST1d64 }; 3630 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3631 ARM::VST1q32, ARM::VST1q64 }; 3632 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 3633 return; 3634 } 3635 3636 case Intrinsic::arm_neon_vst2: { 3637 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3638 ARM::VST2d32, ARM::VST1q64 }; 3639 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3640 ARM::VST2q32Pseudo }; 3641 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3642 return; 3643 } 3644 3645 case Intrinsic::arm_neon_vst3: { 3646 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3647 ARM::VST3d16Pseudo, 3648 ARM::VST3d32Pseudo, 3649 ARM::VST1d64TPseudo }; 3650 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3651 ARM::VST3q16Pseudo_UPD, 3652 ARM::VST3q32Pseudo_UPD }; 3653 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 3654 ARM::VST3q16oddPseudo, 3655 ARM::VST3q32oddPseudo }; 3656 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3657 return; 3658 } 3659 3660 case Intrinsic::arm_neon_vst4: { 3661 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3662 ARM::VST4d16Pseudo, 3663 ARM::VST4d32Pseudo, 3664 ARM::VST1d64QPseudo }; 3665 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3666 ARM::VST4q16Pseudo_UPD, 3667 ARM::VST4q32Pseudo_UPD }; 3668 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3669 ARM::VST4q16oddPseudo, 3670 ARM::VST4q32oddPseudo }; 3671 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3672 return; 3673 } 3674 3675 case Intrinsic::arm_neon_vst2lane: { 3676 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 3677 ARM::VST2LNd16Pseudo, 3678 ARM::VST2LNd32Pseudo }; 3679 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 3680 ARM::VST2LNq32Pseudo }; 3681 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 3682 return; 3683 } 3684 3685 case Intrinsic::arm_neon_vst3lane: { 3686 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 3687 ARM::VST3LNd16Pseudo, 3688 ARM::VST3LNd32Pseudo }; 3689 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 3690 ARM::VST3LNq32Pseudo }; 3691 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 3692 return; 3693 } 3694 3695 case Intrinsic::arm_neon_vst4lane: { 3696 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 3697 ARM::VST4LNd16Pseudo, 3698 ARM::VST4LNd32Pseudo }; 3699 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 3700 ARM::VST4LNq32Pseudo }; 3701 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 3702 return; 3703 } 3704 } 3705 break; 3706 } 3707 3708 case ISD::ATOMIC_CMP_SWAP: 3709 SelectCMP_SWAP(N); 3710 return; 3711 } 3712 3713 SelectCode(N); 3714 } 3715 3716 // Inspect a register string of the form 3717 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 3718 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 3719 // and obtain the integer operands from them, adding these operands to the 3720 // provided vector. 3721 static void getIntOperandsFromRegisterString(StringRef RegString, 3722 SelectionDAG *CurDAG, 3723 const SDLoc &DL, 3724 std::vector<SDValue> &Ops) { 3725 SmallVector<StringRef, 5> Fields; 3726 RegString.split(Fields, ':'); 3727 3728 if (Fields.size() > 1) { 3729 bool AllIntFields = true; 3730 3731 for (StringRef Field : Fields) { 3732 // Need to trim out leading 'cp' characters and get the integer field. 3733 unsigned IntField; 3734 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 3735 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 3736 } 3737 3738 assert(AllIntFields && 3739 "Unexpected non-integer value in special register string."); 3740 } 3741 } 3742 3743 // Maps a Banked Register string to its mask value. The mask value returned is 3744 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 3745 // mask operand, which expresses which register is to be used, e.g. r8, and in 3746 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 3747 // was invalid. 3748 static inline int getBankedRegisterMask(StringRef RegString) { 3749 return StringSwitch<int>(RegString.lower()) 3750 .Case("r8_usr", 0x00) 3751 .Case("r9_usr", 0x01) 3752 .Case("r10_usr", 0x02) 3753 .Case("r11_usr", 0x03) 3754 .Case("r12_usr", 0x04) 3755 .Case("sp_usr", 0x05) 3756 .Case("lr_usr", 0x06) 3757 .Case("r8_fiq", 0x08) 3758 .Case("r9_fiq", 0x09) 3759 .Case("r10_fiq", 0x0a) 3760 .Case("r11_fiq", 0x0b) 3761 .Case("r12_fiq", 0x0c) 3762 .Case("sp_fiq", 0x0d) 3763 .Case("lr_fiq", 0x0e) 3764 .Case("lr_irq", 0x10) 3765 .Case("sp_irq", 0x11) 3766 .Case("lr_svc", 0x12) 3767 .Case("sp_svc", 0x13) 3768 .Case("lr_abt", 0x14) 3769 .Case("sp_abt", 0x15) 3770 .Case("lr_und", 0x16) 3771 .Case("sp_und", 0x17) 3772 .Case("lr_mon", 0x1c) 3773 .Case("sp_mon", 0x1d) 3774 .Case("elr_hyp", 0x1e) 3775 .Case("sp_hyp", 0x1f) 3776 .Case("spsr_fiq", 0x2e) 3777 .Case("spsr_irq", 0x30) 3778 .Case("spsr_svc", 0x32) 3779 .Case("spsr_abt", 0x34) 3780 .Case("spsr_und", 0x36) 3781 .Case("spsr_mon", 0x3c) 3782 .Case("spsr_hyp", 0x3e) 3783 .Default(-1); 3784 } 3785 3786 // Maps a MClass special register string to its value for use in the 3787 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand. 3788 // Returns -1 to signify that the string was invalid. 3789 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { 3790 return StringSwitch<int>(RegString.lower()) 3791 .Case("apsr", 0x0) 3792 .Case("iapsr", 0x1) 3793 .Case("eapsr", 0x2) 3794 .Case("xpsr", 0x3) 3795 .Case("ipsr", 0x5) 3796 .Case("epsr", 0x6) 3797 .Case("iepsr", 0x7) 3798 .Case("msp", 0x8) 3799 .Case("psp", 0x9) 3800 .Case("primask", 0x10) 3801 .Case("basepri", 0x11) 3802 .Case("basepri_max", 0x12) 3803 .Case("faultmask", 0x13) 3804 .Case("control", 0x14) 3805 .Case("msplim", 0x0a) 3806 .Case("psplim", 0x0b) 3807 .Case("sp", 0x18) 3808 .Default(-1); 3809 } 3810 3811 // The flags here are common to those allowed for apsr in the A class cores and 3812 // those allowed for the special registers in the M class cores. Returns a 3813 // value representing which flags were present, -1 if invalid. 3814 static inline int getMClassFlagsMask(StringRef Flags) { 3815 return StringSwitch<int>(Flags) 3816 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 3817 // correct when flags are not permitted 3818 .Case("g", 0x1) 3819 .Case("nzcvq", 0x2) 3820 .Case("nzcvqg", 0x3) 3821 .Default(-1); 3822 } 3823 3824 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, 3825 const ARMSubtarget *Subtarget) { 3826 // Ensure that the register (without flags) was a valid M Class special 3827 // register. 3828 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg); 3829 if (SYSmvalue == -1) 3830 return -1; 3831 3832 // basepri, basepri_max and faultmask are only valid for V7m. 3833 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13) 3834 return -1; 3835 3836 if (Subtarget->has8MSecExt() && Flags.lower() == "ns") { 3837 Flags = ""; 3838 SYSmvalue |= 0x80; 3839 } 3840 3841 if (!Subtarget->has8MSecExt() && 3842 (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14)) 3843 return -1; 3844 3845 if (!Subtarget->hasV8MMainlineOps() && 3846 (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 || 3847 SYSmvalue == 0x93)) 3848 return -1; 3849 3850 // If it was a read then we won't be expecting flags and so at this point 3851 // we can return the mask. 3852 if (IsRead) { 3853 if (Flags.empty()) 3854 return SYSmvalue; 3855 else 3856 return -1; 3857 } 3858 3859 // We know we are now handling a write so need to get the mask for the flags. 3860 int Mask = getMClassFlagsMask(Flags); 3861 3862 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values 3863 // shouldn't have flags present. 3864 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty())) 3865 return -1; 3866 3867 // The _g and _nzcvqg versions are only valid if the DSP extension is 3868 // available. 3869 if (!Subtarget->hasDSP() && (Mask & 0x1)) 3870 return -1; 3871 3872 // The register was valid so need to put the mask in the correct place 3873 // (the flags need to be in bits 11-10) and combine with the SYSmvalue to 3874 // construct the operand for the instruction node. 3875 return SYSmvalue | Mask << 10; 3876 } 3877 3878 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 3879 // The mask operand contains the special register (R Bit) in bit 4, whether 3880 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 3881 // bits 3-0 contains the fields to be accessed in the special register, set by 3882 // the flags provided with the register. 3883 int Mask = 0; 3884 if (Reg == "apsr") { 3885 // The flags permitted for apsr are the same flags that are allowed in 3886 // M class registers. We get the flag value and then shift the flags into 3887 // the correct place to combine with the mask. 3888 Mask = getMClassFlagsMask(Flags); 3889 if (Mask == -1) 3890 return -1; 3891 return Mask << 2; 3892 } 3893 3894 if (Reg != "cpsr" && Reg != "spsr") { 3895 return -1; 3896 } 3897 3898 // This is the same as if the flags were "fc" 3899 if (Flags.empty() || Flags == "all") 3900 return Mask | 0x9; 3901 3902 // Inspect the supplied flags string and set the bits in the mask for 3903 // the relevant and valid flags allowed for cpsr and spsr. 3904 for (char Flag : Flags) { 3905 int FlagVal; 3906 switch (Flag) { 3907 case 'c': 3908 FlagVal = 0x1; 3909 break; 3910 case 'x': 3911 FlagVal = 0x2; 3912 break; 3913 case 's': 3914 FlagVal = 0x4; 3915 break; 3916 case 'f': 3917 FlagVal = 0x8; 3918 break; 3919 default: 3920 FlagVal = 0; 3921 } 3922 3923 // This avoids allowing strings where the same flag bit appears twice. 3924 if (!FlagVal || (Mask & FlagVal)) 3925 return -1; 3926 Mask |= FlagVal; 3927 } 3928 3929 // If the register is spsr then we need to set the R bit. 3930 if (Reg == "spsr") 3931 Mask |= 0x10; 3932 3933 return Mask; 3934 } 3935 3936 // Lower the read_register intrinsic to ARM specific DAG nodes 3937 // using the supplied metadata string to select the instruction node to use 3938 // and the registers/masks to construct as operands for the node. 3939 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 3940 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 3941 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 3942 bool IsThumb2 = Subtarget->isThumb2(); 3943 SDLoc DL(N); 3944 3945 std::vector<SDValue> Ops; 3946 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 3947 3948 if (!Ops.empty()) { 3949 // If the special register string was constructed of fields (as defined 3950 // in the ACLE) then need to lower to MRC node (32 bit) or 3951 // MRRC node(64 bit), we can make the distinction based on the number of 3952 // operands we have. 3953 unsigned Opcode; 3954 SmallVector<EVT, 3> ResTypes; 3955 if (Ops.size() == 5){ 3956 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 3957 ResTypes.append({ MVT::i32, MVT::Other }); 3958 } else { 3959 assert(Ops.size() == 3 && 3960 "Invalid number of fields in special register string."); 3961 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 3962 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 3963 } 3964 3965 Ops.push_back(getAL(CurDAG, DL)); 3966 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3967 Ops.push_back(N->getOperand(0)); 3968 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 3969 return true; 3970 } 3971 3972 std::string SpecialReg = RegString->getString().lower(); 3973 3974 int BankedReg = getBankedRegisterMask(SpecialReg); 3975 if (BankedReg != -1) { 3976 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 3977 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 3978 N->getOperand(0) }; 3979 ReplaceNode( 3980 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 3981 DL, MVT::i32, MVT::Other, Ops)); 3982 return true; 3983 } 3984 3985 // The VFP registers are read by creating SelectionDAG nodes with opcodes 3986 // corresponding to the register that is being read from. So we switch on the 3987 // string to find which opcode we need to use. 3988 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 3989 .Case("fpscr", ARM::VMRS) 3990 .Case("fpexc", ARM::VMRS_FPEXC) 3991 .Case("fpsid", ARM::VMRS_FPSID) 3992 .Case("mvfr0", ARM::VMRS_MVFR0) 3993 .Case("mvfr1", ARM::VMRS_MVFR1) 3994 .Case("mvfr2", ARM::VMRS_MVFR2) 3995 .Case("fpinst", ARM::VMRS_FPINST) 3996 .Case("fpinst2", ARM::VMRS_FPINST2) 3997 .Default(0); 3998 3999 // If an opcode was found then we can lower the read to a VFP instruction. 4000 if (Opcode) { 4001 if (!Subtarget->hasVFP2()) 4002 return false; 4003 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8()) 4004 return false; 4005 4006 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4007 N->getOperand(0) }; 4008 ReplaceNode(N, 4009 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 4010 return true; 4011 } 4012 4013 // If the target is M Class then need to validate that the register string 4014 // is an acceptable value, so check that a mask can be constructed from the 4015 // string. 4016 if (Subtarget->isMClass()) { 4017 StringRef Flags = "", Reg = SpecialReg; 4018 if (Reg.endswith("_ns")) { 4019 Flags = "ns"; 4020 Reg = Reg.drop_back(3); 4021 } 4022 4023 int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget); 4024 if (SYSmValue == -1) 4025 return false; 4026 4027 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4028 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4029 N->getOperand(0) }; 4030 ReplaceNode( 4031 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 4032 return true; 4033 } 4034 4035 // Here we know the target is not M Class so we need to check if it is one 4036 // of the remaining possible values which are apsr, cpsr or spsr. 4037 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 4038 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4039 N->getOperand(0) }; 4040 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 4041 DL, MVT::i32, MVT::Other, Ops)); 4042 return true; 4043 } 4044 4045 if (SpecialReg == "spsr") { 4046 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4047 N->getOperand(0) }; 4048 ReplaceNode( 4049 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 4050 MVT::i32, MVT::Other, Ops)); 4051 return true; 4052 } 4053 4054 return false; 4055 } 4056 4057 // Lower the write_register intrinsic to ARM specific DAG nodes 4058 // using the supplied metadata string to select the instruction node to use 4059 // and the registers/masks to use in the nodes 4060 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 4061 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4062 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4063 bool IsThumb2 = Subtarget->isThumb2(); 4064 SDLoc DL(N); 4065 4066 std::vector<SDValue> Ops; 4067 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4068 4069 if (!Ops.empty()) { 4070 // If the special register string was constructed of fields (as defined 4071 // in the ACLE) then need to lower to MCR node (32 bit) or 4072 // MCRR node(64 bit), we can make the distinction based on the number of 4073 // operands we have. 4074 unsigned Opcode; 4075 if (Ops.size() == 5) { 4076 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 4077 Ops.insert(Ops.begin()+2, N->getOperand(2)); 4078 } else { 4079 assert(Ops.size() == 3 && 4080 "Invalid number of fields in special register string."); 4081 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR; 4082 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 4083 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 4084 } 4085 4086 Ops.push_back(getAL(CurDAG, DL)); 4087 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4088 Ops.push_back(N->getOperand(0)); 4089 4090 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4091 return true; 4092 } 4093 4094 std::string SpecialReg = RegString->getString().lower(); 4095 int BankedReg = getBankedRegisterMask(SpecialReg); 4096 if (BankedReg != -1) { 4097 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 4098 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4099 N->getOperand(0) }; 4100 ReplaceNode( 4101 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 4102 DL, MVT::Other, Ops)); 4103 return true; 4104 } 4105 4106 // The VFP registers are written to by creating SelectionDAG nodes with 4107 // opcodes corresponding to the register that is being written. So we switch 4108 // on the string to find which opcode we need to use. 4109 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4110 .Case("fpscr", ARM::VMSR) 4111 .Case("fpexc", ARM::VMSR_FPEXC) 4112 .Case("fpsid", ARM::VMSR_FPSID) 4113 .Case("fpinst", ARM::VMSR_FPINST) 4114 .Case("fpinst2", ARM::VMSR_FPINST2) 4115 .Default(0); 4116 4117 if (Opcode) { 4118 if (!Subtarget->hasVFP2()) 4119 return false; 4120 Ops = { N->getOperand(2), getAL(CurDAG, DL), 4121 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4122 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4123 return true; 4124 } 4125 4126 std::pair<StringRef, StringRef> Fields; 4127 Fields = StringRef(SpecialReg).rsplit('_'); 4128 std::string Reg = Fields.first.str(); 4129 StringRef Flags = Fields.second; 4130 4131 // If the target was M Class then need to validate the special register value 4132 // and retrieve the mask for use in the instruction node. 4133 if (Subtarget->isMClass()) { 4134 // basepri_max gets split so need to correct Reg and Flags. 4135 if (SpecialReg == "basepri_max") { 4136 Reg = SpecialReg; 4137 Flags = ""; 4138 } 4139 int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget); 4140 if (SYSmValue == -1) 4141 return false; 4142 4143 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4144 N->getOperand(2), getAL(CurDAG, DL), 4145 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4146 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 4147 return true; 4148 } 4149 4150 // We then check to see if a valid mask can be constructed for one of the 4151 // register string values permitted for the A and R class cores. These values 4152 // are apsr, spsr and cpsr; these are also valid on older cores. 4153 int Mask = getARClassRegisterMask(Reg, Flags); 4154 if (Mask != -1) { 4155 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 4156 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4157 N->getOperand(0) }; 4158 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 4159 DL, MVT::Other, Ops)); 4160 return true; 4161 } 4162 4163 return false; 4164 } 4165 4166 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 4167 std::vector<SDValue> AsmNodeOperands; 4168 unsigned Flag, Kind; 4169 bool Changed = false; 4170 unsigned NumOps = N->getNumOperands(); 4171 4172 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 4173 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 4174 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 4175 // respectively. Since there is no constraint to explicitly specify a 4176 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 4177 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 4178 // them into a GPRPair. 4179 4180 SDLoc dl(N); 4181 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 4182 : SDValue(nullptr,0); 4183 4184 SmallVector<bool, 8> OpChanged; 4185 // Glue node will be appended late. 4186 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 4187 SDValue op = N->getOperand(i); 4188 AsmNodeOperands.push_back(op); 4189 4190 if (i < InlineAsm::Op_FirstOperand) 4191 continue; 4192 4193 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 4194 Flag = C->getZExtValue(); 4195 Kind = InlineAsm::getKind(Flag); 4196 } 4197 else 4198 continue; 4199 4200 // Immediate operands to inline asm in the SelectionDAG are modeled with 4201 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 4202 // the second is a constant with the value of the immediate. If we get here 4203 // and we have a Kind_Imm, skip the next operand, and continue. 4204 if (Kind == InlineAsm::Kind_Imm) { 4205 SDValue op = N->getOperand(++i); 4206 AsmNodeOperands.push_back(op); 4207 continue; 4208 } 4209 4210 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 4211 if (NumRegs) 4212 OpChanged.push_back(false); 4213 4214 unsigned DefIdx = 0; 4215 bool IsTiedToChangedOp = false; 4216 // If it's a use that is tied with a previous def, it has no 4217 // reg class constraint. 4218 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 4219 IsTiedToChangedOp = OpChanged[DefIdx]; 4220 4221 // Memory operands to inline asm in the SelectionDAG are modeled with two 4222 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 4223 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 4224 // it doesn't get misinterpreted), and continue. We do this here because 4225 // it's important to update the OpChanged array correctly before moving on. 4226 if (Kind == InlineAsm::Kind_Mem) { 4227 SDValue op = N->getOperand(++i); 4228 AsmNodeOperands.push_back(op); 4229 continue; 4230 } 4231 4232 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 4233 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 4234 continue; 4235 4236 unsigned RC; 4237 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 4238 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 4239 || NumRegs != 2) 4240 continue; 4241 4242 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 4243 SDValue V0 = N->getOperand(i+1); 4244 SDValue V1 = N->getOperand(i+2); 4245 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 4246 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 4247 SDValue PairedReg; 4248 MachineRegisterInfo &MRI = MF->getRegInfo(); 4249 4250 if (Kind == InlineAsm::Kind_RegDef || 4251 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 4252 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 4253 // the original GPRs. 4254 4255 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4256 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4257 SDValue Chain = SDValue(N,0); 4258 4259 SDNode *GU = N->getGluedUser(); 4260 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 4261 Chain.getValue(1)); 4262 4263 // Extract values from a GPRPair reg and copy to the original GPR reg. 4264 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4265 RegCopy); 4266 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4267 RegCopy); 4268 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 4269 RegCopy.getValue(1)); 4270 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 4271 4272 // Update the original glue user. 4273 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 4274 Ops.push_back(T1.getValue(1)); 4275 CurDAG->UpdateNodeOperands(GU, Ops); 4276 } 4277 else { 4278 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 4279 // GPRPair and then pass the GPRPair to the inline asm. 4280 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 4281 4282 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 4283 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 4284 Chain.getValue(1)); 4285 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 4286 T0.getValue(1)); 4287 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 4288 4289 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 4290 // i32 VRs of inline asm with it. 4291 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4292 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4293 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 4294 4295 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 4296 Glue = Chain.getValue(1); 4297 } 4298 4299 Changed = true; 4300 4301 if(PairedReg.getNode()) { 4302 OpChanged[OpChanged.size() -1 ] = true; 4303 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 4304 if (IsTiedToChangedOp) 4305 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 4306 else 4307 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 4308 // Replace the current flag. 4309 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 4310 Flag, dl, MVT::i32); 4311 // Add the new register node and skip the original two GPRs. 4312 AsmNodeOperands.push_back(PairedReg); 4313 // Skip the next two GPRs. 4314 i += 2; 4315 } 4316 } 4317 4318 if (Glue.getNode()) 4319 AsmNodeOperands.push_back(Glue); 4320 if (!Changed) 4321 return false; 4322 4323 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), 4324 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 4325 New->setNodeId(-1); 4326 ReplaceNode(N, New.getNode()); 4327 return true; 4328 } 4329 4330 4331 bool ARMDAGToDAGISel:: 4332 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 4333 std::vector<SDValue> &OutOps) { 4334 switch(ConstraintID) { 4335 default: 4336 llvm_unreachable("Unexpected asm memory constraint"); 4337 case InlineAsm::Constraint_i: 4338 // FIXME: It seems strange that 'i' is needed here since it's supposed to 4339 // be an immediate and not a memory constraint. 4340 LLVM_FALLTHROUGH; 4341 case InlineAsm::Constraint_m: 4342 case InlineAsm::Constraint_o: 4343 case InlineAsm::Constraint_Q: 4344 case InlineAsm::Constraint_Um: 4345 case InlineAsm::Constraint_Un: 4346 case InlineAsm::Constraint_Uq: 4347 case InlineAsm::Constraint_Us: 4348 case InlineAsm::Constraint_Ut: 4349 case InlineAsm::Constraint_Uv: 4350 case InlineAsm::Constraint_Uy: 4351 // Require the address to be in a register. That is safe for all ARM 4352 // variants and it is hard to do anything much smarter without knowing 4353 // how the operand is used. 4354 OutOps.push_back(Op); 4355 return false; 4356 } 4357 return true; 4358 } 4359 4360 /// createARMISelDag - This pass converts a legalized DAG into a 4361 /// ARM-specific DAG, ready for instruction scheduling. 4362 /// 4363 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 4364 CodeGenOpt::Level OptLevel) { 4365 return new ARMDAGToDAGISel(TM, OptLevel); 4366 } 4367