1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/SelectionDAGISel.h" 25 #include "llvm/CodeGen/TargetLowering.h" 26 #include "llvm/IR/CallingConv.h" 27 #include "llvm/IR/Constants.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/LLVMContext.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Target/TargetOptions.h" 36 37 using namespace llvm; 38 39 #define DEBUG_TYPE "arm-isel" 40 41 static cl::opt<bool> 42 DisableShifterOp("disable-shifter-op", cl::Hidden, 43 cl::desc("Disable isel of shifter-op"), 44 cl::init(false)); 45 46 //===--------------------------------------------------------------------===// 47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 48 /// instructions for SelectionDAG operations. 
49 /// 50 namespace { 51 52 class ARMDAGToDAGISel : public SelectionDAGISel { 53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 54 /// make the right decision when generating code for different targets. 55 const ARMSubtarget *Subtarget; 56 57 public: 58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) 59 : SelectionDAGISel(tm, OptLevel) {} 60 61 bool runOnMachineFunction(MachineFunction &MF) override { 62 // Reset the subtarget each time through. 63 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 64 SelectionDAGISel::runOnMachineFunction(MF); 65 return true; 66 } 67 68 StringRef getPassName() const override { return "ARM Instruction Selection"; } 69 70 void PreprocessISelDAG() override; 71 72 /// getI32Imm - Return a target constant of type i32 with the specified 73 /// value. 74 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 75 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 76 } 77 78 void Select(SDNode *N) override; 79 80 bool hasNoVMLxHazardUse(SDNode *N) const; 81 bool isShifterOpProfitable(const SDValue &Shift, 82 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 83 bool SelectRegShifterOperand(SDValue N, SDValue &A, 84 SDValue &B, SDValue &C, 85 bool CheckProfitability = true); 86 bool SelectImmShifterOperand(SDValue N, SDValue &A, 87 SDValue &B, bool CheckProfitability = true); 88 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, 89 SDValue &B, SDValue &C) { 90 // Don't apply the profitability check 91 return SelectRegShifterOperand(N, A, B, C, false); 92 } 93 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, 94 SDValue &B) { 95 // Don't apply the profitability check 96 return SelectImmShifterOperand(N, A, B, false); 97 } 98 99 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); 100 101 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 102 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 103 104 bool 
SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 105 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 106 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 107 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 108 return true; 109 } 110 111 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 112 SDValue &Offset, SDValue &Opc); 113 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 114 SDValue &Offset, SDValue &Opc); 115 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 116 SDValue &Offset, SDValue &Opc); 117 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 118 bool SelectAddrMode3(SDValue N, SDValue &Base, 119 SDValue &Offset, SDValue &Opc); 120 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 121 SDValue &Offset, SDValue &Opc); 122 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); 123 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); 124 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); 125 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 126 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 127 128 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 129 130 // Thumb Addressing Modes: 131 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 132 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); 133 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 134 SDValue &OffImm); 135 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 136 SDValue &OffImm); 137 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 138 SDValue &OffImm); 139 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 140 SDValue &OffImm); 141 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 142 143 // Thumb 2 Addressing Modes: 144 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 145 bool 
SelectT2AddrModeImm8(SDValue N, SDValue &Base, 146 SDValue &OffImm); 147 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 148 SDValue &OffImm); 149 template <unsigned Shift> 150 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); 151 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, 152 unsigned Shift); 153 template <unsigned Shift> 154 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 155 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 156 SDValue &OffReg, SDValue &ShImm); 157 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 158 159 inline bool is_so_imm(unsigned Imm) const { 160 return ARM_AM::getSOImmVal(Imm) != -1; 161 } 162 163 inline bool is_so_imm_not(unsigned Imm) const { 164 return ARM_AM::getSOImmVal(~Imm) != -1; 165 } 166 167 inline bool is_t2_so_imm(unsigned Imm) const { 168 return ARM_AM::getT2SOImmVal(Imm) != -1; 169 } 170 171 inline bool is_t2_so_imm_not(unsigned Imm) const { 172 return ARM_AM::getT2SOImmVal(~Imm) != -1; 173 } 174 175 // Include the pieces autogenerated from the target description. 176 #include "ARMGenDAGISel.inc" 177 178 private: 179 void transferMemOperands(SDNode *Src, SDNode *Dst); 180 181 /// Indexed (pre/post inc/dec) load matching code for ARM. 182 bool tryARMIndexedLoad(SDNode *N); 183 bool tryT1IndexedLoad(SDNode *N); 184 bool tryT2IndexedLoad(SDNode *N); 185 bool tryMVEIndexedLoad(SDNode *N); 186 187 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 188 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 189 /// loads of D registers and even subregs and odd subregs of Q registers. 190 /// For NumVecs <= 2, QOpcodes1 is not used. 191 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 192 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 193 const uint16_t *QOpcodes1); 194 195 /// SelectVST - Select NEON store intrinsics. NumVecs should 196 /// be 1, 2, 3 or 4. 
The opcode arrays specify the instructions used for 197 /// stores of D registers and even subregs and odd subregs of Q registers. 198 /// For NumVecs <= 2, QOpcodes1 is not used. 199 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 200 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 201 const uint16_t *QOpcodes1); 202 203 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 204 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 205 /// load/store of D registers and Q registers. 206 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 207 unsigned NumVecs, const uint16_t *DOpcodes, 208 const uint16_t *QOpcodes); 209 210 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 211 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 212 /// for loading D registers. 213 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, 214 unsigned NumVecs, const uint16_t *DOpcodes, 215 const uint16_t *QOpcodes0 = nullptr, 216 const uint16_t *QOpcodes1 = nullptr); 217 218 /// Try to select SBFX/UBFX instructions for ARM. 219 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 220 221 // Select special operations if node forms integer ABS pattern 222 bool tryABSOp(SDNode *N); 223 224 bool tryReadRegister(SDNode *N); 225 bool tryWriteRegister(SDNode *N); 226 227 bool tryInlineAsm(SDNode *N); 228 229 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 230 231 void SelectCMP_SWAP(SDNode *N); 232 233 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 234 /// inline asm expressions. 235 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 236 std::vector<SDValue> &OutOps) override; 237 238 // Form pairs of consecutive R, S, D, or Q registers. 
239 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 240 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 241 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 242 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 243 244 // Form sequences of 4 consecutive S, D, or Q registers. 245 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 246 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 247 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 248 249 // Get the alignment operand for a NEON VLD or VST instruction. 250 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 251 bool is64BitVector); 252 253 /// Returns the number of instructions required to materialize the given 254 /// constant in a register, or 3 if a literal pool load is needed. 255 unsigned ConstantMaterializationCost(unsigned Val) const; 256 257 /// Checks if N is a multiplication by a constant where we can extract out a 258 /// power of two from the constant so that it can be used in a shift, but only 259 /// if it simplifies the materialization of the constant. Returns true if it 260 /// is, and assigns to PowerOfTwo the power of two that should be extracted 261 /// out and to NewMulConst the new constant to be multiplied by. 262 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 263 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 264 265 /// Replace N with M in CurDAG, in a way that also ensures that M gets 266 /// selected when N would have been selected. 267 void replaceDAGValue(const SDValue &N, SDValue M); 268 }; 269 } 270 271 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 272 /// operand. If so Imm will receive the 32-bit value. 
273 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 274 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 275 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 276 return true; 277 } 278 return false; 279 } 280 281 // isInt32Immediate - This method tests to see if a constant operand. 282 // If so Imm will receive the 32 bit value. 283 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 284 return isInt32Immediate(N.getNode(), Imm); 285 } 286 287 // isOpcWithIntImmediate - This method tests to see if the node is a specific 288 // opcode and that it has a immediate integer right operand. 289 // If so Imm will receive the 32 bit value. 290 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 291 return N->getOpcode() == Opc && 292 isInt32Immediate(N->getOperand(1).getNode(), Imm); 293 } 294 295 /// Check whether a particular node is a constant value representable as 296 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 297 /// 298 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 299 static bool isScaledConstantInRange(SDValue Node, int Scale, 300 int RangeMin, int RangeMax, 301 int &ScaledConstant) { 302 assert(Scale > 0 && "Invalid scale!"); 303 304 // Check that this is a constant. 305 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 306 if (!C) 307 return false; 308 309 ScaledConstant = (int) C->getZExtValue(); 310 if ((ScaledConstant % Scale) != 0) 311 return false; 312 313 ScaledConstant /= Scale; 314 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 315 } 316 317 void ARMDAGToDAGISel::PreprocessISelDAG() { 318 if (!Subtarget->hasV6T2Ops()) 319 return; 320 321 bool isThumb2 = Subtarget->isThumb(); 322 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 323 E = CurDAG->allnodes_end(); I != E; ) { 324 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 
325 326 if (N->getOpcode() != ISD::ADD) 327 continue; 328 329 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 330 // leading zeros, followed by consecutive set bits, followed by 1 or 2 331 // trailing zeros, e.g. 1020. 332 // Transform the expression to 333 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 334 // of trailing zeros of c2. The left shift would be folded as an shifter 335 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 336 // node (UBFX). 337 338 SDValue N0 = N->getOperand(0); 339 SDValue N1 = N->getOperand(1); 340 unsigned And_imm = 0; 341 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 342 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 343 std::swap(N0, N1); 344 } 345 if (!And_imm) 346 continue; 347 348 // Check if the AND mask is an immediate of the form: 000.....1111111100 349 unsigned TZ = countTrailingZeros(And_imm); 350 if (TZ != 1 && TZ != 2) 351 // Be conservative here. Shifter operands aren't always free. e.g. On 352 // Swift, left shifter operand of 1 / 2 for free but others are not. 353 // e.g. 354 // ubfx r3, r1, #16, #8 355 // ldr.w r3, [r0, r3, lsl #2] 356 // vs. 357 // mov.w r9, #1020 358 // and.w r2, r9, r1, lsr #14 359 // ldr r2, [r0, r2] 360 continue; 361 And_imm >>= TZ; 362 if (And_imm & (And_imm + 1)) 363 continue; 364 365 // Look for (and (srl X, c1), c2). 366 SDValue Srl = N1.getOperand(0); 367 unsigned Srl_imm = 0; 368 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 369 (Srl_imm <= 2)) 370 continue; 371 372 // Make sure first operand is not a shifter operand which would prevent 373 // folding of the left shift. 
374 SDValue CPTmp0; 375 SDValue CPTmp1; 376 SDValue CPTmp2; 377 if (isThumb2) { 378 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 379 continue; 380 } else { 381 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 382 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 383 continue; 384 } 385 386 // Now make the transformation. 387 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 388 Srl.getOperand(0), 389 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 390 MVT::i32)); 391 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 392 Srl, 393 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 394 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 395 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 396 CurDAG->UpdateNodeOperands(N, N0, N1); 397 } 398 } 399 400 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 401 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 402 /// least on current ARM implementations) which should be avoidded. 403 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 404 if (OptLevel == CodeGenOpt::None) 405 return true; 406 407 if (!Subtarget->hasVMLxHazards()) 408 return true; 409 410 if (!N->hasOneUse()) 411 return false; 412 413 SDNode *Use = *N->use_begin(); 414 if (Use->getOpcode() == ISD::CopyToReg) 415 return true; 416 if (Use->isMachineOpcode()) { 417 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 418 CurDAG->getSubtarget().getInstrInfo()); 419 420 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 421 if (MCID.mayStore()) 422 return true; 423 unsigned Opcode = MCID.getOpcode(); 424 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 425 return true; 426 // vmlx feeding into another vmlx. We actually want to unfold 427 // the use later in the MLxExpansion pass. e.g. 428 // vmla 429 // vmla (stall 8 cycles) 430 // 431 // vmul (5 cycles) 432 // vadd (5 cycles) 433 // vmla 434 // This adds up to about 18 - 19 cycles. 
435 // 436 // vmla 437 // vmul (stall 4 cycles) 438 // vadd adds up to about 14 cycles. 439 return TII->isFpMLxInstruction(Opcode); 440 } 441 442 return false; 443 } 444 445 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 446 ARM_AM::ShiftOpc ShOpcVal, 447 unsigned ShAmt) { 448 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 449 return true; 450 if (Shift.hasOneUse()) 451 return true; 452 // R << 2 is free. 453 return ShOpcVal == ARM_AM::lsl && 454 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 455 } 456 457 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { 458 if (Subtarget->isThumb()) { 459 if (Val <= 255) return 1; // MOV 460 if (Subtarget->hasV6T2Ops() && 461 (Val <= 0xffff || // MOV 462 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW 463 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN 464 return 1; 465 if (Val <= 510) return 2; // MOV + ADDi8 466 if (~Val <= 255) return 2; // MOV + MVN 467 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL 468 } else { 469 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV 470 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN 471 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW 472 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs 473 } 474 if (Subtarget->useMovt()) return 2; // MOVW + MOVT 475 return 3; // Literal pool load 476 } 477 478 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 479 unsigned MaxShift, 480 unsigned &PowerOfTwo, 481 SDValue &NewMulConst) const { 482 assert(N.getOpcode() == ISD::MUL); 483 assert(MaxShift > 0); 484 485 // If the multiply is used in more than one place then changing the constant 486 // will make other uses incorrect, so don't. 
487 if (!N.hasOneUse()) return false; 488 // Check if the multiply is by a constant 489 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 490 if (!MulConst) return false; 491 // If the constant is used in more than one place then modifying it will mean 492 // we need to materialize two constants instead of one, which is a bad idea. 493 if (!MulConst->hasOneUse()) return false; 494 unsigned MulConstVal = MulConst->getZExtValue(); 495 if (MulConstVal == 0) return false; 496 497 // Find the largest power of 2 that MulConstVal is a multiple of 498 PowerOfTwo = MaxShift; 499 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 500 --PowerOfTwo; 501 if (PowerOfTwo == 0) return false; 502 } 503 504 // Only optimise if the new cost is better 505 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 506 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 507 unsigned OldCost = ConstantMaterializationCost(MulConstVal); 508 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal); 509 return NewCost < OldCost; 510 } 511 512 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 513 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 514 ReplaceUses(N, M); 515 } 516 517 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 518 SDValue &BaseReg, 519 SDValue &Opc, 520 bool CheckProfitability) { 521 if (DisableShifterOp) 522 return false; 523 524 // If N is a multiply-by-constant and it's profitable to extract a shift and 525 // use it in a shifted operand do so. 
526 if (N.getOpcode() == ISD::MUL) { 527 unsigned PowerOfTwo = 0; 528 SDValue NewMulConst; 529 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 530 HandleSDNode Handle(N); 531 SDLoc Loc(N); 532 replaceDAGValue(N.getOperand(1), NewMulConst); 533 BaseReg = Handle.getValue(); 534 Opc = CurDAG->getTargetConstant( 535 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 536 return true; 537 } 538 } 539 540 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 541 542 // Don't match base register only case. That is matched to a separate 543 // lower complexity pattern with explicit register operand. 544 if (ShOpcVal == ARM_AM::no_shift) return false; 545 546 BaseReg = N.getOperand(0); 547 unsigned ShImmVal = 0; 548 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 549 if (!RHS) return false; 550 ShImmVal = RHS->getZExtValue() & 31; 551 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 552 SDLoc(N), MVT::i32); 553 return true; 554 } 555 556 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 557 SDValue &BaseReg, 558 SDValue &ShReg, 559 SDValue &Opc, 560 bool CheckProfitability) { 561 if (DisableShifterOp) 562 return false; 563 564 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 565 566 // Don't match base register only case. That is matched to a separate 567 // lower complexity pattern with explicit register operand. 
568 if (ShOpcVal == ARM_AM::no_shift) return false; 569 570 BaseReg = N.getOperand(0); 571 unsigned ShImmVal = 0; 572 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 573 if (RHS) return false; 574 575 ShReg = N.getOperand(1); 576 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 577 return false; 578 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 579 SDLoc(N), MVT::i32); 580 return true; 581 } 582 583 // Determine whether an ISD::OR's operands are suitable to turn the operation 584 // into an addition, which often has more compact encodings. 585 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 586 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 587 Out = N; 588 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 589 } 590 591 592 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 593 SDValue &Base, 594 SDValue &OffImm) { 595 // Match simple R + imm12 operands. 596 597 // Base only. 598 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 599 !CurDAG->isBaseWithConstantOffset(N)) { 600 if (N.getOpcode() == ISD::FrameIndex) { 601 // Match frame index. 
602 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 603 Base = CurDAG->getTargetFrameIndex( 604 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 605 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 606 return true; 607 } 608 609 if (N.getOpcode() == ARMISD::Wrapper && 610 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 611 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 612 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 613 Base = N.getOperand(0); 614 } else 615 Base = N; 616 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 617 return true; 618 } 619 620 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 621 int RHSC = (int)RHS->getSExtValue(); 622 if (N.getOpcode() == ISD::SUB) 623 RHSC = -RHSC; 624 625 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 626 Base = N.getOperand(0); 627 if (Base.getOpcode() == ISD::FrameIndex) { 628 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 629 Base = CurDAG->getTargetFrameIndex( 630 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 631 } 632 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 633 return true; 634 } 635 } 636 637 // Base only. 638 Base = N; 639 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 640 return true; 641 } 642 643 644 645 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 646 SDValue &Opc) { 647 if (N.getOpcode() == ISD::MUL && 648 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 649 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 650 // X * [3,5,9] -> X + X * [2,4,8] etc. 
651 int RHSC = (int)RHS->getZExtValue(); 652 if (RHSC & 1) { 653 RHSC = RHSC & ~1; 654 ARM_AM::AddrOpc AddSub = ARM_AM::add; 655 if (RHSC < 0) { 656 AddSub = ARM_AM::sub; 657 RHSC = - RHSC; 658 } 659 if (isPowerOf2_32(RHSC)) { 660 unsigned ShAmt = Log2_32(RHSC); 661 Base = Offset = N.getOperand(0); 662 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 663 ARM_AM::lsl), 664 SDLoc(N), MVT::i32); 665 return true; 666 } 667 } 668 } 669 } 670 671 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 672 // ISD::OR that is equivalent to an ISD::ADD. 673 !CurDAG->isBaseWithConstantOffset(N)) 674 return false; 675 676 // Leave simple R +/- imm12 operands for LDRi12 677 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 678 int RHSC; 679 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 680 -0x1000+1, 0x1000, RHSC)) // 12 bits. 681 return false; 682 } 683 684 // Otherwise this is R +/- [possibly shifted] R. 685 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 686 ARM_AM::ShiftOpc ShOpcVal = 687 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 688 unsigned ShAmt = 0; 689 690 Base = N.getOperand(0); 691 Offset = N.getOperand(1); 692 693 if (ShOpcVal != ARM_AM::no_shift) { 694 // Check to see if the RHS of the shift is a constant, if not, we can't fold 695 // it. 696 if (ConstantSDNode *Sh = 697 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 698 ShAmt = Sh->getZExtValue(); 699 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 700 Offset = N.getOperand(1).getOperand(0); 701 else { 702 ShAmt = 0; 703 ShOpcVal = ARM_AM::no_shift; 704 } 705 } else { 706 ShOpcVal = ARM_AM::no_shift; 707 } 708 } 709 710 // Try matching (R shl C) + (R). 
711 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 712 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 713 N.getOperand(0).hasOneUse())) { 714 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 715 if (ShOpcVal != ARM_AM::no_shift) { 716 // Check to see if the RHS of the shift is a constant, if not, we can't 717 // fold it. 718 if (ConstantSDNode *Sh = 719 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 720 ShAmt = Sh->getZExtValue(); 721 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 722 Offset = N.getOperand(0).getOperand(0); 723 Base = N.getOperand(1); 724 } else { 725 ShAmt = 0; 726 ShOpcVal = ARM_AM::no_shift; 727 } 728 } else { 729 ShOpcVal = ARM_AM::no_shift; 730 } 731 } 732 } 733 734 // If Offset is a multiply-by-constant and it's profitable to extract a shift 735 // and use it in a shifted operand do so. 736 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 737 unsigned PowerOfTwo = 0; 738 SDValue NewMulConst; 739 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 740 HandleSDNode Handle(Offset); 741 replaceDAGValue(Offset.getOperand(1), NewMulConst); 742 Offset = Handle.getValue(); 743 ShAmt = PowerOfTwo; 744 ShOpcVal = ARM_AM::lsl; 745 } 746 } 747 748 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 749 SDLoc(N), MVT::i32); 750 return true; 751 } 752 753 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 754 SDValue &Offset, SDValue &Opc) { 755 unsigned Opcode = Op->getOpcode(); 756 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 757 ? cast<LoadSDNode>(Op)->getAddressingMode() 758 : cast<StoreSDNode>(Op)->getAddressingMode(); 759 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 760 ? 
ARM_AM::add : ARM_AM::sub; 761 int Val; 762 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 763 return false; 764 765 Offset = N; 766 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 767 unsigned ShAmt = 0; 768 if (ShOpcVal != ARM_AM::no_shift) { 769 // Check to see if the RHS of the shift is a constant, if not, we can't fold 770 // it. 771 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 772 ShAmt = Sh->getZExtValue(); 773 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 774 Offset = N.getOperand(0); 775 else { 776 ShAmt = 0; 777 ShOpcVal = ARM_AM::no_shift; 778 } 779 } else { 780 ShOpcVal = ARM_AM::no_shift; 781 } 782 } 783 784 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 785 SDLoc(N), MVT::i32); 786 return true; 787 } 788 789 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 790 SDValue &Offset, SDValue &Opc) { 791 unsigned Opcode = Op->getOpcode(); 792 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 793 ? cast<LoadSDNode>(Op)->getAddressingMode() 794 : cast<StoreSDNode>(Op)->getAddressingMode(); 795 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 796 ? ARM_AM::add : ARM_AM::sub; 797 int Val; 798 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 799 if (AddSub == ARM_AM::sub) Val *= -1; 800 Offset = CurDAG->getRegister(0, MVT::i32); 801 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 802 return true; 803 } 804 805 return false; 806 } 807 808 809 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 810 SDValue &Offset, SDValue &Opc) { 811 unsigned Opcode = Op->getOpcode(); 812 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 813 ? cast<LoadSDNode>(Op)->getAddressingMode() 814 : cast<StoreSDNode>(Op)->getAddressingMode(); 815 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 816 ? 
ARM_AM::add : ARM_AM::sub; 817 int Val; 818 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 819 Offset = CurDAG->getRegister(0, MVT::i32); 820 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 821 ARM_AM::no_shift), 822 SDLoc(Op), MVT::i32); 823 return true; 824 } 825 826 return false; 827 } 828 829 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 830 Base = N; 831 return true; 832 } 833 834 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 835 SDValue &Base, SDValue &Offset, 836 SDValue &Opc) { 837 if (N.getOpcode() == ISD::SUB) { 838 // X - C is canonicalize to X + -C, no need to handle it here. 839 Base = N.getOperand(0); 840 Offset = N.getOperand(1); 841 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 842 MVT::i32); 843 return true; 844 } 845 846 if (!CurDAG->isBaseWithConstantOffset(N)) { 847 Base = N; 848 if (N.getOpcode() == ISD::FrameIndex) { 849 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 850 Base = CurDAG->getTargetFrameIndex( 851 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 852 } 853 Offset = CurDAG->getRegister(0, MVT::i32); 854 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 855 MVT::i32); 856 return true; 857 } 858 859 // If the RHS is +/- imm8, fold into addr mode. 860 int RHSC; 861 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 862 -256 + 1, 256, RHSC)) { // 8 bits. 
863 Base = N.getOperand(0); 864 if (Base.getOpcode() == ISD::FrameIndex) { 865 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 866 Base = CurDAG->getTargetFrameIndex( 867 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 868 } 869 Offset = CurDAG->getRegister(0, MVT::i32); 870 871 ARM_AM::AddrOpc AddSub = ARM_AM::add; 872 if (RHSC < 0) { 873 AddSub = ARM_AM::sub; 874 RHSC = -RHSC; 875 } 876 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), 877 MVT::i32); 878 return true; 879 } 880 881 Base = N.getOperand(0); 882 Offset = N.getOperand(1); 883 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 884 MVT::i32); 885 return true; 886 } 887 888 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, 889 SDValue &Offset, SDValue &Opc) { 890 unsigned Opcode = Op->getOpcode(); 891 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 892 ? cast<LoadSDNode>(Op)->getAddressingMode() 893 : cast<StoreSDNode>(Op)->getAddressingMode(); 894 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 895 ? ARM_AM::add : ARM_AM::sub; 896 int Val; 897 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits. 
898 Offset = CurDAG->getRegister(0, MVT::i32); 899 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 900 MVT::i32); 901 return true; 902 } 903 904 Offset = N; 905 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 906 MVT::i32); 907 return true; 908 } 909 910 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 911 bool FP16) { 912 if (!CurDAG->isBaseWithConstantOffset(N)) { 913 Base = N; 914 if (N.getOpcode() == ISD::FrameIndex) { 915 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 916 Base = CurDAG->getTargetFrameIndex( 917 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 918 } else if (N.getOpcode() == ARMISD::Wrapper && 919 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 920 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 921 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 922 Base = N.getOperand(0); 923 } 924 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 925 SDLoc(N), MVT::i32); 926 return true; 927 } 928 929 // If the RHS is +/- imm8, fold into addr mode. 930 int RHSC; 931 const int Scale = FP16 ? 
2 : 4; 932 933 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 934 Base = N.getOperand(0); 935 if (Base.getOpcode() == ISD::FrameIndex) { 936 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 937 Base = CurDAG->getTargetFrameIndex( 938 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 939 } 940 941 ARM_AM::AddrOpc AddSub = ARM_AM::add; 942 if (RHSC < 0) { 943 AddSub = ARM_AM::sub; 944 RHSC = -RHSC; 945 } 946 947 if (FP16) 948 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 949 SDLoc(N), MVT::i32); 950 else 951 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 952 SDLoc(N), MVT::i32); 953 954 return true; 955 } 956 957 Base = N; 958 959 if (FP16) 960 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 961 SDLoc(N), MVT::i32); 962 else 963 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 964 SDLoc(N), MVT::i32); 965 966 return true; 967 } 968 969 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 970 SDValue &Base, SDValue &Offset) { 971 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 972 } 973 974 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 975 SDValue &Base, SDValue &Offset) { 976 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 977 } 978 979 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 980 SDValue &Align) { 981 Addr = N; 982 983 unsigned Alignment = 0; 984 985 MemSDNode *MemN = cast<MemSDNode>(Parent); 986 987 if (isa<LSBaseSDNode>(MemN) || 988 ((MemN->getOpcode() == ARMISD::VST1_UPD || 989 MemN->getOpcode() == ARMISD::VLD1_UPD) && 990 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 991 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 992 // The maximum alignment is equal to the memory size being referenced. 
993 unsigned MMOAlign = MemN->getAlignment(); 994 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 995 if (MMOAlign >= MemSize && MemSize > 1) 996 Alignment = MemSize; 997 } else { 998 // All other uses of addrmode6 are for intrinsics. For now just record 999 // the raw alignment value; it will be refined later based on the legal 1000 // alignment operands for the intrinsic. 1001 Alignment = MemN->getAlignment(); 1002 } 1003 1004 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1005 return true; 1006 } 1007 1008 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1009 SDValue &Offset) { 1010 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1011 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1012 if (AM != ISD::POST_INC) 1013 return false; 1014 Offset = N; 1015 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1016 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1017 Offset = CurDAG->getRegister(0, MVT::i32); 1018 } 1019 return true; 1020 } 1021 1022 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1023 SDValue &Offset, SDValue &Label) { 1024 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1025 Offset = N.getOperand(0); 1026 SDValue N1 = N.getOperand(1); 1027 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1028 SDLoc(N), MVT::i32); 1029 return true; 1030 } 1031 1032 return false; 1033 } 1034 1035 1036 //===----------------------------------------------------------------------===// 1037 // Thumb Addressing Modes 1038 //===----------------------------------------------------------------------===// 1039 1040 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1041 // Negative numbers are difficult to materialise in thumb1. If we are 1042 // selecting the add of a negative, instead try to select ri with a zero 1043 // offset, so create the add node directly which will become a sub. 
1044 if (N.getOpcode() != ISD::ADD) 1045 return false; 1046 1047 // Look for an imm which is not legal for ld/st, but is legal for sub. 1048 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1049 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1050 1051 return false; 1052 } 1053 1054 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1055 SDValue &Offset) { 1056 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1057 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1058 if (!NC || !NC->isNullValue()) 1059 return false; 1060 1061 Base = Offset = N; 1062 return true; 1063 } 1064 1065 Base = N.getOperand(0); 1066 Offset = N.getOperand(1); 1067 return true; 1068 } 1069 1070 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1071 SDValue &Offset) { 1072 if (shouldUseZeroOffsetLdSt(N)) 1073 return false; // Select ri instead 1074 return SelectThumbAddrModeRRSext(N, Base, Offset); 1075 } 1076 1077 bool 1078 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1079 SDValue &Base, SDValue &OffImm) { 1080 if (shouldUseZeroOffsetLdSt(N)) { 1081 Base = N; 1082 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1083 return true; 1084 } 1085 1086 if (!CurDAG->isBaseWithConstantOffset(N)) { 1087 if (N.getOpcode() == ISD::ADD) { 1088 return false; // We want to select register offset instead 1089 } else if (N.getOpcode() == ARMISD::Wrapper && 1090 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1091 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1092 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1093 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1094 Base = N.getOperand(0); 1095 } else { 1096 Base = N; 1097 } 1098 1099 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1100 return true; 1101 } 1102 1103 // If the RHS is + imm5 * scale, fold into addr mode. 
1104 int RHSC; 1105 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1106 Base = N.getOperand(0); 1107 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1108 return true; 1109 } 1110 1111 // Offset is too large, so use register offset instead. 1112 return false; 1113 } 1114 1115 bool 1116 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1117 SDValue &OffImm) { 1118 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1119 } 1120 1121 bool 1122 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1123 SDValue &OffImm) { 1124 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1125 } 1126 1127 bool 1128 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1129 SDValue &OffImm) { 1130 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1131 } 1132 1133 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1134 SDValue &Base, SDValue &OffImm) { 1135 if (N.getOpcode() == ISD::FrameIndex) { 1136 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1137 // Only multiples of 4 are allowed for the offset, so the frame object 1138 // alignment must be at least 4. 1139 MachineFrameInfo &MFI = MF->getFrameInfo(); 1140 if (MFI.getObjectAlignment(FI) < 4) 1141 MFI.setObjectAlignment(FI, 4); 1142 Base = CurDAG->getTargetFrameIndex( 1143 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1144 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1145 return true; 1146 } 1147 1148 if (!CurDAG->isBaseWithConstantOffset(N)) 1149 return false; 1150 1151 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1152 // If the RHS is + imm8 * scale, fold into addr mode. 1153 int RHSC; 1154 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1155 Base = N.getOperand(0); 1156 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1157 // Make sure the offset is inside the object, or we might fail to 1158 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1159 // it could show up anyway.) 1160 MachineFrameInfo &MFI = MF->getFrameInfo(); 1161 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1162 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1163 // indexed by the LHS must be 4-byte aligned. 1164 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4) 1165 MFI.setObjectAlignment(FI, 4); 1166 if (MFI.getObjectAlignment(FI) >= 4) { 1167 Base = CurDAG->getTargetFrameIndex( 1168 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1169 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1170 return true; 1171 } 1172 } 1173 } 1174 } 1175 1176 return false; 1177 } 1178 1179 1180 //===----------------------------------------------------------------------===// 1181 // Thumb 2 Addressing Modes 1182 //===----------------------------------------------------------------------===// 1183 1184 1185 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1186 SDValue &Base, SDValue &OffImm) { 1187 // Match simple R + imm12 operands. 1188 1189 // Base only. 1190 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1191 !CurDAG->isBaseWithConstantOffset(N)) { 1192 if (N.getOpcode() == ISD::FrameIndex) { 1193 // Match frame index. 1194 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1195 Base = CurDAG->getTargetFrameIndex( 1196 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1197 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1198 return true; 1199 } 1200 1201 if (N.getOpcode() == ARMISD::Wrapper && 1202 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1203 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1204 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1205 Base = N.getOperand(0); 1206 if (Base.getOpcode() == ISD::TargetConstantPool) 1207 return false; // We want to select t2LDRpci instead. 
1208 } else 1209 Base = N; 1210 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1211 return true; 1212 } 1213 1214 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1215 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1216 // Let t2LDRi8 handle (R - imm8). 1217 return false; 1218 1219 int RHSC = (int)RHS->getZExtValue(); 1220 if (N.getOpcode() == ISD::SUB) 1221 RHSC = -RHSC; 1222 1223 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1224 Base = N.getOperand(0); 1225 if (Base.getOpcode() == ISD::FrameIndex) { 1226 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1227 Base = CurDAG->getTargetFrameIndex( 1228 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1229 } 1230 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1231 return true; 1232 } 1233 } 1234 1235 // Base only. 1236 Base = N; 1237 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1238 return true; 1239 } 1240 1241 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1242 SDValue &Base, SDValue &OffImm) { 1243 // Match simple R - imm8 operands. 
1244 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1245 !CurDAG->isBaseWithConstantOffset(N)) 1246 return false; 1247 1248 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1249 int RHSC = (int)RHS->getSExtValue(); 1250 if (N.getOpcode() == ISD::SUB) 1251 RHSC = -RHSC; 1252 1253 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1254 Base = N.getOperand(0); 1255 if (Base.getOpcode() == ISD::FrameIndex) { 1256 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1257 Base = CurDAG->getTargetFrameIndex( 1258 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1259 } 1260 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1261 return true; 1262 } 1263 } 1264 1265 return false; 1266 } 1267 1268 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1269 SDValue &OffImm){ 1270 unsigned Opcode = Op->getOpcode(); 1271 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1272 ? cast<LoadSDNode>(Op)->getAddressingMode() 1273 : cast<StoreSDNode>(Op)->getAddressingMode(); 1274 int RHSC; 1275 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1276 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1277 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1278 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1279 return true; 1280 } 1281 1282 return false; 1283 } 1284 1285 template<unsigned Shift> 1286 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, 1287 SDValue &Base, SDValue &OffImm) { 1288 if (N.getOpcode() == ISD::SUB || 1289 CurDAG->isBaseWithConstantOffset(N)) { 1290 if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1291 int RHSC = (int)RHS->getZExtValue(); 1292 if (N.getOpcode() == ISD::SUB) 1293 RHSC = -RHSC; 1294 1295 if (isShiftedInt<7, Shift>(RHSC)) { 1296 Base = N.getOperand(0); 1297 if (Base.getOpcode() == ISD::FrameIndex) { 1298 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1299 Base = CurDAG->getTargetFrameIndex( 1300 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1301 } 1302 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1303 return true; 1304 } 1305 } 1306 } 1307 1308 // Base only. 1309 Base = N; 1310 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1311 return true; 1312 } 1313 1314 template <unsigned Shift> 1315 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1316 SDValue &OffImm) { 1317 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1318 } 1319 1320 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1321 SDValue &OffImm, 1322 unsigned Shift) { 1323 unsigned Opcode = Op->getOpcode(); 1324 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1325 ? cast<LoadSDNode>(Op)->getAddressingMode() 1326 : cast<StoreSDNode>(Op)->getAddressingMode(); 1327 int RHSC; 1328 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. 1329 OffImm = 1330 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1331 ? 
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1332 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1333 MVT::i32); 1334 return true; 1335 } 1336 return false; 1337 } 1338 1339 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1340 SDValue &Base, 1341 SDValue &OffReg, SDValue &ShImm) { 1342 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1343 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1344 return false; 1345 1346 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1347 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1348 int RHSC = (int)RHS->getZExtValue(); 1349 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1350 return false; 1351 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1352 return false; 1353 } 1354 1355 // Look for (R + R) or (R + (R << [1,2,3])). 1356 unsigned ShAmt = 0; 1357 Base = N.getOperand(0); 1358 OffReg = N.getOperand(1); 1359 1360 // Swap if it is ((R << c) + R). 1361 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1362 if (ShOpcVal != ARM_AM::lsl) { 1363 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1364 if (ShOpcVal == ARM_AM::lsl) 1365 std::swap(Base, OffReg); 1366 } 1367 1368 if (ShOpcVal == ARM_AM::lsl) { 1369 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1370 // it. 1371 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1372 ShAmt = Sh->getZExtValue(); 1373 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1374 OffReg = OffReg.getOperand(0); 1375 else { 1376 ShAmt = 0; 1377 } 1378 } 1379 } 1380 1381 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1382 // and use it in a shifted operand do so. 
1383 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1384 unsigned PowerOfTwo = 0; 1385 SDValue NewMulConst; 1386 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1387 HandleSDNode Handle(OffReg); 1388 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1389 OffReg = Handle.getValue(); 1390 ShAmt = PowerOfTwo; 1391 } 1392 } 1393 1394 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1395 1396 return true; 1397 } 1398 1399 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1400 SDValue &OffImm) { 1401 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1402 // instructions. 1403 Base = N; 1404 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1405 1406 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1407 return true; 1408 1409 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1410 if (!RHS) 1411 return true; 1412 1413 uint32_t RHSC = (int)RHS->getZExtValue(); 1414 if (RHSC > 1020 || RHSC % 4 != 0) 1415 return true; 1416 1417 Base = N.getOperand(0); 1418 if (Base.getOpcode() == ISD::FrameIndex) { 1419 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1420 Base = CurDAG->getTargetFrameIndex( 1421 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1422 } 1423 1424 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1425 return true; 1426 } 1427 1428 //===--------------------------------------------------------------------===// 1429 1430 /// getAL - Returns a ARMCC::AL immediate node. 
1431 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1432 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1433 } 1434 1435 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1436 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1437 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1438 } 1439 1440 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1441 LoadSDNode *LD = cast<LoadSDNode>(N); 1442 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1443 if (AM == ISD::UNINDEXED) 1444 return false; 1445 1446 EVT LoadedVT = LD->getMemoryVT(); 1447 SDValue Offset, AMOpc; 1448 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1449 unsigned Opcode = 0; 1450 bool Match = false; 1451 if (LoadedVT == MVT::i32 && isPre && 1452 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1453 Opcode = ARM::LDR_PRE_IMM; 1454 Match = true; 1455 } else if (LoadedVT == MVT::i32 && !isPre && 1456 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1457 Opcode = ARM::LDR_POST_IMM; 1458 Match = true; 1459 } else if (LoadedVT == MVT::i32 && 1460 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1461 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1462 Match = true; 1463 1464 } else if (LoadedVT == MVT::i16 && 1465 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1466 Match = true; 1467 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1468 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1469 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1470 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1471 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1472 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1473 Match = true; 1474 Opcode = isPre ? 
ARM::LDRSB_PRE : ARM::LDRSB_POST; 1475 } 1476 } else { 1477 if (isPre && 1478 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1479 Match = true; 1480 Opcode = ARM::LDRB_PRE_IMM; 1481 } else if (!isPre && 1482 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1483 Match = true; 1484 Opcode = ARM::LDRB_POST_IMM; 1485 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1486 Match = true; 1487 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1488 } 1489 } 1490 } 1491 1492 if (Match) { 1493 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1494 SDValue Chain = LD->getChain(); 1495 SDValue Base = LD->getBasePtr(); 1496 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1497 CurDAG->getRegister(0, MVT::i32), Chain }; 1498 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1499 MVT::Other, Ops); 1500 transferMemOperands(N, New); 1501 ReplaceNode(N, New); 1502 return true; 1503 } else { 1504 SDValue Chain = LD->getChain(); 1505 SDValue Base = LD->getBasePtr(); 1506 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1507 CurDAG->getRegister(0, MVT::i32), Chain }; 1508 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1509 MVT::Other, Ops); 1510 transferMemOperands(N, New); 1511 ReplaceNode(N, New); 1512 return true; 1513 } 1514 } 1515 1516 return false; 1517 } 1518 1519 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1520 LoadSDNode *LD = cast<LoadSDNode>(N); 1521 EVT LoadedVT = LD->getMemoryVT(); 1522 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1523 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1524 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1525 return false; 1526 1527 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1528 if (!COffs || COffs->getZExtValue() != 4) 1529 return false; 1530 1531 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 
1532 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1533 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1534 // ISel. 1535 SDValue Chain = LD->getChain(); 1536 SDValue Base = LD->getBasePtr(); 1537 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1538 CurDAG->getRegister(0, MVT::i32), Chain }; 1539 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1540 MVT::i32, MVT::Other, Ops); 1541 transferMemOperands(N, New); 1542 ReplaceNode(N, New); 1543 return true; 1544 } 1545 1546 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1547 LoadSDNode *LD = cast<LoadSDNode>(N); 1548 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1549 if (AM == ISD::UNINDEXED) 1550 return false; 1551 1552 EVT LoadedVT = LD->getMemoryVT(); 1553 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1554 SDValue Offset; 1555 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1556 unsigned Opcode = 0; 1557 bool Match = false; 1558 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1559 switch (LoadedVT.getSimpleVT().SimpleTy) { 1560 case MVT::i32: 1561 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1562 break; 1563 case MVT::i16: 1564 if (isSExtLd) 1565 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1566 else 1567 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1568 break; 1569 case MVT::i8: 1570 case MVT::i1: 1571 if (isSExtLd) 1572 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1573 else 1574 Opcode = isPre ? 
ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1575 break; 1576 default: 1577 return false; 1578 } 1579 Match = true; 1580 } 1581 1582 if (Match) { 1583 SDValue Chain = LD->getChain(); 1584 SDValue Base = LD->getBasePtr(); 1585 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), 1586 CurDAG->getRegister(0, MVT::i32), Chain }; 1587 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1588 MVT::Other, Ops); 1589 transferMemOperands(N, New); 1590 ReplaceNode(N, New); 1591 return true; 1592 } 1593 1594 return false; 1595 } 1596 1597 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { 1598 LoadSDNode *LD = cast<LoadSDNode>(N); 1599 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1600 if (AM == ISD::UNINDEXED) 1601 return false; 1602 EVT LoadedVT = LD->getMemoryVT(); 1603 if (!LoadedVT.isVector()) 1604 return false; 1605 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1606 SDValue Offset; 1607 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1608 unsigned Opcode = 0; 1609 unsigned Align = LD->getAlignment(); 1610 bool IsLE = Subtarget->isLittle(); 1611 1612 if (Align >= 2 && LoadedVT == MVT::v4i16 && 1613 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { 1614 if (isSExtLd) 1615 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; 1616 else 1617 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; 1618 } else if (LoadedVT == MVT::v8i8 && 1619 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { 1620 if (isSExtLd) 1621 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; 1622 else 1623 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; 1624 } else if (LoadedVT == MVT::v4i8 && 1625 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { 1626 if (isSExtLd) 1627 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; 1628 else 1629 Opcode = isPre ? 
ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; 1630 } else if (Align >= 4 && 1631 (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && 1632 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) 1633 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; 1634 else if (Align >= 2 && 1635 (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && 1636 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) 1637 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; 1638 else if ((IsLE || LoadedVT == MVT::v16i8) && 1639 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) 1640 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; 1641 else 1642 return false; 1643 1644 SDValue Chain = LD->getChain(); 1645 SDValue Base = LD->getBasePtr(); 1646 SDValue Ops[] = {Base, Offset, 1647 CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), 1648 CurDAG->getRegister(0, MVT::i32), Chain}; 1649 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), 1650 MVT::i32, MVT::Other, Ops); 1651 transferMemOperands(N, New); 1652 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 1653 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 1654 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 1655 CurDAG->RemoveDeadNode(N); 1656 return true; 1657 } 1658 1659 /// Form a GPRPair pseudo register from a pair of GPR regs. 1660 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1661 SDLoc dl(V0.getNode()); 1662 SDValue RegClass = 1663 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1664 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1665 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1666 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1667 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1668 } 1669 1670 /// Form a D register from a pair of S registers. 
1671 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1672 SDLoc dl(V0.getNode()); 1673 SDValue RegClass = 1674 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1675 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1676 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1677 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1678 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1679 } 1680 1681 /// Form a quad register from a pair of D registers. 1682 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1683 SDLoc dl(V0.getNode()); 1684 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1685 MVT::i32); 1686 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1687 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1688 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1689 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1690 } 1691 1692 /// Form 4 consecutive D registers from a pair of Q registers. 1693 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1694 SDLoc dl(V0.getNode()); 1695 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1696 MVT::i32); 1697 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1698 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1699 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1700 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1701 } 1702 1703 /// Form 4 consecutive S registers. 
1704 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1705 SDValue V2, SDValue V3) { 1706 SDLoc dl(V0.getNode()); 1707 SDValue RegClass = 1708 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1709 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1710 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1711 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1712 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1713 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1714 V2, SubReg2, V3, SubReg3 }; 1715 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1716 } 1717 1718 /// Form 4 consecutive D registers. 1719 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1720 SDValue V2, SDValue V3) { 1721 SDLoc dl(V0.getNode()); 1722 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1723 MVT::i32); 1724 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1725 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1726 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1727 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1728 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1729 V2, SubReg2, V3, SubReg3 }; 1730 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1731 } 1732 1733 /// Form 4 consecutive Q registers. 
1734 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1735 SDValue V2, SDValue V3) { 1736 SDLoc dl(V0.getNode()); 1737 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1738 MVT::i32); 1739 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1740 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1741 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1742 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1743 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1744 V2, SubReg2, V3, SubReg3 }; 1745 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1746 } 1747 1748 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1749 /// of a NEON VLD or VST instruction. The supported values depend on the 1750 /// number of registers being loaded. 1751 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1752 unsigned NumVecs, bool is64BitVector) { 1753 unsigned NumRegs = NumVecs; 1754 if (!is64BitVector && NumVecs < 3) 1755 NumRegs *= 2; 1756 1757 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1758 if (Alignment >= 32 && NumRegs == 4) 1759 Alignment = 32; 1760 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1761 Alignment = 16; 1762 else if (Alignment >= 8) 1763 Alignment = 8; 1764 else 1765 Alignment = 0; 1766 1767 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1768 } 1769 1770 static bool isVLDfixed(unsigned Opc) 1771 { 1772 switch (Opc) { 1773 default: return false; 1774 case ARM::VLD1d8wb_fixed : return true; 1775 case ARM::VLD1d16wb_fixed : return true; 1776 case ARM::VLD1d64Qwb_fixed : return true; 1777 case ARM::VLD1d32wb_fixed : return true; 1778 case ARM::VLD1d64wb_fixed : return true; 1779 case ARM::VLD1d64TPseudoWB_fixed : return true; 1780 case ARM::VLD1d64QPseudoWB_fixed : return true; 1781 case ARM::VLD1q8wb_fixed : 
return true; 1782 case ARM::VLD1q16wb_fixed : return true; 1783 case ARM::VLD1q32wb_fixed : return true; 1784 case ARM::VLD1q64wb_fixed : return true; 1785 case ARM::VLD1DUPd8wb_fixed : return true; 1786 case ARM::VLD1DUPd16wb_fixed : return true; 1787 case ARM::VLD1DUPd32wb_fixed : return true; 1788 case ARM::VLD1DUPq8wb_fixed : return true; 1789 case ARM::VLD1DUPq16wb_fixed : return true; 1790 case ARM::VLD1DUPq32wb_fixed : return true; 1791 case ARM::VLD2d8wb_fixed : return true; 1792 case ARM::VLD2d16wb_fixed : return true; 1793 case ARM::VLD2d32wb_fixed : return true; 1794 case ARM::VLD2q8PseudoWB_fixed : return true; 1795 case ARM::VLD2q16PseudoWB_fixed : return true; 1796 case ARM::VLD2q32PseudoWB_fixed : return true; 1797 case ARM::VLD2DUPd8wb_fixed : return true; 1798 case ARM::VLD2DUPd16wb_fixed : return true; 1799 case ARM::VLD2DUPd32wb_fixed : return true; 1800 } 1801 } 1802 1803 static bool isVSTfixed(unsigned Opc) 1804 { 1805 switch (Opc) { 1806 default: return false; 1807 case ARM::VST1d8wb_fixed : return true; 1808 case ARM::VST1d16wb_fixed : return true; 1809 case ARM::VST1d32wb_fixed : return true; 1810 case ARM::VST1d64wb_fixed : return true; 1811 case ARM::VST1q8wb_fixed : return true; 1812 case ARM::VST1q16wb_fixed : return true; 1813 case ARM::VST1q32wb_fixed : return true; 1814 case ARM::VST1q64wb_fixed : return true; 1815 case ARM::VST1d64TPseudoWB_fixed : return true; 1816 case ARM::VST1d64QPseudoWB_fixed : return true; 1817 case ARM::VST2d8wb_fixed : return true; 1818 case ARM::VST2d16wb_fixed : return true; 1819 case ARM::VST2d32wb_fixed : return true; 1820 case ARM::VST2q8PseudoWB_fixed : return true; 1821 case ARM::VST2q16PseudoWB_fixed : return true; 1822 case ARM::VST2q32PseudoWB_fixed : return true; 1823 } 1824 } 1825 1826 // Get the register stride update opcode of a VLD/VST instruction that 1827 // is otherwise equivalent to the given fixed stride updating instruction. 
1828 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1829 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1830 && "Incorrect fixed stride updating instruction."); 1831 switch (Opc) { 1832 default: break; 1833 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1834 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1835 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1836 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 1837 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 1838 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 1839 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 1840 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 1841 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 1842 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 1843 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 1844 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 1845 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 1846 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 1847 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 1848 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 1849 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 1850 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 1851 1852 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 1853 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 1854 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 1855 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 1856 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 1857 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 1858 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 1859 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 1860 case ARM::VST1d64TPseudoWB_fixed: return 
ARM::VST1d64TPseudoWB_register; 1861 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 1862 1863 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 1864 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 1865 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 1866 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 1867 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 1868 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 1869 1870 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 1871 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 1872 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 1873 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 1874 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 1875 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 1876 1877 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 1878 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 1879 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 1880 } 1881 return Opc; // If not one we handle, return it unchanged. 1882 } 1883 1884 /// Returns true if the given increment is a Constant known to be equal to the 1885 /// access size performed by a NEON load/store. This means the "[rN]!" form can 1886 /// be used. 
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  // A non-constant increment never qualifies.
  auto C = dyn_cast<ConstantSDNode>(Inc);
  // The access size is the byte size of one VecTy value times NumVecs.
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

/// Select a machine node for the NEON vector load \p N that produces
/// \p NumVecs (1-4) vectors.
///
/// \param isUpdating  selects the post-incrementing form: the node gets an
///                    extra i32 result and the increment is read from the
///                    operand following the address.
/// \param DOpcodes    opcode table used when the result is a 64-bit vector.
/// \param QOpcodes0   opcode table used otherwise; for VLD3/VLD4 it supplies
///                    the first instruction of a two-instruction sequence.
/// \param QOpcodes1   opcode table for the second instruction of that
///                    sequence.
///
/// Returns without modifying the DAG when SelectAddrMode6 rejects the address
/// operand.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  // The address is operand 2 for the intrinsic form and operand 1 otherwise.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the vector type to an index into the opcode tables. D- and Q-register
  // types share indices because they index different tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // A single vector keeps its own type. Multiple vectors are returned as one
  // vector of i64 elements; three vectors are rounded up to four elements, and
  // the count is doubled for non-64-bit vectors.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list: loaded value, [i32 result when updating], chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    // VLdA results: 0 = partially filled value, 1 = updated address, 2 = chain.
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    // The value produced by the first load is passed in as the source that the
    // second load completes.
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  // A single-vector load maps one-to-one onto the new node.
  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Results after the vectors on N are rewired to results 1 (and 2, when
  // updating) of the new node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Select a machine node for the NEON vector store \p N that stores
/// \p NumVecs (1-4) vectors.
///
/// \param isUpdating  selects the post-incrementing form, which produces an
///                    extra i32 result.
/// \param DOpcodes    opcode table used when the stored type is a 64-bit
///                    vector.
/// \param QOpcodes0   opcode table used otherwise; for VST3/VST4 it supplies
///                    the first instruction of a two-instruction sequence.
/// \param QOpcodes1   opcode table for the second instruction of that
///                    sequence.
///
/// Returns without modifying the DAG when SelectAddrMode6 rejects the address
/// operand.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  // Both forms put the first vector at operand 3: 2 + 1 for the intrinsic
  // form, 1 + 2 for the updating form.
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the vector type to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Result list: [i32 result when updating], chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  // A three-vector store fills the fourth slot with an IMPLICIT_DEF.
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  // VStA results: 0 = updated address, 1 = chain.
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

/// Select a machine node for a NEON single-lane load or store \p N operating
/// on \p NumVecs (2-4) vectors.
///
/// \param IsLoad      true for a lane load (the node produces the updated
///                    vectors), false for a lane store.
/// \param isUpdating  selects the post-incrementing form.
/// \param DOpcodes    opcode table used when the vector type is 64 bits wide.
/// \param QOpcodes    opcode table used otherwise.
///
/// Returns without modifying the DAG when SelectAddrMode6 rejects the address
/// operand.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand that follows the vectors.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Alignment stays 0 for three vectors. Otherwise it is capped at the number
  // of bytes accessed (NumVecs elements).
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the vector type to an index into the opcode tables. The Q-register
  // table has no 8-bit-element entry, so its indices start at the 16-bit
  // types.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result list: [super-register value when loading], [i32 when updating],
  // chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // Only one element per vector is accessed, so the increment is compared
    // against NumVecs elements rather than NumVecs whole vectors.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Combine the input vectors into one super-register value.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    // With three vectors the fourth slot is an IMPLICIT_DEF.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  // Stores have no vector results to split up.
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Select a machine node for the NEON load-and-duplicate \p N that produces
/// \p NumVecs (1-4) vectors.
///
/// \param IsIntrinsic  when true the address is operand 2 of \p N, otherwise
///                     operand 1.
/// \param isUpdating   selects the post-incrementing form.
/// \param DOpcodes     opcode table used when the result is a 64-bit vector.
/// \param QOpcodes0    opcode table used otherwise; for more than one vector
///                     it supplies the first of two instructions.
/// \param QOpcodes1    opcode table for the second of those instructions.
///
/// Returns without modifying the DAG when SelectAddrMode6 rejects the address
/// operand.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Alignment stays 0 for three vectors. Otherwise it is capped at the number
  // of bytes accessed (NumVecs elements).
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // The opcode index depends only on the element width here, so D- and
  // Q-register types of the same element size share an index.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The result is a vector of i64 elements; three vectors are rounded up to
  // four elements, and the count is doubled for non-64-bit vectors.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  // Result list: loaded value, [i32 result when updating], chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    // Single-instruction case.
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      // NOTE(review): the increment is read from operand 2 rather than
      // AddrOpIdx + 1; the two agree only when IsIntrinsic is false - confirm
      // that updating nodes are never intrinsics here.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // Two non-64-bit vectors: two instructions from the same address, the
    // second chained after the first.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // Three or four non-64-bit vectors: the first instruction starts from an
    // IMPLICIT_DEF and its value result is fed to the second instruction.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Try to select \p N as a bitfield extract (SBFX/UBFX or their Thumb2
/// forms), or as a plain right shift when the field reaches the top bit.
///
/// Recognised shapes:
///  - (and (srl X, C1), lowmask)
///  - (shift (shl X, C1), C2) with C2 >= C1
///  - (shift (and X, shifted-mask), C) with C equal to the mask's lowest bit
///  - (sign_extend_inreg (srl/sra X, C))
///
/// \p isSigned selects the signed opcode. Returns true if \p N was replaced,
/// false if the subtarget lacks V6T2 ops or no shape matched.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    // An AND that did not match the pattern above is never retried below.
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      // A negative LSB means the left shift is larger than the right shift,
      // which is not a plain bitfield extract.
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    // The field width is the size of the type being extended from; the field
    // starts at the shift amount of the SRL/SRA operand.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    // Width here is the real width, so encode it as width-1.
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X,  0,  X, -X
/// select_cc setgt    X, -1,  X, -X
/// select_cc setl[te] X,  0, -X,  X
/// select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  // N is treated as a two-operand node of the form (op0, op1); the match
  // below requires op0 to be an ADD and op1 to be an SRA.
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // Never matched on Thumb1-only subtargets.
  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  // Expected shift amount: the bit width of X minus one.
  unsigned Size = XType.getSizeInBits() - 1;

  // Require Y = sra(X, size-1) to be both the second ADD operand and the
  // second operand of N, with X being the first ADD operand.
  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
///
/// Replaces \p N with a CMP_SWAP_8/16/32 pseudo chosen from the node's memory
/// type. Any other memory type is unreachable.
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Operands 1-3 of N come first; operand 0 of N (passed last) is presumably
  // the chain - verify against the node definition.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  // The pseudo has results (i32, i32, Other).
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  // Result 0 maps to result 0; result 1 of N maps to result 2 of the pseudo,
  // so the pseudo's second i32 result is left unused.
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

/// If the set bits of \p A form a single contiguous run, returns the pair
/// (index of the highest set bit, index of the lowest set bit); otherwise
/// returns an empty Optional.
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  // NOTE(review): for A == 0 the subtraction below wraps around; the
  // population-count comparison is then expected to fail and yield an empty
  // Optional - confirm.
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  // The run is contiguous iff the number of set bits equals its span.
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

/// Try to replace the AND feeding an ARMISD::CMPZ against zero with one or two
/// flag-setting shifts (Thumb only).
///
/// \param [out] SwitchEQNEToPLMI  set to true when the single tested bit was
///                                moved into the sign bit, so the caller must
///                                use a PL/MI comparison; false otherwise.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // The AND is replaced below, so it must have no other users.
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Range->first is the highest set bit of the mask, Range->second the lowest.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emits a shift of Src by Imm. Opc is given as a Thumb1 opcode (tLSLri or
  // tLSRri) and is translated to the Thumb2 equivalent on Thumb2 subtargets;
  // the two encodings take differently ordered operand lists.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}

void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++.
This is a little 2737 // unfortunate, but I don't think I can make the chain check work 2738 // otherwise. (The chain of the store has to be the same as the chain 2739 // of the CopyFromReg, or else we can't replace the CopyFromReg with 2740 // a direct reference to "SP".) 2741 // 2742 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 2743 // a different addressing mode from other four-byte stores. 2744 // 2745 // This pattern usually comes up with call arguments. 2746 StoreSDNode *ST = cast<StoreSDNode>(N); 2747 SDValue Ptr = ST->getBasePtr(); 2748 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 2749 int RHSC = 0; 2750 if (Ptr.getOpcode() == ISD::ADD && 2751 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 2752 Ptr = Ptr.getOperand(0); 2753 2754 if (Ptr.getOpcode() == ISD::CopyFromReg && 2755 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 2756 Ptr.getOperand(0) == ST->getChain()) { 2757 SDValue Ops[] = {ST->getValue(), 2758 CurDAG->getRegister(ARM::SP, MVT::i32), 2759 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 2760 getAL(CurDAG, dl), 2761 CurDAG->getRegister(0, MVT::i32), 2762 ST->getChain()}; 2763 MachineSDNode *ResNode = 2764 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 2765 MachineMemOperand *MemOp = ST->getMemOperand(); 2766 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 2767 ReplaceNode(N, ResNode); 2768 return; 2769 } 2770 } 2771 break; 2772 } 2773 case ISD::WRITE_REGISTER: 2774 if (tryWriteRegister(N)) 2775 return; 2776 break; 2777 case ISD::READ_REGISTER: 2778 if (tryReadRegister(N)) 2779 return; 2780 break; 2781 case ISD::INLINEASM: 2782 case ISD::INLINEASM_BR: 2783 if (tryInlineAsm(N)) 2784 return; 2785 break; 2786 case ISD::XOR: 2787 // Select special operations if XOR node forms integer ABS pattern 2788 if (tryABSOp(N)) 2789 return; 2790 // Other cases are autogenerated. 
2791 break; 2792 case ISD::Constant: { 2793 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2794 // If we can't materialize the constant we need to use a literal pool 2795 if (ConstantMaterializationCost(Val) > 2) { 2796 SDValue CPIdx = CurDAG->getTargetConstantPool( 2797 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2798 TLI->getPointerTy(CurDAG->getDataLayout())); 2799 2800 SDNode *ResNode; 2801 if (Subtarget->isThumb()) { 2802 SDValue Ops[] = { 2803 CPIdx, 2804 getAL(CurDAG, dl), 2805 CurDAG->getRegister(0, MVT::i32), 2806 CurDAG->getEntryNode() 2807 }; 2808 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2809 Ops); 2810 } else { 2811 SDValue Ops[] = { 2812 CPIdx, 2813 CurDAG->getTargetConstant(0, dl, MVT::i32), 2814 getAL(CurDAG, dl), 2815 CurDAG->getRegister(0, MVT::i32), 2816 CurDAG->getEntryNode() 2817 }; 2818 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2819 Ops); 2820 } 2821 // Annotate the Node with memory operand information so that MachineInstr 2822 // queries work properly. This e.g. gives the register allocation the 2823 // required information for rematerialization. 2824 MachineFunction& MF = CurDAG->getMachineFunction(); 2825 MachineMemOperand *MemOp = 2826 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 2827 MachineMemOperand::MOLoad, 4, 4); 2828 2829 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 2830 2831 ReplaceNode(N, ResNode); 2832 return; 2833 } 2834 2835 // Other cases are autogenerated. 2836 break; 2837 } 2838 case ISD::FrameIndex: { 2839 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
2840 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2841 SDValue TFI = CurDAG->getTargetFrameIndex( 2842 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2843 if (Subtarget->isThumb1Only()) { 2844 // Set the alignment of the frame object to 4, to avoid having to generate 2845 // more than one ADD 2846 MachineFrameInfo &MFI = MF->getFrameInfo(); 2847 if (MFI.getObjectAlignment(FI) < 4) 2848 MFI.setObjectAlignment(FI, 4); 2849 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2850 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2851 return; 2852 } else { 2853 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 2854 ARM::t2ADDri : ARM::ADDri); 2855 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2856 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2857 CurDAG->getRegister(0, MVT::i32) }; 2858 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2859 return; 2860 } 2861 } 2862 case ISD::SRL: 2863 if (tryV6T2BitfieldExtractOp(N, false)) 2864 return; 2865 break; 2866 case ISD::SIGN_EXTEND_INREG: 2867 case ISD::SRA: 2868 if (tryV6T2BitfieldExtractOp(N, true)) 2869 return; 2870 break; 2871 case ISD::MUL: 2872 if (Subtarget->isThumb1Only()) 2873 break; 2874 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2875 unsigned RHSV = C->getZExtValue(); 2876 if (!RHSV) break; 2877 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
2878 unsigned ShImm = Log2_32(RHSV-1); 2879 if (ShImm >= 32) 2880 break; 2881 SDValue V = N->getOperand(0); 2882 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2883 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2884 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2885 if (Subtarget->isThumb()) { 2886 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2887 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2888 return; 2889 } else { 2890 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2891 Reg0 }; 2892 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2893 return; 2894 } 2895 } 2896 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 2897 unsigned ShImm = Log2_32(RHSV+1); 2898 if (ShImm >= 32) 2899 break; 2900 SDValue V = N->getOperand(0); 2901 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2902 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2903 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2904 if (Subtarget->isThumb()) { 2905 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2906 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2907 return; 2908 } else { 2909 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2910 Reg0 }; 2911 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2912 return; 2913 } 2914 } 2915 } 2916 break; 2917 case ISD::AND: { 2918 // Check for unsigned bitfield extract 2919 if (tryV6T2BitfieldExtractOp(N, false)) 2920 return; 2921 2922 // If an immediate is used in an AND node, it is possible that the immediate 2923 // can be more optimally materialized when negated. If this is the case we 2924 // can negate the immediate and use a BIC instead. 2925 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2926 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 2927 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 2928 2929 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 2930 // immediate can be negated and fit in the immediate operand of 2931 // a t2BIC, don't do any manual transform here as this can be 2932 // handled by the generic ISel machinery. 2933 bool PreferImmediateEncoding = 2934 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 2935 if (!PreferImmediateEncoding && 2936 ConstantMaterializationCost(Imm) > 2937 ConstantMaterializationCost(~Imm)) { 2938 // The current immediate costs more to materialize than a negated 2939 // immediate, so negate the immediate and use a BIC. 2940 SDValue NewImm = 2941 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 2942 // If the new constant didn't exist before, reposition it in the topological 2943 // ordering so it is just before N. Otherwise, don't touch its location. 2944 if (NewImm->getNodeId() == -1) 2945 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 2946 2947 if (!Subtarget->hasThumb2()) { 2948 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 2949 N->getOperand(0), NewImm, getAL(CurDAG, dl), 2950 CurDAG->getRegister(0, MVT::i32)}; 2951 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 2952 return; 2953 } else { 2954 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 2955 CurDAG->getRegister(0, MVT::i32), 2956 CurDAG->getRegister(0, MVT::i32)}; 2957 ReplaceNode(N, 2958 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 2959 return; 2960 } 2961 } 2962 } 2963 2964 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 2965 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 2966 // are entirely contributed by c2 and lower 16-bits are entirely contributed 2967 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 2968 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 2969 EVT VT = N->getValueType(0); 2970 if (VT != MVT::i32) 2971 break; 2972 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2973 ? 
ARM::t2MOVTi16 2974 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 2975 if (!Opc) 2976 break; 2977 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2978 N1C = dyn_cast<ConstantSDNode>(N1); 2979 if (!N1C) 2980 break; 2981 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2982 SDValue N2 = N0.getOperand(1); 2983 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2984 if (!N2C) 2985 break; 2986 unsigned N1CVal = N1C->getZExtValue(); 2987 unsigned N2CVal = N2C->getZExtValue(); 2988 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2989 (N1CVal & 0xffffU) == 0xffffU && 2990 (N2CVal & 0xffffU) == 0x0U) { 2991 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2992 dl, MVT::i32); 2993 SDValue Ops[] = { N0.getOperand(0), Imm16, 2994 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2995 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 2996 return; 2997 } 2998 } 2999 3000 break; 3001 } 3002 case ARMISD::UMAAL: { 3003 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3004 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3005 N->getOperand(2), N->getOperand(3), 3006 getAL(CurDAG, dl), 3007 CurDAG->getRegister(0, MVT::i32) }; 3008 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3009 return; 3010 } 3011 case ARMISD::UMLAL:{ 3012 if (Subtarget->isThumb()) { 3013 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3014 N->getOperand(3), getAL(CurDAG, dl), 3015 CurDAG->getRegister(0, MVT::i32)}; 3016 ReplaceNode( 3017 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3018 return; 3019 }else{ 3020 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3021 N->getOperand(3), getAL(CurDAG, dl), 3022 CurDAG->getRegister(0, MVT::i32), 3023 CurDAG->getRegister(0, MVT::i32) }; 3024 ReplaceNode(N, CurDAG->getMachineNode( 3025 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3026 MVT::i32, MVT::i32, Ops)); 3027 return; 3028 } 3029 } 3030 case ARMISD::SMLAL:{ 3031 if (Subtarget->isThumb()) { 3032 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3033 N->getOperand(3), getAL(CurDAG, dl), 3034 CurDAG->getRegister(0, MVT::i32)}; 3035 ReplaceNode( 3036 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3037 return; 3038 }else{ 3039 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3040 N->getOperand(3), getAL(CurDAG, dl), 3041 CurDAG->getRegister(0, MVT::i32), 3042 CurDAG->getRegister(0, MVT::i32) }; 3043 ReplaceNode(N, CurDAG->getMachineNode( 3044 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3045 MVT::i32, MVT::i32, Ops)); 3046 return; 3047 } 3048 } 3049 case ARMISD::SUBE: { 3050 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3051 break; 3052 // Look for a pattern to match SMMLS 3053 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3054 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3055 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3056 !SDValue(N, 1).use_empty()) 3057 break; 3058 3059 if (Subtarget->isThumb()) 3060 assert(Subtarget->hasThumb2() && 3061 "This pattern should not be generated for Thumb"); 3062 3063 SDValue SmulLoHi = N->getOperand(1); 3064 SDValue Subc = N->getOperand(2); 3065 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3066 3067 if (!Zero || Zero->getZExtValue() != 0 || 3068 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3069 N->getOperand(1) != SmulLoHi.getValue(1) || 3070 N->getOperand(2) != Subc.getValue(1)) 3071 break; 3072 3073 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3074 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3075 N->getOperand(0), getAL(CurDAG, dl), 3076 CurDAG->getRegister(0, MVT::i32) }; 3077 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3078 return; 3079 } 3080 case ISD::LOAD: { 3081 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3082 return; 3083 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3084 if (tryT2IndexedLoad(N)) 3085 return; 3086 } else if (Subtarget->isThumb()) { 3087 if (tryT1IndexedLoad(N)) 3088 return; 3089 } else if (tryARMIndexedLoad(N)) 3090 return; 3091 // Other cases are autogenerated. 3092 break; 3093 } 3094 case ARMISD::WLS: 3095 case ARMISD::LE: { 3096 SDValue Ops[] = { N->getOperand(1), 3097 N->getOperand(2), 3098 N->getOperand(0) }; 3099 unsigned Opc = N->getOpcode() == ARMISD::WLS ? 3100 ARM::t2WhileLoopStart : ARM::t2LoopEnd; 3101 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3102 ReplaceUses(N, New); 3103 CurDAG->RemoveDeadNode(N); 3104 return; 3105 } 3106 case ARMISD::LOOP_DEC: { 3107 SDValue Ops[] = { N->getOperand(1), 3108 N->getOperand(2), 3109 N->getOperand(0) }; 3110 SDNode *Dec = 3111 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3112 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3113 ReplaceUses(N, Dec); 3114 CurDAG->RemoveDeadNode(N); 3115 return; 3116 } 3117 case ARMISD::BRCOND: { 3118 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3119 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3120 // Pattern complexity = 6 cost = 1 size = 0 3121 3122 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3123 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3124 // Pattern complexity = 6 cost = 1 size = 0 3125 3126 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3127 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3128 // Pattern complexity = 6 cost = 1 size = 0 3129 3130 unsigned Opc = Subtarget->isThumb() ? 3131 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3132 SDValue Chain = N->getOperand(0); 3133 SDValue N1 = N->getOperand(1); 3134 SDValue N2 = N->getOperand(2); 3135 SDValue N3 = N->getOperand(3); 3136 SDValue InFlag = N->getOperand(4); 3137 assert(N1.getOpcode() == ISD::BasicBlock); 3138 assert(N2.getOpcode() == ISD::Constant); 3139 assert(N3.getOpcode() == ISD::Register); 3140 3141 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3142 3143 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3144 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3145 SDValue Int = InFlag.getOperand(0); 3146 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3147 3148 // Handle low-overhead loops. 3149 if (ID == Intrinsic::loop_decrement_reg) { 3150 SDValue Elements = Int.getOperand(2); 3151 SDValue Size = CurDAG->getTargetConstant( 3152 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3153 MVT::i32); 3154 3155 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3156 SDNode *LoopDec = 3157 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3158 CurDAG->getVTList(MVT::i32, MVT::Other), 3159 Args); 3160 ReplaceUses(Int.getNode(), LoopDec); 3161 3162 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3163 SDNode *LoopEnd = 3164 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3165 3166 ReplaceUses(N, LoopEnd); 3167 CurDAG->RemoveDeadNode(N); 3168 CurDAG->RemoveDeadNode(InFlag.getNode()); 3169 CurDAG->RemoveDeadNode(Int.getNode()); 3170 return; 3171 } 3172 } 3173 3174 bool SwitchEQNEToPLMI; 3175 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3176 InFlag = N->getOperand(4); 3177 3178 if (SwitchEQNEToPLMI) { 3179 switch ((ARMCC::CondCodes)CC) { 3180 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3181 case ARMCC::NE: 3182 CC = (unsigned)ARMCC::MI; 3183 break; 3184 case ARMCC::EQ: 3185 CC = (unsigned)ARMCC::PL; 3186 break; 3187 } 3188 } 3189 } 3190 3191 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3192 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3193 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3194 MVT::Glue, Ops); 3195 Chain = SDValue(ResNode, 0); 3196 if (N->getNumValues() == 2) { 3197 InFlag = SDValue(ResNode, 1); 3198 ReplaceUses(SDValue(N, 1), InFlag); 3199 } 3200 ReplaceUses(SDValue(N, 0), 3201 SDValue(Chain.getNode(), Chain.getResNo())); 3202 CurDAG->RemoveDeadNode(N); 3203 return; 3204 } 3205 3206 case ARMISD::CMPZ: { 3207 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3208 // This allows us to avoid materializing the expensive negative constant. 3209 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3210 // for its glue output. 3211 SDValue X = N->getOperand(0); 3212 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3213 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3214 int64_t Addend = -C->getSExtValue(); 3215 3216 SDNode *Add = nullptr; 3217 // ADDS can be better than CMN if the immediate fits in a 3218 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3219 // Outside that range we can just use a CMN which is 32-bit but has a 3220 // 12-bit immediate range. 3221 if (Addend < 1<<8) { 3222 if (Subtarget->isThumb2()) { 3223 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3224 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3225 CurDAG->getRegister(0, MVT::i32) }; 3226 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3227 } else { 3228 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 3229 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3230 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3231 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3232 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3233 } 3234 } 3235 if (Add) { 3236 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3237 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3238 } 3239 } 3240 // Other cases are autogenerated. 3241 break; 3242 } 3243 3244 case ARMISD::CMOV: { 3245 SDValue InFlag = N->getOperand(4); 3246 3247 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3248 bool SwitchEQNEToPLMI; 3249 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3250 3251 if (SwitchEQNEToPLMI) { 3252 SDValue ARMcc = N->getOperand(2); 3253 ARMCC::CondCodes CC = 3254 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3255 3256 switch (CC) { 3257 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3258 case ARMCC::NE: 3259 CC = ARMCC::MI; 3260 break; 3261 case ARMCC::EQ: 3262 CC = ARMCC::PL; 3263 break; 3264 } 3265 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3266 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3267 N->getOperand(3), N->getOperand(4)}; 3268 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3269 } 3270 3271 } 3272 // Other cases are autogenerated. 3273 break; 3274 } 3275 3276 case ARMISD::VZIP: { 3277 unsigned Opc = 0; 3278 EVT VT = N->getValueType(0); 3279 switch (VT.getSimpleVT().SimpleTy) { 3280 default: return; 3281 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3282 case MVT::v4f16: 3283 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3284 case MVT::v2f32: 3285 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3286 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3287 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3288 case MVT::v8f16: 3289 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3290 case MVT::v4f32: 3291 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3292 } 3293 SDValue Pred = getAL(CurDAG, dl); 3294 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3295 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3296 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3297 return; 3298 } 3299 case ARMISD::VUZP: { 3300 unsigned Opc = 0; 3301 EVT VT = N->getValueType(0); 3302 switch (VT.getSimpleVT().SimpleTy) { 3303 default: return; 3304 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3305 case MVT::v4f16: 3306 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3307 case MVT::v2f32: 3308 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3309 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3310 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3311 case MVT::v8f16: 3312 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3313 case MVT::v4f32: 3314 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3315 } 3316 SDValue Pred = getAL(CurDAG, dl); 3317 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3318 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3319 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3320 return; 3321 } 3322 case ARMISD::VTRN: { 3323 unsigned Opc = 0; 3324 EVT VT = N->getValueType(0); 3325 switch (VT.getSimpleVT().SimpleTy) { 3326 default: return; 3327 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3328 case MVT::v4f16: 3329 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3330 case MVT::v2f32: 3331 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3332 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3333 case MVT::v8f16: 3334 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3335 case MVT::v4f32: 3336 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3337 } 3338 SDValue Pred = getAL(CurDAG, dl); 3339 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 3340 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3341 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3342 return; 3343 } 3344 case ARMISD::BUILD_VECTOR: { 3345 EVT VecVT = N->getValueType(0); 3346 EVT EltVT = VecVT.getVectorElementType(); 3347 unsigned NumElts = VecVT.getVectorNumElements(); 3348 if (EltVT == MVT::f64) { 3349 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3350 ReplaceNode( 3351 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3352 return; 3353 } 3354 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3355 if (NumElts == 2) { 3356 ReplaceNode( 3357 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3358 return; 3359 } 3360 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3361 ReplaceNode(N, 3362 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3363 N->getOperand(2), N->getOperand(3))); 3364 return; 3365 } 3366 3367 case ARMISD::VLD1DUP: { 3368 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 3369 ARM::VLD1DUPd32 }; 3370 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 3371 ARM::VLD1DUPq32 }; 3372 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 3373 return; 3374 } 3375 3376 case ARMISD::VLD2DUP: { 3377 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3378 ARM::VLD2DUPd32 }; 3379 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 3380 return; 3381 } 3382 3383 case ARMISD::VLD3DUP: { 3384 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 3385 ARM::VLD3DUPd16Pseudo, 3386 ARM::VLD3DUPd32Pseudo }; 3387 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 3388 return; 3389 } 3390 3391 case ARMISD::VLD4DUP: { 3392 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 3393 ARM::VLD4DUPd16Pseudo, 3394 ARM::VLD4DUPd32Pseudo }; 3395 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 3396 return; 
3397 } 3398 3399 case ARMISD::VLD1DUP_UPD: { 3400 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 3401 ARM::VLD1DUPd16wb_fixed, 3402 ARM::VLD1DUPd32wb_fixed }; 3403 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 3404 ARM::VLD1DUPq16wb_fixed, 3405 ARM::VLD1DUPq32wb_fixed }; 3406 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 3407 return; 3408 } 3409 3410 case ARMISD::VLD2DUP_UPD: { 3411 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 3412 ARM::VLD2DUPd16wb_fixed, 3413 ARM::VLD2DUPd32wb_fixed }; 3414 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 3415 return; 3416 } 3417 3418 case ARMISD::VLD3DUP_UPD: { 3419 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 3420 ARM::VLD3DUPd16Pseudo_UPD, 3421 ARM::VLD3DUPd32Pseudo_UPD }; 3422 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 3423 return; 3424 } 3425 3426 case ARMISD::VLD4DUP_UPD: { 3427 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 3428 ARM::VLD4DUPd16Pseudo_UPD, 3429 ARM::VLD4DUPd32Pseudo_UPD }; 3430 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 3431 return; 3432 } 3433 3434 case ARMISD::VLD1_UPD: { 3435 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 3436 ARM::VLD1d16wb_fixed, 3437 ARM::VLD1d32wb_fixed, 3438 ARM::VLD1d64wb_fixed }; 3439 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 3440 ARM::VLD1q16wb_fixed, 3441 ARM::VLD1q32wb_fixed, 3442 ARM::VLD1q64wb_fixed }; 3443 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 3444 return; 3445 } 3446 3447 case ARMISD::VLD2_UPD: { 3448 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 3449 ARM::VLD2d16wb_fixed, 3450 ARM::VLD2d32wb_fixed, 3451 ARM::VLD1q64wb_fixed}; 3452 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 3453 ARM::VLD2q16PseudoWB_fixed, 3454 ARM::VLD2q32PseudoWB_fixed }; 3455 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 3456 return; 3457 } 3458 3459 case ARMISD::VLD3_UPD: { 
3460 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 3461 ARM::VLD3d16Pseudo_UPD, 3462 ARM::VLD3d32Pseudo_UPD, 3463 ARM::VLD1d64TPseudoWB_fixed}; 3464 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3465 ARM::VLD3q16Pseudo_UPD, 3466 ARM::VLD3q32Pseudo_UPD }; 3467 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 3468 ARM::VLD3q16oddPseudo_UPD, 3469 ARM::VLD3q32oddPseudo_UPD }; 3470 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3471 return; 3472 } 3473 3474 case ARMISD::VLD4_UPD: { 3475 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3476 ARM::VLD4d16Pseudo_UPD, 3477 ARM::VLD4d32Pseudo_UPD, 3478 ARM::VLD1d64QPseudoWB_fixed}; 3479 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3480 ARM::VLD4q16Pseudo_UPD, 3481 ARM::VLD4q32Pseudo_UPD }; 3482 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3483 ARM::VLD4q16oddPseudo_UPD, 3484 ARM::VLD4q32oddPseudo_UPD }; 3485 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3486 return; 3487 } 3488 3489 case ARMISD::VLD2LN_UPD: { 3490 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3491 ARM::VLD2LNd16Pseudo_UPD, 3492 ARM::VLD2LNd32Pseudo_UPD }; 3493 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3494 ARM::VLD2LNq32Pseudo_UPD }; 3495 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3496 return; 3497 } 3498 3499 case ARMISD::VLD3LN_UPD: { 3500 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3501 ARM::VLD3LNd16Pseudo_UPD, 3502 ARM::VLD3LNd32Pseudo_UPD }; 3503 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3504 ARM::VLD3LNq32Pseudo_UPD }; 3505 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3506 return; 3507 } 3508 3509 case ARMISD::VLD4LN_UPD: { 3510 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3511 ARM::VLD4LNd16Pseudo_UPD, 3512 ARM::VLD4LNd32Pseudo_UPD }; 3513 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3514 ARM::VLD4LNq32Pseudo_UPD }; 
3515 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3516 return; 3517 } 3518 3519 case ARMISD::VST1_UPD: { 3520 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3521 ARM::VST1d16wb_fixed, 3522 ARM::VST1d32wb_fixed, 3523 ARM::VST1d64wb_fixed }; 3524 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 3525 ARM::VST1q16wb_fixed, 3526 ARM::VST1q32wb_fixed, 3527 ARM::VST1q64wb_fixed }; 3528 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 3529 return; 3530 } 3531 3532 case ARMISD::VST2_UPD: { 3533 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3534 ARM::VST2d16wb_fixed, 3535 ARM::VST2d32wb_fixed, 3536 ARM::VST1q64wb_fixed}; 3537 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3538 ARM::VST2q16PseudoWB_fixed, 3539 ARM::VST2q32PseudoWB_fixed }; 3540 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 3541 return; 3542 } 3543 3544 case ARMISD::VST3_UPD: { 3545 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 3546 ARM::VST3d16Pseudo_UPD, 3547 ARM::VST3d32Pseudo_UPD, 3548 ARM::VST1d64TPseudoWB_fixed}; 3549 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3550 ARM::VST3q16Pseudo_UPD, 3551 ARM::VST3q32Pseudo_UPD }; 3552 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 3553 ARM::VST3q16oddPseudo_UPD, 3554 ARM::VST3q32oddPseudo_UPD }; 3555 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3556 return; 3557 } 3558 3559 case ARMISD::VST4_UPD: { 3560 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3561 ARM::VST4d16Pseudo_UPD, 3562 ARM::VST4d32Pseudo_UPD, 3563 ARM::VST1d64QPseudoWB_fixed}; 3564 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3565 ARM::VST4q16Pseudo_UPD, 3566 ARM::VST4q32Pseudo_UPD }; 3567 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3568 ARM::VST4q16oddPseudo_UPD, 3569 ARM::VST4q32oddPseudo_UPD }; 3570 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3571 return; 3572 } 3573 3574 case ARMISD::VST2LN_UPD: { 3575 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3576 ARM::VST2LNd16Pseudo_UPD, 3577 ARM::VST2LNd32Pseudo_UPD }; 3578 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3579 ARM::VST2LNq32Pseudo_UPD }; 3580 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3581 return; 3582 } 3583 3584 case ARMISD::VST3LN_UPD: { 3585 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3586 ARM::VST3LNd16Pseudo_UPD, 3587 ARM::VST3LNd32Pseudo_UPD }; 3588 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3589 ARM::VST3LNq32Pseudo_UPD }; 3590 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3591 return; 3592 } 3593 3594 case ARMISD::VST4LN_UPD: { 3595 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3596 ARM::VST4LNd16Pseudo_UPD, 3597 ARM::VST4LNd32Pseudo_UPD }; 3598 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3599 ARM::VST4LNq32Pseudo_UPD }; 3600 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3601 return; 3602 } 3603 3604 case ISD::INTRINSIC_VOID: 3605 case ISD::INTRINSIC_W_CHAIN: { 3606 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3607 switch (IntNo) { 3608 default: 3609 break; 3610 3611 case Intrinsic::arm_mrrc: 3612 case Intrinsic::arm_mrrc2: { 3613 SDLoc dl(N); 3614 SDValue Chain = N->getOperand(0); 3615 unsigned Opc; 3616 3617 if (Subtarget->isThumb()) 3618 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 3619 else 3620 Opc = (IntNo == Intrinsic::arm_mrrc ? 
ARM::MRRC : ARM::MRRC2); 3621 3622 SmallVector<SDValue, 5> Ops; 3623 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 3624 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 3625 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 3626 3627 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 3628 // instruction will always be '1111' but it is possible in assembly language to specify 3629 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 3630 if (Opc != ARM::MRRC2) { 3631 Ops.push_back(getAL(CurDAG, dl)); 3632 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3633 } 3634 3635 Ops.push_back(Chain); 3636 3637 // Writes to two registers. 3638 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 3639 3640 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 3641 return; 3642 } 3643 case Intrinsic::arm_ldaexd: 3644 case Intrinsic::arm_ldrexd: { 3645 SDLoc dl(N); 3646 SDValue Chain = N->getOperand(0); 3647 SDValue MemAddr = N->getOperand(2); 3648 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 3649 3650 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3651 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3652 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 3653 3654 // arm_ldrexd returns a i64 value in {i32, i32} 3655 std::vector<EVT> ResTys; 3656 if (isThumb) { 3657 ResTys.push_back(MVT::i32); 3658 ResTys.push_back(MVT::i32); 3659 } else 3660 ResTys.push_back(MVT::Untyped); 3661 ResTys.push_back(MVT::Other); 3662 3663 // Place arguments in the right order. 3664 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 3665 CurDAG->getRegister(0, MVT::i32), Chain}; 3666 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3667 // Transfer memoperands. 
3668 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3669 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3670 3671 // Remap uses. 3672 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 3673 if (!SDValue(N, 0).use_empty()) { 3674 SDValue Result; 3675 if (isThumb) 3676 Result = SDValue(Ld, 0); 3677 else { 3678 SDValue SubRegIdx = 3679 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3680 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3681 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3682 Result = SDValue(ResNode,0); 3683 } 3684 ReplaceUses(SDValue(N, 0), Result); 3685 } 3686 if (!SDValue(N, 1).use_empty()) { 3687 SDValue Result; 3688 if (isThumb) 3689 Result = SDValue(Ld, 1); 3690 else { 3691 SDValue SubRegIdx = 3692 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3693 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3694 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3695 Result = SDValue(ResNode,0); 3696 } 3697 ReplaceUses(SDValue(N, 1), Result); 3698 } 3699 ReplaceUses(SDValue(N, 2), OutChain); 3700 CurDAG->RemoveDeadNode(N); 3701 return; 3702 } 3703 case Intrinsic::arm_stlexd: 3704 case Intrinsic::arm_strexd: { 3705 SDLoc dl(N); 3706 SDValue Chain = N->getOperand(0); 3707 SDValue Val0 = N->getOperand(2); 3708 SDValue Val1 = N->getOperand(3); 3709 SDValue MemAddr = N->getOperand(4); 3710 3711 // Store exclusive double return a i32 value which is the return status 3712 // of the issued store. 3713 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3714 3715 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3716 // Place arguments in the right order. 3717 SmallVector<SDValue, 7> Ops; 3718 if (isThumb) { 3719 Ops.push_back(Val0); 3720 Ops.push_back(Val1); 3721 } else 3722 // arm_strexd uses GPRPair. 
3723 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3724 Ops.push_back(MemAddr); 3725 Ops.push_back(getAL(CurDAG, dl)); 3726 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3727 Ops.push_back(Chain); 3728 3729 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3730 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 3731 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 3732 3733 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3734 // Transfer memoperands. 3735 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3736 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3737 3738 ReplaceNode(N, St); 3739 return; 3740 } 3741 3742 case Intrinsic::arm_neon_vld1: { 3743 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3744 ARM::VLD1d32, ARM::VLD1d64 }; 3745 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3746 ARM::VLD1q32, ARM::VLD1q64}; 3747 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 3748 return; 3749 } 3750 3751 case Intrinsic::arm_neon_vld1x2: { 3752 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3753 ARM::VLD1q32, ARM::VLD1q64 }; 3754 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 3755 ARM::VLD1d16QPseudo, 3756 ARM::VLD1d32QPseudo, 3757 ARM::VLD1d64QPseudo }; 3758 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3759 return; 3760 } 3761 3762 case Intrinsic::arm_neon_vld1x3: { 3763 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 3764 ARM::VLD1d16TPseudo, 3765 ARM::VLD1d32TPseudo, 3766 ARM::VLD1d64TPseudo }; 3767 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 3768 ARM::VLD1q16LowTPseudo_UPD, 3769 ARM::VLD1q32LowTPseudo_UPD, 3770 ARM::VLD1q64LowTPseudo_UPD }; 3771 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 3772 ARM::VLD1q16HighTPseudo, 3773 ARM::VLD1q32HighTPseudo, 3774 ARM::VLD1q64HighTPseudo }; 3775 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3776 return; 3777 } 
3778 3779 case Intrinsic::arm_neon_vld1x4: { 3780 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 3781 ARM::VLD1d16QPseudo, 3782 ARM::VLD1d32QPseudo, 3783 ARM::VLD1d64QPseudo }; 3784 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 3785 ARM::VLD1q16LowQPseudo_UPD, 3786 ARM::VLD1q32LowQPseudo_UPD, 3787 ARM::VLD1q64LowQPseudo_UPD }; 3788 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 3789 ARM::VLD1q16HighQPseudo, 3790 ARM::VLD1q32HighQPseudo, 3791 ARM::VLD1q64HighQPseudo }; 3792 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3793 return; 3794 } 3795 3796 case Intrinsic::arm_neon_vld2: { 3797 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3798 ARM::VLD2d32, ARM::VLD1q64 }; 3799 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3800 ARM::VLD2q32Pseudo }; 3801 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3802 return; 3803 } 3804 3805 case Intrinsic::arm_neon_vld3: { 3806 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3807 ARM::VLD3d16Pseudo, 3808 ARM::VLD3d32Pseudo, 3809 ARM::VLD1d64TPseudo }; 3810 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3811 ARM::VLD3q16Pseudo_UPD, 3812 ARM::VLD3q32Pseudo_UPD }; 3813 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3814 ARM::VLD3q16oddPseudo, 3815 ARM::VLD3q32oddPseudo }; 3816 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3817 return; 3818 } 3819 3820 case Intrinsic::arm_neon_vld4: { 3821 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3822 ARM::VLD4d16Pseudo, 3823 ARM::VLD4d32Pseudo, 3824 ARM::VLD1d64QPseudo }; 3825 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3826 ARM::VLD4q16Pseudo_UPD, 3827 ARM::VLD4q32Pseudo_UPD }; 3828 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3829 ARM::VLD4q16oddPseudo, 3830 ARM::VLD4q32oddPseudo }; 3831 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3832 return; 3833 } 3834 3835 case Intrinsic::arm_neon_vld2dup: 
{ 3836 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3837 ARM::VLD2DUPd32, ARM::VLD1q64 }; 3838 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 3839 ARM::VLD2DUPq16EvenPseudo, 3840 ARM::VLD2DUPq32EvenPseudo }; 3841 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 3842 ARM::VLD2DUPq16OddPseudo, 3843 ARM::VLD2DUPq32OddPseudo }; 3844 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 3845 DOpcodes, QOpcodes0, QOpcodes1); 3846 return; 3847 } 3848 3849 case Intrinsic::arm_neon_vld3dup: { 3850 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 3851 ARM::VLD3DUPd16Pseudo, 3852 ARM::VLD3DUPd32Pseudo, 3853 ARM::VLD1d64TPseudo }; 3854 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 3855 ARM::VLD3DUPq16EvenPseudo, 3856 ARM::VLD3DUPq32EvenPseudo }; 3857 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 3858 ARM::VLD3DUPq16OddPseudo, 3859 ARM::VLD3DUPq32OddPseudo }; 3860 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 3861 DOpcodes, QOpcodes0, QOpcodes1); 3862 return; 3863 } 3864 3865 case Intrinsic::arm_neon_vld4dup: { 3866 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 3867 ARM::VLD4DUPd16Pseudo, 3868 ARM::VLD4DUPd32Pseudo, 3869 ARM::VLD1d64QPseudo }; 3870 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 3871 ARM::VLD4DUPq16EvenPseudo, 3872 ARM::VLD4DUPq32EvenPseudo }; 3873 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 3874 ARM::VLD4DUPq16OddPseudo, 3875 ARM::VLD4DUPq32OddPseudo }; 3876 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 3877 DOpcodes, QOpcodes0, QOpcodes1); 3878 return; 3879 } 3880 3881 case Intrinsic::arm_neon_vld2lane: { 3882 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3883 ARM::VLD2LNd16Pseudo, 3884 ARM::VLD2LNd32Pseudo }; 3885 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3886 ARM::VLD2LNq32Pseudo }; 3887 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3888 return; 3889 } 3890 3891 
case Intrinsic::arm_neon_vld3lane: { 3892 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3893 ARM::VLD3LNd16Pseudo, 3894 ARM::VLD3LNd32Pseudo }; 3895 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3896 ARM::VLD3LNq32Pseudo }; 3897 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3898 return; 3899 } 3900 3901 case Intrinsic::arm_neon_vld4lane: { 3902 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3903 ARM::VLD4LNd16Pseudo, 3904 ARM::VLD4LNd32Pseudo }; 3905 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3906 ARM::VLD4LNq32Pseudo }; 3907 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3908 return; 3909 } 3910 3911 case Intrinsic::arm_neon_vst1: { 3912 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 3913 ARM::VST1d32, ARM::VST1d64 }; 3914 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3915 ARM::VST1q32, ARM::VST1q64 }; 3916 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 3917 return; 3918 } 3919 3920 case Intrinsic::arm_neon_vst1x2: { 3921 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3922 ARM::VST1q32, ARM::VST1q64 }; 3923 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 3924 ARM::VST1d16QPseudo, 3925 ARM::VST1d32QPseudo, 3926 ARM::VST1d64QPseudo }; 3927 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3928 return; 3929 } 3930 3931 case Intrinsic::arm_neon_vst1x3: { 3932 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 3933 ARM::VST1d16TPseudo, 3934 ARM::VST1d32TPseudo, 3935 ARM::VST1d64TPseudo }; 3936 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 3937 ARM::VST1q16LowTPseudo_UPD, 3938 ARM::VST1q32LowTPseudo_UPD, 3939 ARM::VST1q64LowTPseudo_UPD }; 3940 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 3941 ARM::VST1q16HighTPseudo, 3942 ARM::VST1q32HighTPseudo, 3943 ARM::VST1q64HighTPseudo }; 3944 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3945 return; 3946 } 3947 3948 case 
Intrinsic::arm_neon_vst1x4: { 3949 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 3950 ARM::VST1d16QPseudo, 3951 ARM::VST1d32QPseudo, 3952 ARM::VST1d64QPseudo }; 3953 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 3954 ARM::VST1q16LowQPseudo_UPD, 3955 ARM::VST1q32LowQPseudo_UPD, 3956 ARM::VST1q64LowQPseudo_UPD }; 3957 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 3958 ARM::VST1q16HighQPseudo, 3959 ARM::VST1q32HighQPseudo, 3960 ARM::VST1q64HighQPseudo }; 3961 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3962 return; 3963 } 3964 3965 case Intrinsic::arm_neon_vst2: { 3966 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3967 ARM::VST2d32, ARM::VST1q64 }; 3968 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3969 ARM::VST2q32Pseudo }; 3970 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3971 return; 3972 } 3973 3974 case Intrinsic::arm_neon_vst3: { 3975 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3976 ARM::VST3d16Pseudo, 3977 ARM::VST3d32Pseudo, 3978 ARM::VST1d64TPseudo }; 3979 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3980 ARM::VST3q16Pseudo_UPD, 3981 ARM::VST3q32Pseudo_UPD }; 3982 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 3983 ARM::VST3q16oddPseudo, 3984 ARM::VST3q32oddPseudo }; 3985 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3986 return; 3987 } 3988 3989 case Intrinsic::arm_neon_vst4: { 3990 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3991 ARM::VST4d16Pseudo, 3992 ARM::VST4d32Pseudo, 3993 ARM::VST1d64QPseudo }; 3994 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3995 ARM::VST4q16Pseudo_UPD, 3996 ARM::VST4q32Pseudo_UPD }; 3997 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3998 ARM::VST4q16oddPseudo, 3999 ARM::VST4q32oddPseudo }; 4000 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4001 return; 4002 } 4003 4004 case Intrinsic::arm_neon_vst2lane: { 4005 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4006 ARM::VST2LNd16Pseudo, 4007 ARM::VST2LNd32Pseudo }; 4008 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4009 ARM::VST2LNq32Pseudo }; 4010 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4011 return; 4012 } 4013 4014 case Intrinsic::arm_neon_vst3lane: { 4015 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4016 ARM::VST3LNd16Pseudo, 4017 ARM::VST3LNd32Pseudo }; 4018 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4019 ARM::VST3LNq32Pseudo }; 4020 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4021 return; 4022 } 4023 4024 case Intrinsic::arm_neon_vst4lane: { 4025 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4026 ARM::VST4LNd16Pseudo, 4027 ARM::VST4LNd32Pseudo }; 4028 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4029 ARM::VST4LNq32Pseudo }; 4030 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4031 return; 4032 } 4033 } 4034 break; 4035 } 4036 4037 case ISD::ATOMIC_CMP_SWAP: 4038 SelectCMP_SWAP(N); 4039 return; 4040 } 4041 4042 SelectCode(N); 4043 } 4044 4045 // Inspect a register string of the form 4046 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 4047 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 4048 // and obtain the integer operands from them, adding these operands to the 4049 // provided vector. 4050 static void getIntOperandsFromRegisterString(StringRef RegString, 4051 SelectionDAG *CurDAG, 4052 const SDLoc &DL, 4053 std::vector<SDValue> &Ops) { 4054 SmallVector<StringRef, 5> Fields; 4055 RegString.split(Fields, ':'); 4056 4057 if (Fields.size() > 1) { 4058 bool AllIntFields = true; 4059 4060 for (StringRef Field : Fields) { 4061 // Need to trim out leading 'cp' characters and get the integer field. 
4062 unsigned IntField; 4063 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 4064 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 4065 } 4066 4067 assert(AllIntFields && 4068 "Unexpected non-integer value in special register string."); 4069 } 4070 } 4071 4072 // Maps a Banked Register string to its mask value. The mask value returned is 4073 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 4074 // mask operand, which expresses which register is to be used, e.g. r8, and in 4075 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 4076 // was invalid. 4077 static inline int getBankedRegisterMask(StringRef RegString) { 4078 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 4079 if (!TheReg) 4080 return -1; 4081 return TheReg->Encoding; 4082 } 4083 4084 // The flags here are common to those allowed for apsr in the A class cores and 4085 // those allowed for the special registers in the M class cores. Returns a 4086 // value representing which flags were present, -1 if invalid. 4087 static inline int getMClassFlagsMask(StringRef Flags) { 4088 return StringSwitch<int>(Flags) 4089 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 4090 // correct when flags are not permitted 4091 .Case("g", 0x1) 4092 .Case("nzcvq", 0x2) 4093 .Case("nzcvqg", 0x3) 4094 .Default(-1); 4095 } 4096 4097 // Maps MClass special registers string to its value for use in the 4098 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 4099 // Returns -1 to signify that the string was invalid. 
4100 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 4101 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 4102 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 4103 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 4104 return -1; 4105 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 4106 } 4107 4108 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 4109 // The mask operand contains the special register (R Bit) in bit 4, whether 4110 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 4111 // bits 3-0 contains the fields to be accessed in the special register, set by 4112 // the flags provided with the register. 4113 int Mask = 0; 4114 if (Reg == "apsr") { 4115 // The flags permitted for apsr are the same flags that are allowed in 4116 // M class registers. We get the flag value and then shift the flags into 4117 // the correct place to combine with the mask. 4118 Mask = getMClassFlagsMask(Flags); 4119 if (Mask == -1) 4120 return -1; 4121 return Mask << 2; 4122 } 4123 4124 if (Reg != "cpsr" && Reg != "spsr") { 4125 return -1; 4126 } 4127 4128 // This is the same as if the flags were "fc" 4129 if (Flags.empty() || Flags == "all") 4130 return Mask | 0x9; 4131 4132 // Inspect the supplied flags string and set the bits in the mask for 4133 // the relevant and valid flags allowed for cpsr and spsr. 4134 for (char Flag : Flags) { 4135 int FlagVal; 4136 switch (Flag) { 4137 case 'c': 4138 FlagVal = 0x1; 4139 break; 4140 case 'x': 4141 FlagVal = 0x2; 4142 break; 4143 case 's': 4144 FlagVal = 0x4; 4145 break; 4146 case 'f': 4147 FlagVal = 0x8; 4148 break; 4149 default: 4150 FlagVal = 0; 4151 } 4152 4153 // This avoids allowing strings where the same flag bit appears twice. 4154 if (!FlagVal || (Mask & FlagVal)) 4155 return -1; 4156 Mask |= FlagVal; 4157 } 4158 4159 // If the register is spsr then we need to set the R bit. 
4160 if (Reg == "spsr") 4161 Mask |= 0x10; 4162 4163 return Mask; 4164 } 4165 4166 // Lower the read_register intrinsic to ARM specific DAG nodes 4167 // using the supplied metadata string to select the instruction node to use 4168 // and the registers/masks to construct as operands for the node. 4169 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 4170 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4171 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4172 bool IsThumb2 = Subtarget->isThumb2(); 4173 SDLoc DL(N); 4174 4175 std::vector<SDValue> Ops; 4176 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4177 4178 if (!Ops.empty()) { 4179 // If the special register string was constructed of fields (as defined 4180 // in the ACLE) then need to lower to MRC node (32 bit) or 4181 // MRRC node(64 bit), we can make the distinction based on the number of 4182 // operands we have. 4183 unsigned Opcode; 4184 SmallVector<EVT, 3> ResTypes; 4185 if (Ops.size() == 5){ 4186 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 4187 ResTypes.append({ MVT::i32, MVT::Other }); 4188 } else { 4189 assert(Ops.size() == 3 && 4190 "Invalid number of fields in special register string."); 4191 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 4192 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 4193 } 4194 4195 Ops.push_back(getAL(CurDAG, DL)); 4196 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4197 Ops.push_back(N->getOperand(0)); 4198 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 4199 return true; 4200 } 4201 4202 std::string SpecialReg = RegString->getString().lower(); 4203 4204 int BankedReg = getBankedRegisterMask(SpecialReg); 4205 if (BankedReg != -1) { 4206 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 4207 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4208 N->getOperand(0) }; 4209 ReplaceNode( 4210 N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRSbanked : ARM::MRSbanked, 4211 DL, MVT::i32, MVT::Other, Ops)); 4212 return true; 4213 } 4214 4215 // The VFP registers are read by creating SelectionDAG nodes with opcodes 4216 // corresponding to the register that is being read from. So we switch on the 4217 // string to find which opcode we need to use. 4218 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4219 .Case("fpscr", ARM::VMRS) 4220 .Case("fpexc", ARM::VMRS_FPEXC) 4221 .Case("fpsid", ARM::VMRS_FPSID) 4222 .Case("mvfr0", ARM::VMRS_MVFR0) 4223 .Case("mvfr1", ARM::VMRS_MVFR1) 4224 .Case("mvfr2", ARM::VMRS_MVFR2) 4225 .Case("fpinst", ARM::VMRS_FPINST) 4226 .Case("fpinst2", ARM::VMRS_FPINST2) 4227 .Default(0); 4228 4229 // If an opcode was found then we can lower the read to a VFP instruction. 4230 if (Opcode) { 4231 if (!Subtarget->hasVFP2Base()) 4232 return false; 4233 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 4234 return false; 4235 4236 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4237 N->getOperand(0) }; 4238 ReplaceNode(N, 4239 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 4240 return true; 4241 } 4242 4243 // If the target is M Class then need to validate that the register string 4244 // is an acceptable value, so check that a mask can be constructed from the 4245 // string. 4246 if (Subtarget->isMClass()) { 4247 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 4248 if (SYSmValue == -1) 4249 return false; 4250 4251 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4252 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4253 N->getOperand(0) }; 4254 ReplaceNode( 4255 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 4256 return true; 4257 } 4258 4259 // Here we know the target is not M Class so we need to check if it is one 4260 // of the remaining possible values which are apsr, cpsr or spsr. 
4261 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 4262 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4263 N->getOperand(0) }; 4264 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 4265 DL, MVT::i32, MVT::Other, Ops)); 4266 return true; 4267 } 4268 4269 if (SpecialReg == "spsr") { 4270 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4271 N->getOperand(0) }; 4272 ReplaceNode( 4273 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 4274 MVT::i32, MVT::Other, Ops)); 4275 return true; 4276 } 4277 4278 return false; 4279 } 4280 4281 // Lower the write_register intrinsic to ARM specific DAG nodes 4282 // using the supplied metadata string to select the instruction node to use 4283 // and the registers/masks to use in the nodes 4284 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 4285 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4286 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4287 bool IsThumb2 = Subtarget->isThumb2(); 4288 SDLoc DL(N); 4289 4290 std::vector<SDValue> Ops; 4291 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4292 4293 if (!Ops.empty()) { 4294 // If the special register string was constructed of fields (as defined 4295 // in the ACLE) then need to lower to MCR node (32 bit) or 4296 // MCRR node(64 bit), we can make the distinction based on the number of 4297 // operands we have. 4298 unsigned Opcode; 4299 if (Ops.size() == 5) { 4300 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 4301 Ops.insert(Ops.begin()+2, N->getOperand(2)); 4302 } else { 4303 assert(Ops.size() == 3 && 4304 "Invalid number of fields in special register string."); 4305 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 4306 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 4307 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 4308 } 4309 4310 Ops.push_back(getAL(CurDAG, DL)); 4311 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4312 Ops.push_back(N->getOperand(0)); 4313 4314 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4315 return true; 4316 } 4317 4318 std::string SpecialReg = RegString->getString().lower(); 4319 int BankedReg = getBankedRegisterMask(SpecialReg); 4320 if (BankedReg != -1) { 4321 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 4322 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4323 N->getOperand(0) }; 4324 ReplaceNode( 4325 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 4326 DL, MVT::Other, Ops)); 4327 return true; 4328 } 4329 4330 // The VFP registers are written to by creating SelectionDAG nodes with 4331 // opcodes corresponding to the register that is being written. So we switch 4332 // on the string to find which opcode we need to use. 4333 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4334 .Case("fpscr", ARM::VMSR) 4335 .Case("fpexc", ARM::VMSR_FPEXC) 4336 .Case("fpsid", ARM::VMSR_FPSID) 4337 .Case("fpinst", ARM::VMSR_FPINST) 4338 .Case("fpinst2", ARM::VMSR_FPINST2) 4339 .Default(0); 4340 4341 if (Opcode) { 4342 if (!Subtarget->hasVFP2Base()) 4343 return false; 4344 Ops = { N->getOperand(2), getAL(CurDAG, DL), 4345 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4346 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4347 return true; 4348 } 4349 4350 std::pair<StringRef, StringRef> Fields; 4351 Fields = StringRef(SpecialReg).rsplit('_'); 4352 std::string Reg = Fields.first.str(); 4353 StringRef Flags = Fields.second; 4354 4355 // If the target was M Class then need to validate the special register value 4356 // and retrieve the mask for use in the instruction node. 
4357 if (Subtarget->isMClass()) { 4358 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 4359 if (SYSmValue == -1) 4360 return false; 4361 4362 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4363 N->getOperand(2), getAL(CurDAG, DL), 4364 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4365 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 4366 return true; 4367 } 4368 4369 // We then check to see if a valid mask can be constructed for one of the 4370 // register string values permitted for the A and R class cores. These values 4371 // are apsr, spsr and cpsr; these are also valid on older cores. 4372 int Mask = getARClassRegisterMask(Reg, Flags); 4373 if (Mask != -1) { 4374 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 4375 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4376 N->getOperand(0) }; 4377 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 4378 DL, MVT::Other, Ops)); 4379 return true; 4380 } 4381 4382 return false; 4383 } 4384 4385 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 4386 std::vector<SDValue> AsmNodeOperands; 4387 unsigned Flag, Kind; 4388 bool Changed = false; 4389 unsigned NumOps = N->getNumOperands(); 4390 4391 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 4392 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 4393 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 4394 // respectively. Since there is no constraint to explicitly specify a 4395 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 4396 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 4397 // them into a GPRPair. 4398 4399 SDLoc dl(N); 4400 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 4401 : SDValue(nullptr,0); 4402 4403 SmallVector<bool, 8> OpChanged; 4404 // Glue node will be appended late. 
4405 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 4406 SDValue op = N->getOperand(i); 4407 AsmNodeOperands.push_back(op); 4408 4409 if (i < InlineAsm::Op_FirstOperand) 4410 continue; 4411 4412 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 4413 Flag = C->getZExtValue(); 4414 Kind = InlineAsm::getKind(Flag); 4415 } 4416 else 4417 continue; 4418 4419 // Immediate operands to inline asm in the SelectionDAG are modeled with 4420 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 4421 // the second is a constant with the value of the immediate. If we get here 4422 // and we have a Kind_Imm, skip the next operand, and continue. 4423 if (Kind == InlineAsm::Kind_Imm) { 4424 SDValue op = N->getOperand(++i); 4425 AsmNodeOperands.push_back(op); 4426 continue; 4427 } 4428 4429 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 4430 if (NumRegs) 4431 OpChanged.push_back(false); 4432 4433 unsigned DefIdx = 0; 4434 bool IsTiedToChangedOp = false; 4435 // If it's a use that is tied with a previous def, it has no 4436 // reg class constraint. 4437 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 4438 IsTiedToChangedOp = OpChanged[DefIdx]; 4439 4440 // Memory operands to inline asm in the SelectionDAG are modeled with two 4441 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 4442 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 4443 // it doesn't get misinterpreted), and continue. We do this here because 4444 // it's important to update the OpChanged array correctly before moving on. 
4445 if (Kind == InlineAsm::Kind_Mem) { 4446 SDValue op = N->getOperand(++i); 4447 AsmNodeOperands.push_back(op); 4448 continue; 4449 } 4450 4451 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 4452 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 4453 continue; 4454 4455 unsigned RC; 4456 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 4457 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 4458 || NumRegs != 2) 4459 continue; 4460 4461 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 4462 SDValue V0 = N->getOperand(i+1); 4463 SDValue V1 = N->getOperand(i+2); 4464 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 4465 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 4466 SDValue PairedReg; 4467 MachineRegisterInfo &MRI = MF->getRegInfo(); 4468 4469 if (Kind == InlineAsm::Kind_RegDef || 4470 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 4471 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 4472 // the original GPRs. 4473 4474 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4475 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4476 SDValue Chain = SDValue(N,0); 4477 4478 SDNode *GU = N->getGluedUser(); 4479 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 4480 Chain.getValue(1)); 4481 4482 // Extract values from a GPRPair reg and copy to the original GPR reg. 4483 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4484 RegCopy); 4485 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4486 RegCopy); 4487 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 4488 RegCopy.getValue(1)); 4489 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 4490 4491 // Update the original glue user. 
4492 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 4493 Ops.push_back(T1.getValue(1)); 4494 CurDAG->UpdateNodeOperands(GU, Ops); 4495 } 4496 else { 4497 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 4498 // GPRPair and then pass the GPRPair to the inline asm. 4499 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 4500 4501 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 4502 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 4503 Chain.getValue(1)); 4504 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 4505 T0.getValue(1)); 4506 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 4507 4508 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 4509 // i32 VRs of inline asm with it. 4510 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4511 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4512 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 4513 4514 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 4515 Glue = Chain.getValue(1); 4516 } 4517 4518 Changed = true; 4519 4520 if(PairedReg.getNode()) { 4521 OpChanged[OpChanged.size() -1 ] = true; 4522 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 4523 if (IsTiedToChangedOp) 4524 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 4525 else 4526 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 4527 // Replace the current flag. 4528 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 4529 Flag, dl, MVT::i32); 4530 // Add the new register node and skip the original two GPRs. 4531 AsmNodeOperands.push_back(PairedReg); 4532 // Skip the next two GPRs. 
4533 i += 2; 4534 } 4535 } 4536 4537 if (Glue.getNode()) 4538 AsmNodeOperands.push_back(Glue); 4539 if (!Changed) 4540 return false; 4541 4542 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 4543 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 4544 New->setNodeId(-1); 4545 ReplaceNode(N, New.getNode()); 4546 return true; 4547 } 4548 4549 4550 bool ARMDAGToDAGISel:: 4551 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 4552 std::vector<SDValue> &OutOps) { 4553 switch(ConstraintID) { 4554 default: 4555 llvm_unreachable("Unexpected asm memory constraint"); 4556 case InlineAsm::Constraint_i: 4557 // FIXME: It seems strange that 'i' is needed here since it's supposed to 4558 // be an immediate and not a memory constraint. 4559 LLVM_FALLTHROUGH; 4560 case InlineAsm::Constraint_m: 4561 case InlineAsm::Constraint_o: 4562 case InlineAsm::Constraint_Q: 4563 case InlineAsm::Constraint_Um: 4564 case InlineAsm::Constraint_Un: 4565 case InlineAsm::Constraint_Uq: 4566 case InlineAsm::Constraint_Us: 4567 case InlineAsm::Constraint_Ut: 4568 case InlineAsm::Constraint_Uv: 4569 case InlineAsm::Constraint_Uy: 4570 // Require the address to be in a register. That is safe for all ARM 4571 // variants and it is hard to do anything much smarter without knowing 4572 // how the operand is used. 4573 OutOps.push_back(Op); 4574 return false; 4575 } 4576 return true; 4577 } 4578 4579 /// createARMISelDag - This pass converts a legalized DAG into a 4580 /// ARM-specific DAG, ready for instruction scheduling. 4581 /// 4582 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 4583 CodeGenOpt::Level OptLevel) { 4584 return new ARMDAGToDAGISel(TM, OptLevel); 4585 } 4586