1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/SelectionDAGISel.h" 25 #include "llvm/CodeGen/TargetLowering.h" 26 #include "llvm/IR/CallingConv.h" 27 #include "llvm/IR/Constants.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/IntrinsicsARM.h" 32 #include "llvm/IR/LLVMContext.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Target/TargetOptions.h" 37 38 using namespace llvm; 39 40 #define DEBUG_TYPE "arm-isel" 41 42 static cl::opt<bool> 43 DisableShifterOp("disable-shifter-op", cl::Hidden, 44 cl::desc("Disable isel of shifter-op"), 45 cl::init(false)); 46 47 //===--------------------------------------------------------------------===// 48 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 49 /// instructions for SelectionDAG operations. 
50 /// 51 namespace { 52 53 class ARMDAGToDAGISel : public SelectionDAGISel { 54 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 55 /// make the right decision when generating code for different targets. 56 const ARMSubtarget *Subtarget; 57 58 public: 59 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) 60 : SelectionDAGISel(tm, OptLevel) {} 61 62 bool runOnMachineFunction(MachineFunction &MF) override { 63 // Reset the subtarget each time through. 64 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 65 SelectionDAGISel::runOnMachineFunction(MF); 66 return true; 67 } 68 69 StringRef getPassName() const override { return "ARM Instruction Selection"; } 70 71 void PreprocessISelDAG() override; 72 73 /// getI32Imm - Return a target constant of type i32 with the specified 74 /// value. 75 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 76 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 77 } 78 79 void Select(SDNode *N) override; 80 81 bool hasNoVMLxHazardUse(SDNode *N) const; 82 bool isShifterOpProfitable(const SDValue &Shift, 83 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 84 bool SelectRegShifterOperand(SDValue N, SDValue &A, 85 SDValue &B, SDValue &C, 86 bool CheckProfitability = true); 87 bool SelectImmShifterOperand(SDValue N, SDValue &A, 88 SDValue &B, bool CheckProfitability = true); 89 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, 90 SDValue &B, SDValue &C) { 91 // Don't apply the profitability check 92 return SelectRegShifterOperand(N, A, B, C, false); 93 } 94 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, 95 SDValue &B) { 96 // Don't apply the profitability check 97 return SelectImmShifterOperand(N, A, B, false); 98 } 99 100 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); 101 102 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 103 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 104 105 bool 
SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 106 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 107 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 108 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 109 return true; 110 } 111 112 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 113 SDValue &Offset, SDValue &Opc); 114 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 115 SDValue &Offset, SDValue &Opc); 116 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 117 SDValue &Offset, SDValue &Opc); 118 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 119 bool SelectAddrMode3(SDValue N, SDValue &Base, 120 SDValue &Offset, SDValue &Opc); 121 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 122 SDValue &Offset, SDValue &Opc); 123 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); 124 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); 125 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); 126 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 127 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 128 129 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 130 131 // Thumb Addressing Modes: 132 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 133 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); 134 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 135 SDValue &OffImm); 136 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 137 SDValue &OffImm); 138 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 139 SDValue &OffImm); 140 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 141 SDValue &OffImm); 142 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 143 template <unsigned Shift> 144 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 145 146 // Thumb 2 Addressing Modes: 147 bool 
SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 148 template <unsigned Shift> 149 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); 150 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 151 SDValue &OffImm); 152 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 153 SDValue &OffImm); 154 template <unsigned Shift> 155 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); 156 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, 157 unsigned Shift); 158 template <unsigned Shift> 159 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 160 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 161 SDValue &OffReg, SDValue &ShImm); 162 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 163 164 template<int Min, int Max> 165 bool SelectImmediateInRange(SDValue N, SDValue &OffImm); 166 167 inline bool is_so_imm(unsigned Imm) const { 168 return ARM_AM::getSOImmVal(Imm) != -1; 169 } 170 171 inline bool is_so_imm_not(unsigned Imm) const { 172 return ARM_AM::getSOImmVal(~Imm) != -1; 173 } 174 175 inline bool is_t2_so_imm(unsigned Imm) const { 176 return ARM_AM::getT2SOImmVal(Imm) != -1; 177 } 178 179 inline bool is_t2_so_imm_not(unsigned Imm) const { 180 return ARM_AM::getT2SOImmVal(~Imm) != -1; 181 } 182 183 // Include the pieces autogenerated from the target description. 184 #include "ARMGenDAGISel.inc" 185 186 private: 187 void transferMemOperands(SDNode *Src, SDNode *Dst); 188 189 /// Indexed (pre/post inc/dec) load matching code for ARM. 190 bool tryARMIndexedLoad(SDNode *N); 191 bool tryT1IndexedLoad(SDNode *N); 192 bool tryT2IndexedLoad(SDNode *N); 193 bool tryMVEIndexedLoad(SDNode *N); 194 195 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 196 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 197 /// loads of D registers and even subregs and odd subregs of Q registers. 
198 /// For NumVecs <= 2, QOpcodes1 is not used. 199 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 200 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 201 const uint16_t *QOpcodes1); 202 203 /// SelectVST - Select NEON store intrinsics. NumVecs should 204 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 205 /// stores of D registers and even subregs and odd subregs of Q registers. 206 /// For NumVecs <= 2, QOpcodes1 is not used. 207 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 208 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 209 const uint16_t *QOpcodes1); 210 211 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 212 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 213 /// load/store of D registers and Q registers. 214 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 215 unsigned NumVecs, const uint16_t *DOpcodes, 216 const uint16_t *QOpcodes); 217 218 /// Helper functions for setting up clusters of MVE predication operands. 219 template <typename SDValueVector> 220 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 221 SDValue PredicateMask); 222 template <typename SDValueVector> 223 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 224 SDValue PredicateMask, SDValue Inactive); 225 226 template <typename SDValueVector> 227 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc); 228 template <typename SDValueVector> 229 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy); 230 231 /// SelectMVE_WB - Select MVE writeback load/store intrinsics. 232 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated); 233 234 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics. 235 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate, 236 bool HasSaturationOperand); 237 238 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics. 
239 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 240 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated); 241 242 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between 243 /// vector lanes. 244 void SelectMVE_VSHLC(SDNode *N, bool Predicated); 245 246 /// Select long MVE vector reductions with two vector operands 247 /// Stride is the number of vector element widths the instruction can operate 248 /// on: 249 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32] 250 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32] 251 /// Stride is used when addressing the OpcodesS array which contains multiple 252 /// opcodes for each element width. 253 /// TySize is the index into the list of element types listed above 254 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 255 const uint16_t *OpcodesS, const uint16_t *OpcodesU, 256 size_t Stride, size_t TySize); 257 258 /// Select a 64-bit MVE vector reduction with two vector operands 259 /// arm_mve_vmlldava_[predicated] 260 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 261 const uint16_t *OpcodesU); 262 /// Select a 72-bit MVE vector rounding reduction with two vector operands 263 /// int_arm_mve_vrmlldavha[_predicated] 264 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 265 const uint16_t *OpcodesU); 266 267 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs 268 /// should be 2 or 4. The opcode array specifies the instructions 269 /// used for 8, 16 and 32-bit lane sizes respectively, and each 270 /// pointer points to a set of NumVecs sub-opcodes used for the 271 /// different stages (e.g. VLD20 versus VLD21) of each load family. 272 void SelectMVE_VLD(SDNode *N, unsigned NumVecs, 273 const uint16_t *const *Opcodes, bool HasWriteback); 274 275 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. 
Opcodes is an 276 /// array of 3 elements for the 8, 16 and 32-bit lane sizes. 277 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 278 bool Wrapping, bool Predicated); 279 280 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D, 281 /// CX1DA, CX2D, CX2DA, CX3, CX3DA). 282 /// \arg \c NumExtraOps number of extra operands besides the coprocossor, 283 /// the accumulator and the immediate operand, i.e. 0 284 /// for CX1*, 1 for CX2*, 2 for CX3* 285 /// \arg \c HasAccum whether the instruction has an accumulator operand 286 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps, 287 bool HasAccum); 288 289 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 290 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 291 /// for loading D registers. 292 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, 293 unsigned NumVecs, const uint16_t *DOpcodes, 294 const uint16_t *QOpcodes0 = nullptr, 295 const uint16_t *QOpcodes1 = nullptr); 296 297 /// Try to select SBFX/UBFX instructions for ARM. 298 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 299 300 // Select special operations if node forms integer ABS pattern 301 bool tryABSOp(SDNode *N); 302 303 bool tryReadRegister(SDNode *N); 304 bool tryWriteRegister(SDNode *N); 305 306 bool tryInlineAsm(SDNode *N); 307 308 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 309 310 void SelectCMP_SWAP(SDNode *N); 311 312 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 313 /// inline asm expressions. 314 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 315 std::vector<SDValue> &OutOps) override; 316 317 // Form pairs of consecutive R, S, D, or Q registers. 
318 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 319 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 320 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 321 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 322 323 // Form sequences of 4 consecutive S, D, or Q registers. 324 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 325 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 326 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 327 328 // Get the alignment operand for a NEON VLD or VST instruction. 329 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 330 bool is64BitVector); 331 332 /// Checks if N is a multiplication by a constant where we can extract out a 333 /// power of two from the constant so that it can be used in a shift, but only 334 /// if it simplifies the materialization of the constant. Returns true if it 335 /// is, and assigns to PowerOfTwo the power of two that should be extracted 336 /// out and to NewMulConst the new constant to be multiplied by. 337 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 338 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 339 340 /// Replace N with M in CurDAG, in a way that also ensures that M gets 341 /// selected when N would have been selected. 342 void replaceDAGValue(const SDValue &N, SDValue M); 343 }; 344 } 345 346 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 347 /// operand. If so Imm will receive the 32-bit value. 348 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 349 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 350 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 351 return true; 352 } 353 return false; 354 } 355 356 // isInt32Immediate - This method tests to see if a constant operand. 357 // If so Imm will receive the 32 bit value. 
358 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 359 return isInt32Immediate(N.getNode(), Imm); 360 } 361 362 // isOpcWithIntImmediate - This method tests to see if the node is a specific 363 // opcode and that it has a immediate integer right operand. 364 // If so Imm will receive the 32 bit value. 365 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 366 return N->getOpcode() == Opc && 367 isInt32Immediate(N->getOperand(1).getNode(), Imm); 368 } 369 370 /// Check whether a particular node is a constant value representable as 371 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 372 /// 373 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 374 static bool isScaledConstantInRange(SDValue Node, int Scale, 375 int RangeMin, int RangeMax, 376 int &ScaledConstant) { 377 assert(Scale > 0 && "Invalid scale!"); 378 379 // Check that this is a constant. 380 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 381 if (!C) 382 return false; 383 384 ScaledConstant = (int) C->getZExtValue(); 385 if ((ScaledConstant % Scale) != 0) 386 return false; 387 388 ScaledConstant /= Scale; 389 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 390 } 391 392 void ARMDAGToDAGISel::PreprocessISelDAG() { 393 if (!Subtarget->hasV6T2Ops()) 394 return; 395 396 bool isThumb2 = Subtarget->isThumb(); 397 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 398 E = CurDAG->allnodes_end(); I != E; ) { 399 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 400 401 if (N->getOpcode() != ISD::ADD) 402 continue; 403 404 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 405 // leading zeros, followed by consecutive set bits, followed by 1 or 2 406 // trailing zeros, e.g. 1020. 407 // Transform the expression to 408 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 409 // of trailing zeros of c2. 
The left shift would be folded as an shifter 410 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 411 // node (UBFX). 412 413 SDValue N0 = N->getOperand(0); 414 SDValue N1 = N->getOperand(1); 415 unsigned And_imm = 0; 416 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 417 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 418 std::swap(N0, N1); 419 } 420 if (!And_imm) 421 continue; 422 423 // Check if the AND mask is an immediate of the form: 000.....1111111100 424 unsigned TZ = countTrailingZeros(And_imm); 425 if (TZ != 1 && TZ != 2) 426 // Be conservative here. Shifter operands aren't always free. e.g. On 427 // Swift, left shifter operand of 1 / 2 for free but others are not. 428 // e.g. 429 // ubfx r3, r1, #16, #8 430 // ldr.w r3, [r0, r3, lsl #2] 431 // vs. 432 // mov.w r9, #1020 433 // and.w r2, r9, r1, lsr #14 434 // ldr r2, [r0, r2] 435 continue; 436 And_imm >>= TZ; 437 if (And_imm & (And_imm + 1)) 438 continue; 439 440 // Look for (and (srl X, c1), c2). 441 SDValue Srl = N1.getOperand(0); 442 unsigned Srl_imm = 0; 443 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 444 (Srl_imm <= 2)) 445 continue; 446 447 // Make sure first operand is not a shifter operand which would prevent 448 // folding of the left shift. 449 SDValue CPTmp0; 450 SDValue CPTmp1; 451 SDValue CPTmp2; 452 if (isThumb2) { 453 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 454 continue; 455 } else { 456 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 457 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 458 continue; 459 } 460 461 // Now make the transformation. 
462 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 463 Srl.getOperand(0), 464 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 465 MVT::i32)); 466 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 467 Srl, 468 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 469 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 470 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 471 CurDAG->UpdateNodeOperands(N, N0, N1); 472 } 473 } 474 475 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 476 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 477 /// least on current ARM implementations) which should be avoidded. 478 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 479 if (OptLevel == CodeGenOpt::None) 480 return true; 481 482 if (!Subtarget->hasVMLxHazards()) 483 return true; 484 485 if (!N->hasOneUse()) 486 return false; 487 488 SDNode *Use = *N->use_begin(); 489 if (Use->getOpcode() == ISD::CopyToReg) 490 return true; 491 if (Use->isMachineOpcode()) { 492 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 493 CurDAG->getSubtarget().getInstrInfo()); 494 495 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 496 if (MCID.mayStore()) 497 return true; 498 unsigned Opcode = MCID.getOpcode(); 499 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 500 return true; 501 // vmlx feeding into another vmlx. We actually want to unfold 502 // the use later in the MLxExpansion pass. e.g. 503 // vmla 504 // vmla (stall 8 cycles) 505 // 506 // vmul (5 cycles) 507 // vadd (5 cycles) 508 // vmla 509 // This adds up to about 18 - 19 cycles. 510 // 511 // vmla 512 // vmul (stall 4 cycles) 513 // vadd adds up to about 14 cycles. 
514 return TII->isFpMLxInstruction(Opcode); 515 } 516 517 return false; 518 } 519 520 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 521 ARM_AM::ShiftOpc ShOpcVal, 522 unsigned ShAmt) { 523 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 524 return true; 525 if (Shift.hasOneUse()) 526 return true; 527 // R << 2 is free. 528 return ShOpcVal == ARM_AM::lsl && 529 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 530 } 531 532 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 533 unsigned MaxShift, 534 unsigned &PowerOfTwo, 535 SDValue &NewMulConst) const { 536 assert(N.getOpcode() == ISD::MUL); 537 assert(MaxShift > 0); 538 539 // If the multiply is used in more than one place then changing the constant 540 // will make other uses incorrect, so don't. 541 if (!N.hasOneUse()) return false; 542 // Check if the multiply is by a constant 543 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 544 if (!MulConst) return false; 545 // If the constant is used in more than one place then modifying it will mean 546 // we need to materialize two constants instead of one, which is a bad idea. 
547 if (!MulConst->hasOneUse()) return false; 548 unsigned MulConstVal = MulConst->getZExtValue(); 549 if (MulConstVal == 0) return false; 550 551 // Find the largest power of 2 that MulConstVal is a multiple of 552 PowerOfTwo = MaxShift; 553 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 554 --PowerOfTwo; 555 if (PowerOfTwo == 0) return false; 556 } 557 558 // Only optimise if the new cost is better 559 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 560 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 561 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 562 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 563 return NewCost < OldCost; 564 } 565 566 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 567 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 568 ReplaceUses(N, M); 569 } 570 571 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 572 SDValue &BaseReg, 573 SDValue &Opc, 574 bool CheckProfitability) { 575 if (DisableShifterOp) 576 return false; 577 578 // If N is a multiply-by-constant and it's profitable to extract a shift and 579 // use it in a shifted operand do so. 580 if (N.getOpcode() == ISD::MUL) { 581 unsigned PowerOfTwo = 0; 582 SDValue NewMulConst; 583 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 584 HandleSDNode Handle(N); 585 SDLoc Loc(N); 586 replaceDAGValue(N.getOperand(1), NewMulConst); 587 BaseReg = Handle.getValue(); 588 Opc = CurDAG->getTargetConstant( 589 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 590 return true; 591 } 592 } 593 594 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 595 596 // Don't match base register only case. That is matched to a separate 597 // lower complexity pattern with explicit register operand. 
598 if (ShOpcVal == ARM_AM::no_shift) return false; 599 600 BaseReg = N.getOperand(0); 601 unsigned ShImmVal = 0; 602 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 603 if (!RHS) return false; 604 ShImmVal = RHS->getZExtValue() & 31; 605 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 606 SDLoc(N), MVT::i32); 607 return true; 608 } 609 610 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 611 SDValue &BaseReg, 612 SDValue &ShReg, 613 SDValue &Opc, 614 bool CheckProfitability) { 615 if (DisableShifterOp) 616 return false; 617 618 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 619 620 // Don't match base register only case. That is matched to a separate 621 // lower complexity pattern with explicit register operand. 622 if (ShOpcVal == ARM_AM::no_shift) return false; 623 624 BaseReg = N.getOperand(0); 625 unsigned ShImmVal = 0; 626 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 627 if (RHS) return false; 628 629 ShReg = N.getOperand(1); 630 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 631 return false; 632 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 633 SDLoc(N), MVT::i32); 634 return true; 635 } 636 637 // Determine whether an ISD::OR's operands are suitable to turn the operation 638 // into an addition, which often has more compact encodings. 639 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 640 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 641 Out = N; 642 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 643 } 644 645 646 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 647 SDValue &Base, 648 SDValue &OffImm) { 649 // Match simple R + imm12 operands. 650 651 // Base only. 652 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 653 !CurDAG->isBaseWithConstantOffset(N)) { 654 if (N.getOpcode() == ISD::FrameIndex) { 655 // Match frame index. 
656 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 657 Base = CurDAG->getTargetFrameIndex( 658 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 659 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 660 return true; 661 } 662 663 if (N.getOpcode() == ARMISD::Wrapper && 664 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 665 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 666 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 667 Base = N.getOperand(0); 668 } else 669 Base = N; 670 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 671 return true; 672 } 673 674 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 675 int RHSC = (int)RHS->getSExtValue(); 676 if (N.getOpcode() == ISD::SUB) 677 RHSC = -RHSC; 678 679 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 680 Base = N.getOperand(0); 681 if (Base.getOpcode() == ISD::FrameIndex) { 682 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 683 Base = CurDAG->getTargetFrameIndex( 684 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 685 } 686 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 687 return true; 688 } 689 } 690 691 // Base only. 692 Base = N; 693 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 694 return true; 695 } 696 697 698 699 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 700 SDValue &Opc) { 701 if (N.getOpcode() == ISD::MUL && 702 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 703 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 704 // X * [3,5,9] -> X + X * [2,4,8] etc. 
705 int RHSC = (int)RHS->getZExtValue(); 706 if (RHSC & 1) { 707 RHSC = RHSC & ~1; 708 ARM_AM::AddrOpc AddSub = ARM_AM::add; 709 if (RHSC < 0) { 710 AddSub = ARM_AM::sub; 711 RHSC = - RHSC; 712 } 713 if (isPowerOf2_32(RHSC)) { 714 unsigned ShAmt = Log2_32(RHSC); 715 Base = Offset = N.getOperand(0); 716 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 717 ARM_AM::lsl), 718 SDLoc(N), MVT::i32); 719 return true; 720 } 721 } 722 } 723 } 724 725 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 726 // ISD::OR that is equivalent to an ISD::ADD. 727 !CurDAG->isBaseWithConstantOffset(N)) 728 return false; 729 730 // Leave simple R +/- imm12 operands for LDRi12 731 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 732 int RHSC; 733 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 734 -0x1000+1, 0x1000, RHSC)) // 12 bits. 735 return false; 736 } 737 738 // Otherwise this is R +/- [possibly shifted] R. 739 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 740 ARM_AM::ShiftOpc ShOpcVal = 741 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 742 unsigned ShAmt = 0; 743 744 Base = N.getOperand(0); 745 Offset = N.getOperand(1); 746 747 if (ShOpcVal != ARM_AM::no_shift) { 748 // Check to see if the RHS of the shift is a constant, if not, we can't fold 749 // it. 750 if (ConstantSDNode *Sh = 751 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 752 ShAmt = Sh->getZExtValue(); 753 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 754 Offset = N.getOperand(1).getOperand(0); 755 else { 756 ShAmt = 0; 757 ShOpcVal = ARM_AM::no_shift; 758 } 759 } else { 760 ShOpcVal = ARM_AM::no_shift; 761 } 762 } 763 764 // Try matching (R shl C) + (R). 
765 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 766 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 767 N.getOperand(0).hasOneUse())) { 768 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 769 if (ShOpcVal != ARM_AM::no_shift) { 770 // Check to see if the RHS of the shift is a constant, if not, we can't 771 // fold it. 772 if (ConstantSDNode *Sh = 773 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 774 ShAmt = Sh->getZExtValue(); 775 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 776 Offset = N.getOperand(0).getOperand(0); 777 Base = N.getOperand(1); 778 } else { 779 ShAmt = 0; 780 ShOpcVal = ARM_AM::no_shift; 781 } 782 } else { 783 ShOpcVal = ARM_AM::no_shift; 784 } 785 } 786 } 787 788 // If Offset is a multiply-by-constant and it's profitable to extract a shift 789 // and use it in a shifted operand do so. 790 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 791 unsigned PowerOfTwo = 0; 792 SDValue NewMulConst; 793 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 794 HandleSDNode Handle(Offset); 795 replaceDAGValue(Offset.getOperand(1), NewMulConst); 796 Offset = Handle.getValue(); 797 ShAmt = PowerOfTwo; 798 ShOpcVal = ARM_AM::lsl; 799 } 800 } 801 802 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 803 SDLoc(N), MVT::i32); 804 return true; 805 } 806 807 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 808 SDValue &Offset, SDValue &Opc) { 809 unsigned Opcode = Op->getOpcode(); 810 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 811 ? cast<LoadSDNode>(Op)->getAddressingMode() 812 : cast<StoreSDNode>(Op)->getAddressingMode(); 813 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 814 ? 
ARM_AM::add : ARM_AM::sub; 815 int Val; 816 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 817 return false; 818 819 Offset = N; 820 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 821 unsigned ShAmt = 0; 822 if (ShOpcVal != ARM_AM::no_shift) { 823 // Check to see if the RHS of the shift is a constant, if not, we can't fold 824 // it. 825 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 826 ShAmt = Sh->getZExtValue(); 827 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 828 Offset = N.getOperand(0); 829 else { 830 ShAmt = 0; 831 ShOpcVal = ARM_AM::no_shift; 832 } 833 } else { 834 ShOpcVal = ARM_AM::no_shift; 835 } 836 } 837 838 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 839 SDLoc(N), MVT::i32); 840 return true; 841 } 842 843 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 844 SDValue &Offset, SDValue &Opc) { 845 unsigned Opcode = Op->getOpcode(); 846 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 847 ? cast<LoadSDNode>(Op)->getAddressingMode() 848 : cast<StoreSDNode>(Op)->getAddressingMode(); 849 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 850 ? ARM_AM::add : ARM_AM::sub; 851 int Val; 852 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 853 if (AddSub == ARM_AM::sub) Val *= -1; 854 Offset = CurDAG->getRegister(0, MVT::i32); 855 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 856 return true; 857 } 858 859 return false; 860 } 861 862 863 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 864 SDValue &Offset, SDValue &Opc) { 865 unsigned Opcode = Op->getOpcode(); 866 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 867 ? cast<LoadSDNode>(Op)->getAddressingMode() 868 : cast<StoreSDNode>(Op)->getAddressingMode(); 869 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 870 ? 
ARM_AM::add : ARM_AM::sub; 871 int Val; 872 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 873 Offset = CurDAG->getRegister(0, MVT::i32); 874 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 875 ARM_AM::no_shift), 876 SDLoc(Op), MVT::i32); 877 return true; 878 } 879 880 return false; 881 } 882 883 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 884 Base = N; 885 return true; 886 } 887 888 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 889 SDValue &Base, SDValue &Offset, 890 SDValue &Opc) { 891 if (N.getOpcode() == ISD::SUB) { 892 // X - C is canonicalize to X + -C, no need to handle it here. 893 Base = N.getOperand(0); 894 Offset = N.getOperand(1); 895 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 896 MVT::i32); 897 return true; 898 } 899 900 if (!CurDAG->isBaseWithConstantOffset(N)) { 901 Base = N; 902 if (N.getOpcode() == ISD::FrameIndex) { 903 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 904 Base = CurDAG->getTargetFrameIndex( 905 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 906 } 907 Offset = CurDAG->getRegister(0, MVT::i32); 908 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 909 MVT::i32); 910 return true; 911 } 912 913 // If the RHS is +/- imm8, fold into addr mode. 914 int RHSC; 915 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 916 -256 + 1, 256, RHSC)) { // 8 bits. 
917 Base = N.getOperand(0); 918 if (Base.getOpcode() == ISD::FrameIndex) { 919 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 920 Base = CurDAG->getTargetFrameIndex( 921 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 922 } 923 Offset = CurDAG->getRegister(0, MVT::i32); 924 925 ARM_AM::AddrOpc AddSub = ARM_AM::add; 926 if (RHSC < 0) { 927 AddSub = ARM_AM::sub; 928 RHSC = -RHSC; 929 } 930 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), 931 MVT::i32); 932 return true; 933 } 934 935 Base = N.getOperand(0); 936 Offset = N.getOperand(1); 937 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 938 MVT::i32); 939 return true; 940 } 941 942 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, 943 SDValue &Offset, SDValue &Opc) { 944 unsigned Opcode = Op->getOpcode(); 945 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 946 ? cast<LoadSDNode>(Op)->getAddressingMode() 947 : cast<StoreSDNode>(Op)->getAddressingMode(); 948 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 949 ? ARM_AM::add : ARM_AM::sub; 950 int Val; 951 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits. 
952 Offset = CurDAG->getRegister(0, MVT::i32); 953 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 954 MVT::i32); 955 return true; 956 } 957 958 Offset = N; 959 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 960 MVT::i32); 961 return true; 962 } 963 964 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 965 bool FP16) { 966 if (!CurDAG->isBaseWithConstantOffset(N)) { 967 Base = N; 968 if (N.getOpcode() == ISD::FrameIndex) { 969 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 970 Base = CurDAG->getTargetFrameIndex( 971 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 972 } else if (N.getOpcode() == ARMISD::Wrapper && 973 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 974 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 975 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 976 Base = N.getOperand(0); 977 } 978 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 979 SDLoc(N), MVT::i32); 980 return true; 981 } 982 983 // If the RHS is +/- imm8, fold into addr mode. 984 int RHSC; 985 const int Scale = FP16 ? 
2 : 4; 986 987 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 988 Base = N.getOperand(0); 989 if (Base.getOpcode() == ISD::FrameIndex) { 990 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 991 Base = CurDAG->getTargetFrameIndex( 992 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 993 } 994 995 ARM_AM::AddrOpc AddSub = ARM_AM::add; 996 if (RHSC < 0) { 997 AddSub = ARM_AM::sub; 998 RHSC = -RHSC; 999 } 1000 1001 if (FP16) 1002 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 1003 SDLoc(N), MVT::i32); 1004 else 1005 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1006 SDLoc(N), MVT::i32); 1007 1008 return true; 1009 } 1010 1011 Base = N; 1012 1013 if (FP16) 1014 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 1015 SDLoc(N), MVT::i32); 1016 else 1017 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1018 SDLoc(N), MVT::i32); 1019 1020 return true; 1021 } 1022 1023 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1024 SDValue &Base, SDValue &Offset) { 1025 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 1026 } 1027 1028 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 1029 SDValue &Base, SDValue &Offset) { 1030 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 1031 } 1032 1033 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1034 SDValue &Align) { 1035 Addr = N; 1036 1037 unsigned Alignment = 0; 1038 1039 MemSDNode *MemN = cast<MemSDNode>(Parent); 1040 1041 if (isa<LSBaseSDNode>(MemN) || 1042 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1043 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1044 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1045 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1046 // The maximum alignment is equal to the memory size being referenced. 
1047 unsigned MMOAlign = MemN->getAlignment(); 1048 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1049 if (MMOAlign >= MemSize && MemSize > 1) 1050 Alignment = MemSize; 1051 } else { 1052 // All other uses of addrmode6 are for intrinsics. For now just record 1053 // the raw alignment value; it will be refined later based on the legal 1054 // alignment operands for the intrinsic. 1055 Alignment = MemN->getAlignment(); 1056 } 1057 1058 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1059 return true; 1060 } 1061 1062 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1063 SDValue &Offset) { 1064 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1065 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1066 if (AM != ISD::POST_INC) 1067 return false; 1068 Offset = N; 1069 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1070 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1071 Offset = CurDAG->getRegister(0, MVT::i32); 1072 } 1073 return true; 1074 } 1075 1076 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1077 SDValue &Offset, SDValue &Label) { 1078 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1079 Offset = N.getOperand(0); 1080 SDValue N1 = N.getOperand(1); 1081 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1082 SDLoc(N), MVT::i32); 1083 return true; 1084 } 1085 1086 return false; 1087 } 1088 1089 1090 //===----------------------------------------------------------------------===// 1091 // Thumb Addressing Modes 1092 //===----------------------------------------------------------------------===// 1093 1094 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1095 // Negative numbers are difficult to materialise in thumb1. If we are 1096 // selecting the add of a negative, instead try to select ri with a zero 1097 // offset, so create the add node directly which will become a sub. 
1098 if (N.getOpcode() != ISD::ADD) 1099 return false; 1100 1101 // Look for an imm which is not legal for ld/st, but is legal for sub. 1102 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1103 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1104 1105 return false; 1106 } 1107 1108 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1109 SDValue &Offset) { 1110 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1111 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1112 if (!NC || !NC->isNullValue()) 1113 return false; 1114 1115 Base = Offset = N; 1116 return true; 1117 } 1118 1119 Base = N.getOperand(0); 1120 Offset = N.getOperand(1); 1121 return true; 1122 } 1123 1124 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1125 SDValue &Offset) { 1126 if (shouldUseZeroOffsetLdSt(N)) 1127 return false; // Select ri instead 1128 return SelectThumbAddrModeRRSext(N, Base, Offset); 1129 } 1130 1131 bool 1132 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1133 SDValue &Base, SDValue &OffImm) { 1134 if (shouldUseZeroOffsetLdSt(N)) { 1135 Base = N; 1136 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1137 return true; 1138 } 1139 1140 if (!CurDAG->isBaseWithConstantOffset(N)) { 1141 if (N.getOpcode() == ISD::ADD) { 1142 return false; // We want to select register offset instead 1143 } else if (N.getOpcode() == ARMISD::Wrapper && 1144 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1145 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1146 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1147 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1148 Base = N.getOperand(0); 1149 } else { 1150 Base = N; 1151 } 1152 1153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1154 return true; 1155 } 1156 1157 // If the RHS is + imm5 * scale, fold into addr mode. 
1158 int RHSC; 1159 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1160 Base = N.getOperand(0); 1161 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1162 return true; 1163 } 1164 1165 // Offset is too large, so use register offset instead. 1166 return false; 1167 } 1168 1169 bool 1170 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1171 SDValue &OffImm) { 1172 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1173 } 1174 1175 bool 1176 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1177 SDValue &OffImm) { 1178 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1179 } 1180 1181 bool 1182 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1183 SDValue &OffImm) { 1184 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1185 } 1186 1187 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1188 SDValue &Base, SDValue &OffImm) { 1189 if (N.getOpcode() == ISD::FrameIndex) { 1190 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1191 // Only multiples of 4 are allowed for the offset, so the frame object 1192 // alignment must be at least 4. 1193 MachineFrameInfo &MFI = MF->getFrameInfo(); 1194 if (MFI.getObjectAlign(FI) < Align(4)) 1195 MFI.setObjectAlignment(FI, Align(4)); 1196 Base = CurDAG->getTargetFrameIndex( 1197 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1198 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1199 return true; 1200 } 1201 1202 if (!CurDAG->isBaseWithConstantOffset(N)) 1203 return false; 1204 1205 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1206 // If the RHS is + imm8 * scale, fold into addr mode. 1207 int RHSC; 1208 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1209 Base = N.getOperand(0); 1210 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1211 // Make sure the offset is inside the object, or we might fail to 1212 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1213 // it could show up anyway.) 1214 MachineFrameInfo &MFI = MF->getFrameInfo(); 1215 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1216 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1217 // indexed by the LHS must be 4-byte aligned. 1218 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1219 MFI.setObjectAlignment(FI, Align(4)); 1220 if (MFI.getObjectAlign(FI) >= Align(4)) { 1221 Base = CurDAG->getTargetFrameIndex( 1222 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1223 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1224 return true; 1225 } 1226 } 1227 } 1228 } 1229 1230 return false; 1231 } 1232 1233 template <unsigned Shift> 1234 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1235 SDValue &OffImm) { 1236 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1237 int RHSC; 1238 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1239 RHSC)) { 1240 Base = N.getOperand(0); 1241 if (N.getOpcode() == ISD::SUB) 1242 RHSC = -RHSC; 1243 OffImm = 1244 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1245 return true; 1246 } 1247 } 1248 1249 // Base only. 1250 Base = N; 1251 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1252 return true; 1253 } 1254 1255 1256 //===----------------------------------------------------------------------===// 1257 // Thumb 2 Addressing Modes 1258 //===----------------------------------------------------------------------===// 1259 1260 1261 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1262 SDValue &Base, SDValue &OffImm) { 1263 // Match simple R + imm12 operands. 1264 1265 // Base only. 1266 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1267 !CurDAG->isBaseWithConstantOffset(N)) { 1268 if (N.getOpcode() == ISD::FrameIndex) { 1269 // Match frame index. 
1270 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1271 Base = CurDAG->getTargetFrameIndex( 1272 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1273 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1274 return true; 1275 } 1276 1277 if (N.getOpcode() == ARMISD::Wrapper && 1278 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1279 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1280 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1281 Base = N.getOperand(0); 1282 if (Base.getOpcode() == ISD::TargetConstantPool) 1283 return false; // We want to select t2LDRpci instead. 1284 } else 1285 Base = N; 1286 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1287 return true; 1288 } 1289 1290 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1291 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1292 // Let t2LDRi8 handle (R - imm8). 1293 return false; 1294 1295 int RHSC = (int)RHS->getZExtValue(); 1296 if (N.getOpcode() == ISD::SUB) 1297 RHSC = -RHSC; 1298 1299 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1300 Base = N.getOperand(0); 1301 if (Base.getOpcode() == ISD::FrameIndex) { 1302 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1303 Base = CurDAG->getTargetFrameIndex( 1304 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1305 } 1306 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1307 return true; 1308 } 1309 } 1310 1311 // Base only. 
1312 Base = N; 1313 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1314 return true; 1315 } 1316 1317 template <unsigned Shift> 1318 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1319 SDValue &OffImm) { 1320 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1321 int RHSC; 1322 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1323 Base = N.getOperand(0); 1324 if (Base.getOpcode() == ISD::FrameIndex) { 1325 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1326 Base = CurDAG->getTargetFrameIndex( 1327 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1328 } 1329 1330 if (N.getOpcode() == ISD::SUB) 1331 RHSC = -RHSC; 1332 OffImm = 1333 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1334 return true; 1335 } 1336 } 1337 1338 // Base only. 1339 Base = N; 1340 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1341 return true; 1342 } 1343 1344 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1345 SDValue &Base, SDValue &OffImm) { 1346 // Match simple R - imm8 operands. 
1347 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1348 !CurDAG->isBaseWithConstantOffset(N)) 1349 return false; 1350 1351 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1352 int RHSC = (int)RHS->getSExtValue(); 1353 if (N.getOpcode() == ISD::SUB) 1354 RHSC = -RHSC; 1355 1356 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1357 Base = N.getOperand(0); 1358 if (Base.getOpcode() == ISD::FrameIndex) { 1359 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1360 Base = CurDAG->getTargetFrameIndex( 1361 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1362 } 1363 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1364 return true; 1365 } 1366 } 1367 1368 return false; 1369 } 1370 1371 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1372 SDValue &OffImm){ 1373 unsigned Opcode = Op->getOpcode(); 1374 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1375 ? cast<LoadSDNode>(Op)->getAddressingMode() 1376 : cast<StoreSDNode>(Op)->getAddressingMode(); 1377 int RHSC; 1378 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1379 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1380 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1381 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1382 return true; 1383 } 1384 1385 return false; 1386 } 1387 1388 template <unsigned Shift> 1389 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1390 SDValue &OffImm) { 1391 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1392 int RHSC; 1393 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1394 RHSC)) { 1395 Base = N.getOperand(0); 1396 if (Base.getOpcode() == ISD::FrameIndex) { 1397 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1398 Base = CurDAG->getTargetFrameIndex( 1399 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1400 } 1401 1402 if (N.getOpcode() == ISD::SUB) 1403 RHSC = -RHSC; 1404 OffImm = 1405 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1406 return true; 1407 } 1408 } 1409 1410 // Base only. 1411 Base = N; 1412 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1413 return true; 1414 } 1415 1416 template <unsigned Shift> 1417 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1418 SDValue &OffImm) { 1419 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1420 } 1421 1422 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1423 SDValue &OffImm, 1424 unsigned Shift) { 1425 unsigned Opcode = Op->getOpcode(); 1426 ISD::MemIndexedMode AM; 1427 switch (Opcode) { 1428 case ISD::LOAD: 1429 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1430 break; 1431 case ISD::STORE: 1432 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1433 break; 1434 case ISD::MLOAD: 1435 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1436 break; 1437 case ISD::MSTORE: 1438 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1439 break; 1440 default: 1441 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1442 } 1443 1444 int RHSC; 1445 // 7 bit constant, shifted by Shift. 
1446 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1447 OffImm = 1448 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1449 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1450 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1451 MVT::i32); 1452 return true; 1453 } 1454 return false; 1455 } 1456 1457 template <int Min, int Max> 1458 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1459 int Val; 1460 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1461 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); 1462 return true; 1463 } 1464 return false; 1465 } 1466 1467 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1468 SDValue &Base, 1469 SDValue &OffReg, SDValue &ShImm) { 1470 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1471 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1472 return false; 1473 1474 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1475 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1476 int RHSC = (int)RHS->getZExtValue(); 1477 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1478 return false; 1479 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1480 return false; 1481 } 1482 1483 // Look for (R + R) or (R + (R << [1,2,3])). 1484 unsigned ShAmt = 0; 1485 Base = N.getOperand(0); 1486 OffReg = N.getOperand(1); 1487 1488 // Swap if it is ((R << c) + R). 1489 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1490 if (ShOpcVal != ARM_AM::lsl) { 1491 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1492 if (ShOpcVal == ARM_AM::lsl) 1493 std::swap(Base, OffReg); 1494 } 1495 1496 if (ShOpcVal == ARM_AM::lsl) { 1497 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1498 // it. 
1499 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1500 ShAmt = Sh->getZExtValue(); 1501 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1502 OffReg = OffReg.getOperand(0); 1503 else { 1504 ShAmt = 0; 1505 } 1506 } 1507 } 1508 1509 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1510 // and use it in a shifted operand do so. 1511 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1512 unsigned PowerOfTwo = 0; 1513 SDValue NewMulConst; 1514 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1515 HandleSDNode Handle(OffReg); 1516 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1517 OffReg = Handle.getValue(); 1518 ShAmt = PowerOfTwo; 1519 } 1520 } 1521 1522 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1523 1524 return true; 1525 } 1526 1527 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1528 SDValue &OffImm) { 1529 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1530 // instructions. 1531 Base = N; 1532 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1533 1534 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1535 return true; 1536 1537 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1538 if (!RHS) 1539 return true; 1540 1541 uint32_t RHSC = (int)RHS->getZExtValue(); 1542 if (RHSC > 1020 || RHSC % 4 != 0) 1543 return true; 1544 1545 Base = N.getOperand(0); 1546 if (Base.getOpcode() == ISD::FrameIndex) { 1547 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1548 Base = CurDAG->getTargetFrameIndex( 1549 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1550 } 1551 1552 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1553 return true; 1554 } 1555 1556 //===--------------------------------------------------------------------===// 1557 1558 /// getAL - Returns a ARMCC::AL immediate node. 
1559 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1560 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1561 } 1562 1563 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1564 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1565 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1566 } 1567 1568 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1569 LoadSDNode *LD = cast<LoadSDNode>(N); 1570 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1571 if (AM == ISD::UNINDEXED) 1572 return false; 1573 1574 EVT LoadedVT = LD->getMemoryVT(); 1575 SDValue Offset, AMOpc; 1576 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1577 unsigned Opcode = 0; 1578 bool Match = false; 1579 if (LoadedVT == MVT::i32 && isPre && 1580 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1581 Opcode = ARM::LDR_PRE_IMM; 1582 Match = true; 1583 } else if (LoadedVT == MVT::i32 && !isPre && 1584 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1585 Opcode = ARM::LDR_POST_IMM; 1586 Match = true; 1587 } else if (LoadedVT == MVT::i32 && 1588 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1589 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1590 Match = true; 1591 1592 } else if (LoadedVT == MVT::i16 && 1593 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1594 Match = true; 1595 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1596 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1597 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1598 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1599 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1600 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1601 Match = true; 1602 Opcode = isPre ? 
ARM::LDRSB_PRE : ARM::LDRSB_POST; 1603 } 1604 } else { 1605 if (isPre && 1606 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1607 Match = true; 1608 Opcode = ARM::LDRB_PRE_IMM; 1609 } else if (!isPre && 1610 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1611 Match = true; 1612 Opcode = ARM::LDRB_POST_IMM; 1613 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1614 Match = true; 1615 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1616 } 1617 } 1618 } 1619 1620 if (Match) { 1621 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1622 SDValue Chain = LD->getChain(); 1623 SDValue Base = LD->getBasePtr(); 1624 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1625 CurDAG->getRegister(0, MVT::i32), Chain }; 1626 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1627 MVT::Other, Ops); 1628 transferMemOperands(N, New); 1629 ReplaceNode(N, New); 1630 return true; 1631 } else { 1632 SDValue Chain = LD->getChain(); 1633 SDValue Base = LD->getBasePtr(); 1634 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1635 CurDAG->getRegister(0, MVT::i32), Chain }; 1636 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1637 MVT::Other, Ops); 1638 transferMemOperands(N, New); 1639 ReplaceNode(N, New); 1640 return true; 1641 } 1642 } 1643 1644 return false; 1645 } 1646 1647 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1648 LoadSDNode *LD = cast<LoadSDNode>(N); 1649 EVT LoadedVT = LD->getMemoryVT(); 1650 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1651 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1652 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1653 return false; 1654 1655 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1656 if (!COffs || COffs->getZExtValue() != 4) 1657 return false; 1658 1659 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 
1660 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1661 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1662 // ISel. 1663 SDValue Chain = LD->getChain(); 1664 SDValue Base = LD->getBasePtr(); 1665 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1666 CurDAG->getRegister(0, MVT::i32), Chain }; 1667 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1668 MVT::i32, MVT::Other, Ops); 1669 transferMemOperands(N, New); 1670 ReplaceNode(N, New); 1671 return true; 1672 } 1673 1674 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1675 LoadSDNode *LD = cast<LoadSDNode>(N); 1676 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1677 if (AM == ISD::UNINDEXED) 1678 return false; 1679 1680 EVT LoadedVT = LD->getMemoryVT(); 1681 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1682 SDValue Offset; 1683 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1684 unsigned Opcode = 0; 1685 bool Match = false; 1686 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1687 switch (LoadedVT.getSimpleVT().SimpleTy) { 1688 case MVT::i32: 1689 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1690 break; 1691 case MVT::i16: 1692 if (isSExtLd) 1693 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1694 else 1695 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1696 break; 1697 case MVT::i8: 1698 case MVT::i1: 1699 if (isSExtLd) 1700 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1701 else 1702 Opcode = isPre ? 
ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// Try to select an indexed (pre/post-increment or -decrement) vector load,
/// either a plain LoadSDNode or a MaskedLoadSDNode, to one of the
/// MVE_VLDR*_pre / MVE_VLDR*_post opcodes.
///
/// \param N the load node; must be a LoadSDNode or a MaskedLoadSDNode.
/// \returns true if \p N was replaced by a machine node; false if the load is
/// unindexed, its memory type is not a vector, or no opcode/offset
/// combination below matches.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  unsigned Align;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  // Gather the pieces common to both node kinds. The two arms differ only in
  // the predicate: a plain load is unpredicated (ARMVCC::None with register
  // 0), a masked load is predicated (ARMVCC::Then) on its mask operand.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // The arms are tried in order and the first match wins: the extending
  // (narrow memory type) forms first, then word, halfword and byte forms.
  // NOTE(review): the last argument of SelectT2AddrModeImm7Offset is
  // presumably the offset shift (log2 of the access size) - confirm against
  // its definition.
  SDValue NewOffset;
  if (Align >= 2 && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Align >= 4 &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Align >= 2 &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  // The machine node's results are (i32, loaded value, chain), so results 0
  // and 1 of N are swapped when rewiring its uses below.
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
/// Builds a REG_SEQUENCE of type \p VT in the GPRPair register class, placing
/// \p V0 in gsub_0 and \p V1 in gsub_1. The debug location is taken from
/// \p V0's node.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
/// Builds a REG_SEQUENCE of type \p VT in the DPR_VFP2 register class, placing
/// \p V0 in ssub_0 and \p V1 in ssub_1. The debug location is taken from
/// \p V0's node.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1845 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1846 SDLoc dl(V0.getNode()); 1847 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1848 MVT::i32); 1849 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1850 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1851 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1852 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1853 } 1854 1855 /// Form 4 consecutive D registers from a pair of Q registers. 1856 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1857 SDLoc dl(V0.getNode()); 1858 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1859 MVT::i32); 1860 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1861 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1862 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1863 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1864 } 1865 1866 /// Form 4 consecutive S registers. 1867 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1868 SDValue V2, SDValue V3) { 1869 SDLoc dl(V0.getNode()); 1870 SDValue RegClass = 1871 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1872 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1873 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1874 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1875 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1876 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1877 V2, SubReg2, V3, SubReg3 }; 1878 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1879 } 1880 1881 /// Form 4 consecutive D registers. 
1882 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1883 SDValue V2, SDValue V3) { 1884 SDLoc dl(V0.getNode()); 1885 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1886 MVT::i32); 1887 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1888 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1889 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1890 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1891 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1892 V2, SubReg2, V3, SubReg3 }; 1893 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1894 } 1895 1896 /// Form 4 consecutive Q registers. 1897 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1898 SDValue V2, SDValue V3) { 1899 SDLoc dl(V0.getNode()); 1900 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1901 MVT::i32); 1902 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1903 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1904 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1905 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1906 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1907 V2, SubReg2, V3, SubReg3 }; 1908 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1909 } 1910 1911 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1912 /// of a NEON VLD or VST instruction. The supported values depend on the 1913 /// number of registers being loaded. 
1914 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1915 unsigned NumVecs, bool is64BitVector) { 1916 unsigned NumRegs = NumVecs; 1917 if (!is64BitVector && NumVecs < 3) 1918 NumRegs *= 2; 1919 1920 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1921 if (Alignment >= 32 && NumRegs == 4) 1922 Alignment = 32; 1923 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1924 Alignment = 16; 1925 else if (Alignment >= 8) 1926 Alignment = 8; 1927 else 1928 Alignment = 0; 1929 1930 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1931 } 1932 1933 static bool isVLDfixed(unsigned Opc) 1934 { 1935 switch (Opc) { 1936 default: return false; 1937 case ARM::VLD1d8wb_fixed : return true; 1938 case ARM::VLD1d16wb_fixed : return true; 1939 case ARM::VLD1d64Qwb_fixed : return true; 1940 case ARM::VLD1d32wb_fixed : return true; 1941 case ARM::VLD1d64wb_fixed : return true; 1942 case ARM::VLD1d64TPseudoWB_fixed : return true; 1943 case ARM::VLD1d64QPseudoWB_fixed : return true; 1944 case ARM::VLD1q8wb_fixed : return true; 1945 case ARM::VLD1q16wb_fixed : return true; 1946 case ARM::VLD1q32wb_fixed : return true; 1947 case ARM::VLD1q64wb_fixed : return true; 1948 case ARM::VLD1DUPd8wb_fixed : return true; 1949 case ARM::VLD1DUPd16wb_fixed : return true; 1950 case ARM::VLD1DUPd32wb_fixed : return true; 1951 case ARM::VLD1DUPq8wb_fixed : return true; 1952 case ARM::VLD1DUPq16wb_fixed : return true; 1953 case ARM::VLD1DUPq32wb_fixed : return true; 1954 case ARM::VLD2d8wb_fixed : return true; 1955 case ARM::VLD2d16wb_fixed : return true; 1956 case ARM::VLD2d32wb_fixed : return true; 1957 case ARM::VLD2q8PseudoWB_fixed : return true; 1958 case ARM::VLD2q16PseudoWB_fixed : return true; 1959 case ARM::VLD2q32PseudoWB_fixed : return true; 1960 case ARM::VLD2DUPd8wb_fixed : return true; 1961 case ARM::VLD2DUPd16wb_fixed : return true; 1962 case ARM::VLD2DUPd32wb_fixed : return true; 1963 } 1964 } 1965 1966 static bool 
isVSTfixed(unsigned Opc) 1967 { 1968 switch (Opc) { 1969 default: return false; 1970 case ARM::VST1d8wb_fixed : return true; 1971 case ARM::VST1d16wb_fixed : return true; 1972 case ARM::VST1d32wb_fixed : return true; 1973 case ARM::VST1d64wb_fixed : return true; 1974 case ARM::VST1q8wb_fixed : return true; 1975 case ARM::VST1q16wb_fixed : return true; 1976 case ARM::VST1q32wb_fixed : return true; 1977 case ARM::VST1q64wb_fixed : return true; 1978 case ARM::VST1d64TPseudoWB_fixed : return true; 1979 case ARM::VST1d64QPseudoWB_fixed : return true; 1980 case ARM::VST2d8wb_fixed : return true; 1981 case ARM::VST2d16wb_fixed : return true; 1982 case ARM::VST2d32wb_fixed : return true; 1983 case ARM::VST2q8PseudoWB_fixed : return true; 1984 case ARM::VST2q16PseudoWB_fixed : return true; 1985 case ARM::VST2q32PseudoWB_fixed : return true; 1986 } 1987 } 1988 1989 // Get the register stride update opcode of a VLD/VST instruction that 1990 // is otherwise equivalent to the given fixed stride updating instruction. 
1991 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1992 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1993 && "Incorrect fixed stride updating instruction."); 1994 switch (Opc) { 1995 default: break; 1996 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1997 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1998 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1999 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 2000 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 2001 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 2002 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 2003 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 2004 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 2005 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 2006 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 2007 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 2008 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 2009 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 2010 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 2011 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 2012 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 2013 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 2014 2015 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 2016 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 2017 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 2018 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 2019 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 2020 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 2021 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 2022 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 2023 case ARM::VST1d64TPseudoWB_fixed: return 
ARM::VST1d64TPseudoWB_register; 2024 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 2025 2026 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 2027 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 2028 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 2029 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 2030 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 2031 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 2032 2033 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 2034 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 2035 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 2036 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 2037 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 2038 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 2039 2040 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 2041 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 2042 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 2043 } 2044 return Opc; // If not one we handle, return it unchanged. 2045 } 2046 2047 /// Returns true if the given increment is a Constant known to be equal to the 2048 /// access size performed by a NEON load/store. This means the "[rN]!" form can 2049 /// be used. 
2050 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2051 auto C = dyn_cast<ConstantSDNode>(Inc); 2052 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2053 } 2054 2055 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2056 const uint16_t *DOpcodes, 2057 const uint16_t *QOpcodes0, 2058 const uint16_t *QOpcodes1) { 2059 assert(Subtarget->hasNEON()); 2060 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2061 SDLoc dl(N); 2062 2063 SDValue MemAddr, Align; 2064 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2065 // nodes are not intrinsics. 2066 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2067 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2068 return; 2069 2070 SDValue Chain = N->getOperand(0); 2071 EVT VT = N->getValueType(0); 2072 bool is64BitVector = VT.is64BitVector(); 2073 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2074 2075 unsigned OpcodeIndex; 2076 switch (VT.getSimpleVT().SimpleTy) { 2077 default: llvm_unreachable("unhandled vld type"); 2078 // Double-register operations: 2079 case MVT::v8i8: OpcodeIndex = 0; break; 2080 case MVT::v4f16: 2081 case MVT::v4i16: OpcodeIndex = 1; break; 2082 case MVT::v2f32: 2083 case MVT::v2i32: OpcodeIndex = 2; break; 2084 case MVT::v1i64: OpcodeIndex = 3; break; 2085 // Quad-register operations: 2086 case MVT::v16i8: OpcodeIndex = 0; break; 2087 case MVT::v8f16: 2088 case MVT::v8i16: OpcodeIndex = 1; break; 2089 case MVT::v4f32: 2090 case MVT::v4i32: OpcodeIndex = 2; break; 2091 case MVT::v2f64: 2092 case MVT::v2i64: OpcodeIndex = 3; break; 2093 } 2094 2095 EVT ResTy; 2096 if (NumVecs == 1) 2097 ResTy = VT; 2098 else { 2099 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2100 if (!is64BitVector) 2101 ResTyElts *= 2; 2102 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2103 } 2104 std::vector<EVT> ResTys; 2105 ResTys.push_back(ResTy); 2106 if (isUpdating) 2107 ResTys.push_back(MVT::i32); 2108 ResTys.push_back(MVT::Other); 2109 2110 SDValue Pred = getAL(CurDAG, dl); 2111 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2112 SDNode *VLd; 2113 SmallVector<SDValue, 7> Ops; 2114 2115 // Double registers and VLD1/VLD2 quad registers are directly supported. 2116 if (is64BitVector || NumVecs <= 2) { 2117 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2118 QOpcodes0[OpcodeIndex]); 2119 Ops.push_back(MemAddr); 2120 Ops.push_back(Align); 2121 if (isUpdating) { 2122 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2123 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2124 if (!IsImmUpdate) { 2125 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2126 // check for the opcode rather than the number of vector elements. 2127 if (isVLDfixed(Opc)) 2128 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2129 Ops.push_back(Inc); 2130 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2131 // the operands if not such an opcode. 2132 } else if (!isVLDfixed(Opc)) 2133 Ops.push_back(Reg0); 2134 } 2135 Ops.push_back(Pred); 2136 Ops.push_back(Reg0); 2137 Ops.push_back(Chain); 2138 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2139 2140 } else { 2141 // Otherwise, quad registers are loaded with two separate instructions, 2142 // where one loads the even registers and the other loads the odd registers. 2143 EVT AddrTy = MemAddr.getValueType(); 2144 2145 // Load the even subregs. This is always an updating load, so that it 2146 // provides the address to the second load for the odd subregs. 
2147 SDValue ImplDef = 2148 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2149 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2150 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2151 ResTy, AddrTy, MVT::Other, OpsA); 2152 Chain = SDValue(VLdA, 2); 2153 2154 // Load the odd subregs. 2155 Ops.push_back(SDValue(VLdA, 1)); 2156 Ops.push_back(Align); 2157 if (isUpdating) { 2158 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2159 assert(isa<ConstantSDNode>(Inc.getNode()) && 2160 "only constant post-increment update allowed for VLD3/4"); 2161 (void)Inc; 2162 Ops.push_back(Reg0); 2163 } 2164 Ops.push_back(SDValue(VLdA, 0)); 2165 Ops.push_back(Pred); 2166 Ops.push_back(Reg0); 2167 Ops.push_back(Chain); 2168 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2169 } 2170 2171 // Transfer memoperands. 2172 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2173 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2174 2175 if (NumVecs == 1) { 2176 ReplaceNode(N, VLd); 2177 return; 2178 } 2179 2180 // Extract out the subregisters. 2181 SDValue SuperReg = SDValue(VLd, 0); 2182 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2183 ARM::qsub_3 == ARM::qsub_0 + 3, 2184 "Unexpected subreg numbering"); 2185 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 2186 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2187 ReplaceUses(SDValue(N, Vec), 2188 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2189 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2190 if (isUpdating) 2191 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2192 CurDAG->RemoveDeadNode(N); 2193 } 2194 2195 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2196 const uint16_t *DOpcodes, 2197 const uint16_t *QOpcodes0, 2198 const uint16_t *QOpcodes1) { 2199 assert(Subtarget->hasNEON()); 2200 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2201 SDLoc dl(N); 2202 2203 SDValue MemAddr, Align; 2204 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2205 // nodes are not intrinsics. 2206 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2207 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2208 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2209 return; 2210 2211 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2212 2213 SDValue Chain = N->getOperand(0); 2214 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2215 bool is64BitVector = VT.is64BitVector(); 2216 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2217 2218 unsigned OpcodeIndex; 2219 switch (VT.getSimpleVT().SimpleTy) { 2220 default: llvm_unreachable("unhandled vst type"); 2221 // Double-register operations: 2222 case MVT::v8i8: OpcodeIndex = 0; break; 2223 case MVT::v4f16: 2224 case MVT::v4i16: OpcodeIndex = 1; break; 2225 case MVT::v2f32: 2226 case MVT::v2i32: OpcodeIndex = 2; break; 2227 case MVT::v1i64: OpcodeIndex = 3; break; 2228 // Quad-register operations: 2229 case MVT::v16i8: OpcodeIndex = 0; break; 2230 case MVT::v8f16: 2231 case MVT::v8i16: OpcodeIndex = 1; break; 2232 case MVT::v4f32: 2233 case MVT::v4i32: OpcodeIndex = 2; break; 2234 case MVT::v2f64: 2235 case MVT::v2i64: OpcodeIndex = 3; break; 2236 } 2237 2238 
std::vector<EVT> ResTys; 2239 if (isUpdating) 2240 ResTys.push_back(MVT::i32); 2241 ResTys.push_back(MVT::Other); 2242 2243 SDValue Pred = getAL(CurDAG, dl); 2244 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2245 SmallVector<SDValue, 7> Ops; 2246 2247 // Double registers and VST1/VST2 quad registers are directly supported. 2248 if (is64BitVector || NumVecs <= 2) { 2249 SDValue SrcReg; 2250 if (NumVecs == 1) { 2251 SrcReg = N->getOperand(Vec0Idx); 2252 } else if (is64BitVector) { 2253 // Form a REG_SEQUENCE to force register allocation. 2254 SDValue V0 = N->getOperand(Vec0Idx + 0); 2255 SDValue V1 = N->getOperand(Vec0Idx + 1); 2256 if (NumVecs == 2) 2257 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2258 else { 2259 SDValue V2 = N->getOperand(Vec0Idx + 2); 2260 // If it's a vst3, form a quad D-register and leave the last part as 2261 // an undef. 2262 SDValue V3 = (NumVecs == 3) 2263 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2264 : N->getOperand(Vec0Idx + 3); 2265 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2266 } 2267 } else { 2268 // Form a QQ register. 2269 SDValue Q0 = N->getOperand(Vec0Idx); 2270 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2271 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2272 } 2273 2274 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2275 QOpcodes0[OpcodeIndex]); 2276 Ops.push_back(MemAddr); 2277 Ops.push_back(Align); 2278 if (isUpdating) { 2279 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2280 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2281 if (!IsImmUpdate) { 2282 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2283 // check for the opcode rather than the number of vector elements. 2284 if (isVSTfixed(Opc)) 2285 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2286 Ops.push_back(Inc); 2287 } 2288 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2289 // the operands if not such an opcode. 
2290 else if (!isVSTfixed(Opc)) 2291 Ops.push_back(Reg0); 2292 } 2293 Ops.push_back(SrcReg); 2294 Ops.push_back(Pred); 2295 Ops.push_back(Reg0); 2296 Ops.push_back(Chain); 2297 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2298 2299 // Transfer memoperands. 2300 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2301 2302 ReplaceNode(N, VSt); 2303 return; 2304 } 2305 2306 // Otherwise, quad registers are stored with two separate instructions, 2307 // where one stores the even registers and the other stores the odd registers. 2308 2309 // Form the QQQQ REG_SEQUENCE. 2310 SDValue V0 = N->getOperand(Vec0Idx + 0); 2311 SDValue V1 = N->getOperand(Vec0Idx + 1); 2312 SDValue V2 = N->getOperand(Vec0Idx + 2); 2313 SDValue V3 = (NumVecs == 3) 2314 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2315 : N->getOperand(Vec0Idx + 3); 2316 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2317 2318 // Store the even D registers. This is always an updating store, so that it 2319 // provides the address to the second store for the odd subregs. 2320 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2321 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2322 MemAddr.getValueType(), 2323 MVT::Other, OpsA); 2324 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2325 Chain = SDValue(VStA, 1); 2326 2327 // Store the odd D registers. 
2328 Ops.push_back(SDValue(VStA, 0)); 2329 Ops.push_back(Align); 2330 if (isUpdating) { 2331 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2332 assert(isa<ConstantSDNode>(Inc.getNode()) && 2333 "only constant post-increment update allowed for VST3/4"); 2334 (void)Inc; 2335 Ops.push_back(Reg0); 2336 } 2337 Ops.push_back(RegSeq); 2338 Ops.push_back(Pred); 2339 Ops.push_back(Reg0); 2340 Ops.push_back(Chain); 2341 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2342 Ops); 2343 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2344 ReplaceNode(N, VStB); 2345 } 2346 2347 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2348 unsigned NumVecs, 2349 const uint16_t *DOpcodes, 2350 const uint16_t *QOpcodes) { 2351 assert(Subtarget->hasNEON()); 2352 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2353 SDLoc dl(N); 2354 2355 SDValue MemAddr, Align; 2356 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2357 // nodes are not intrinsics. 2358 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2359 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2360 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2361 return; 2362 2363 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2364 2365 SDValue Chain = N->getOperand(0); 2366 unsigned Lane = 2367 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2368 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2369 bool is64BitVector = VT.is64BitVector(); 2370 2371 unsigned Alignment = 0; 2372 if (NumVecs != 3) { 2373 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2374 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2375 if (Alignment > NumBytes) 2376 Alignment = NumBytes; 2377 if (Alignment < 8 && Alignment < NumBytes) 2378 Alignment = 0; 2379 // Alignment must be a power of two; make sure of that. 
2380 Alignment = (Alignment & -Alignment); 2381 if (Alignment == 1) 2382 Alignment = 0; 2383 } 2384 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2385 2386 unsigned OpcodeIndex; 2387 switch (VT.getSimpleVT().SimpleTy) { 2388 default: llvm_unreachable("unhandled vld/vst lane type"); 2389 // Double-register operations: 2390 case MVT::v8i8: OpcodeIndex = 0; break; 2391 case MVT::v4f16: 2392 case MVT::v4i16: OpcodeIndex = 1; break; 2393 case MVT::v2f32: 2394 case MVT::v2i32: OpcodeIndex = 2; break; 2395 // Quad-register operations: 2396 case MVT::v8f16: 2397 case MVT::v8i16: OpcodeIndex = 0; break; 2398 case MVT::v4f32: 2399 case MVT::v4i32: OpcodeIndex = 1; break; 2400 } 2401 2402 std::vector<EVT> ResTys; 2403 if (IsLoad) { 2404 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2405 if (!is64BitVector) 2406 ResTyElts *= 2; 2407 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2408 MVT::i64, ResTyElts)); 2409 } 2410 if (isUpdating) 2411 ResTys.push_back(MVT::i32); 2412 ResTys.push_back(MVT::Other); 2413 2414 SDValue Pred = getAL(CurDAG, dl); 2415 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2416 2417 SmallVector<SDValue, 8> Ops; 2418 Ops.push_back(MemAddr); 2419 Ops.push_back(Align); 2420 if (isUpdating) { 2421 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2422 bool IsImmUpdate = 2423 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2424 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2425 } 2426 2427 SDValue SuperReg; 2428 SDValue V0 = N->getOperand(Vec0Idx + 0); 2429 SDValue V1 = N->getOperand(Vec0Idx + 1); 2430 if (NumVecs == 2) { 2431 if (is64BitVector) 2432 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2433 else 2434 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2435 } else { 2436 SDValue V2 = N->getOperand(Vec0Idx + 2); 2437 SDValue V3 = (NumVecs == 3) 2438 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2439 : N->getOperand(Vec0Idx + 3); 2440 if (is64BitVector) 2441 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2442 else 2443 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2444 } 2445 Ops.push_back(SuperReg); 2446 Ops.push_back(getI32Imm(Lane, dl)); 2447 Ops.push_back(Pred); 2448 Ops.push_back(Reg0); 2449 Ops.push_back(Chain); 2450 2451 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2452 QOpcodes[OpcodeIndex]); 2453 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2454 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2455 if (!IsLoad) { 2456 ReplaceNode(N, VLdLn); 2457 return; 2458 } 2459 2460 // Extract the subregisters. 2461 SuperReg = SDValue(VLdLn, 0); 2462 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2463 ARM::qsub_3 == ARM::qsub_0 + 3, 2464 "Unexpected subreg numbering"); 2465 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2466 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2467 ReplaceUses(SDValue(N, Vec), 2468 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2469 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2470 if (isUpdating) 2471 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2472 CurDAG->RemoveDeadNode(N); 2473 } 2474 2475 template <typename SDValueVector> 2476 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2477 SDValue PredicateMask) { 2478 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2479 Ops.push_back(PredicateMask); 2480 } 2481 2482 template <typename SDValueVector> 2483 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2484 SDValue PredicateMask, 2485 SDValue Inactive) { 2486 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2487 Ops.push_back(PredicateMask); 2488 Ops.push_back(Inactive); 2489 } 2490 2491 template <typename SDValueVector> 2492 void 
ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2493 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2494 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2495 } 2496 2497 template <typename SDValueVector> 2498 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2499 EVT InactiveTy) { 2500 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2501 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2502 Ops.push_back(SDValue( 2503 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2504 } 2505 2506 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2507 bool Predicated) { 2508 SDLoc Loc(N); 2509 SmallVector<SDValue, 8> Ops; 2510 2511 uint16_t Opcode; 2512 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2513 case 32: 2514 Opcode = Opcodes[0]; 2515 break; 2516 case 64: 2517 Opcode = Opcodes[1]; 2518 break; 2519 default: 2520 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2521 } 2522 2523 Ops.push_back(N->getOperand(2)); // vector of base addresses 2524 2525 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2526 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2527 2528 if (Predicated) 2529 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2530 else 2531 AddEmptyMVEPredicateToOps(Ops, Loc); 2532 2533 Ops.push_back(N->getOperand(0)); // chain 2534 2535 SmallVector<EVT, 8> VTs; 2536 VTs.push_back(N->getValueType(1)); 2537 VTs.push_back(N->getValueType(0)); 2538 VTs.push_back(N->getValueType(2)); 2539 2540 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2541 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2542 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2543 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2544 CurDAG->RemoveDeadNode(N); 2545 } 2546 2547 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2548 bool Immediate, 2549 bool 
HasSaturationOperand) { 2550 SDLoc Loc(N); 2551 SmallVector<SDValue, 8> Ops; 2552 2553 // Two 32-bit halves of the value to be shifted 2554 Ops.push_back(N->getOperand(1)); 2555 Ops.push_back(N->getOperand(2)); 2556 2557 // The shift count 2558 if (Immediate) { 2559 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2560 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2561 } else { 2562 Ops.push_back(N->getOperand(3)); 2563 } 2564 2565 // The immediate saturation operand, if any 2566 if (HasSaturationOperand) { 2567 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); 2568 int SatBit = (SatOp == 64 ? 0 : 1); 2569 Ops.push_back(getI32Imm(SatBit, Loc)); 2570 } 2571 2572 // MVE scalar shifts are IT-predicable, so include the standard 2573 // predicate arguments. 2574 Ops.push_back(getAL(CurDAG, Loc)); 2575 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2576 2577 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2578 } 2579 2580 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2581 uint16_t OpcodeWithNoCarry, 2582 bool Add, bool Predicated) { 2583 SDLoc Loc(N); 2584 SmallVector<SDValue, 8> Ops; 2585 uint16_t Opcode; 2586 2587 unsigned FirstInputOp = Predicated ? 2 : 1; 2588 2589 // Two input vectors and the input carry flag 2590 Ops.push_back(N->getOperand(FirstInputOp)); 2591 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2592 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2593 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2594 uint32_t CarryMask = 1 << 29; 2595 uint32_t CarryExpected = Add ? 
0 : CarryMask; 2596 if (CarryInConstant && 2597 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2598 Opcode = OpcodeWithNoCarry; 2599 } else { 2600 Ops.push_back(CarryIn); 2601 Opcode = OpcodeWithCarry; 2602 } 2603 2604 if (Predicated) 2605 AddMVEPredicateToOps(Ops, Loc, 2606 N->getOperand(FirstInputOp + 3), // predicate 2607 N->getOperand(FirstInputOp - 1)); // inactive 2608 else 2609 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2610 2611 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2612 } 2613 2614 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2615 SDLoc Loc(N); 2616 SmallVector<SDValue, 8> Ops; 2617 2618 // One vector input, followed by a 32-bit word of bits to shift in 2619 // and then an immediate shift count 2620 Ops.push_back(N->getOperand(1)); 2621 Ops.push_back(N->getOperand(2)); 2622 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2623 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2624 2625 if (Predicated) 2626 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2627 else 2628 AddEmptyMVEPredicateToOps(Ops, Loc); 2629 2630 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops)); 2631 } 2632 2633 static bool SDValueToConstBool(SDValue SDVal) { 2634 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2635 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2636 uint64_t Value = SDValConstant->getZExtValue(); 2637 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2638 return Value; 2639 } 2640 2641 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2642 const uint16_t *OpcodesS, 2643 const uint16_t *OpcodesU, 2644 size_t Stride, size_t TySize) { 2645 assert(TySize < Stride && "Invalid TySize"); 2646 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2647 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2648 bool IsExchange = 
SDValueToConstBool(N->getOperand(3)); 2649 if (IsUnsigned) { 2650 assert(!IsSub && 2651 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2652 assert(!IsExchange && 2653 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2654 } 2655 2656 auto OpIsZero = [N](size_t OpNo) { 2657 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo))) 2658 if (OpConst->getZExtValue() == 0) 2659 return true; 2660 return false; 2661 }; 2662 2663 // If the input accumulator value is not zero, select an instruction with 2664 // accumulator, otherwise select an instruction without accumulator 2665 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2666 2667 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2668 if (IsSub) 2669 Opcodes += 4 * Stride; 2670 if (IsExchange) 2671 Opcodes += 2 * Stride; 2672 if (IsAccum) 2673 Opcodes += Stride; 2674 uint16_t Opcode = Opcodes[TySize]; 2675 2676 SDLoc Loc(N); 2677 SmallVector<SDValue, 8> Ops; 2678 // Push the accumulator operands, if they are used 2679 if (IsAccum) { 2680 Ops.push_back(N->getOperand(4)); 2681 Ops.push_back(N->getOperand(5)); 2682 } 2683 // Push the two vector operands 2684 Ops.push_back(N->getOperand(6)); 2685 Ops.push_back(N->getOperand(7)); 2686 2687 if (Predicated) 2688 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2689 else 2690 AddEmptyMVEPredicateToOps(Ops, Loc); 2691 2692 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2693 } 2694 2695 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2696 const uint16_t *OpcodesS, 2697 const uint16_t *OpcodesU) { 2698 EVT VecTy = N->getOperand(6).getValueType(); 2699 size_t SizeIndex; 2700 switch (VecTy.getVectorElementType().getSizeInBits()) { 2701 case 16: 2702 SizeIndex = 0; 2703 break; 2704 case 32: 2705 SizeIndex = 1; 2706 break; 2707 default: 2708 llvm_unreachable("bad vector element size"); 2709 } 2710 2711 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2712 } 
2713 2714 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2715 const uint16_t *OpcodesS, 2716 const uint16_t *OpcodesU) { 2717 assert( 2718 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2719 32 && 2720 "bad vector element size"); 2721 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2722 } 2723 2724 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2725 const uint16_t *const *Opcodes, 2726 bool HasWriteback) { 2727 EVT VT = N->getValueType(0); 2728 SDLoc Loc(N); 2729 2730 const uint16_t *OurOpcodes; 2731 switch (VT.getVectorElementType().getSizeInBits()) { 2732 case 8: 2733 OurOpcodes = Opcodes[0]; 2734 break; 2735 case 16: 2736 OurOpcodes = Opcodes[1]; 2737 break; 2738 case 32: 2739 OurOpcodes = Opcodes[2]; 2740 break; 2741 default: 2742 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2743 } 2744 2745 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2746 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2747 unsigned PtrOperand = HasWriteback ? 
1 : 2; 2748 2749 auto Data = SDValue( 2750 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2751 SDValue Chain = N->getOperand(0); 2752 // Add a MVE_VLDn instruction for each Vec, except the last 2753 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2754 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2755 auto LoadInst = 2756 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2757 Data = SDValue(LoadInst, 0); 2758 Chain = SDValue(LoadInst, 1); 2759 } 2760 // The last may need a writeback on it 2761 if (HasWriteback) 2762 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2763 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2764 auto LoadInst = 2765 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2766 2767 unsigned i; 2768 for (i = 0; i < NumVecs; i++) 2769 ReplaceUses(SDValue(N, i), 2770 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2771 SDValue(LoadInst, 0))); 2772 if (HasWriteback) 2773 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2774 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 
2 : 1)); 2775 CurDAG->RemoveDeadNode(N); 2776 } 2777 2778 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 2779 bool Wrapping, bool Predicated) { 2780 EVT VT = N->getValueType(0); 2781 SDLoc Loc(N); 2782 2783 uint16_t Opcode; 2784 switch (VT.getScalarSizeInBits()) { 2785 case 8: 2786 Opcode = Opcodes[0]; 2787 break; 2788 case 16: 2789 Opcode = Opcodes[1]; 2790 break; 2791 case 32: 2792 Opcode = Opcodes[2]; 2793 break; 2794 default: 2795 llvm_unreachable("bad vector element size in SelectMVE_VxDUP"); 2796 } 2797 2798 SmallVector<SDValue, 8> Ops; 2799 unsigned OpIdx = 1; 2800 2801 SDValue Inactive; 2802 if (Predicated) 2803 Inactive = N->getOperand(OpIdx++); 2804 2805 Ops.push_back(N->getOperand(OpIdx++)); // base 2806 if (Wrapping) 2807 Ops.push_back(N->getOperand(OpIdx++)); // limit 2808 2809 SDValue ImmOp = N->getOperand(OpIdx++); // step 2810 int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue(); 2811 Ops.push_back(getI32Imm(ImmValue, Loc)); 2812 2813 if (Predicated) 2814 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive); 2815 else 2816 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2817 2818 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2819 } 2820 2821 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, 2822 size_t NumExtraOps, bool HasAccum) { 2823 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian(); 2824 SDLoc Loc(N); 2825 SmallVector<SDValue, 8> Ops; 2826 2827 unsigned OpIdx = 1; 2828 2829 // Convert and append the immediate operand designating the coprocessor. 2830 SDValue ImmCorpoc = N->getOperand(OpIdx++); 2831 uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue(); 2832 Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); 2833 2834 // For accumulating variants copy the low and high order parts of the 2835 // accumulator into a register pair and add it to the operand vector. 
2836 if (HasAccum) { 2837 SDValue AccLo = N->getOperand(OpIdx++); 2838 SDValue AccHi = N->getOperand(OpIdx++); 2839 if (IsBigEndian) 2840 std::swap(AccLo, AccHi); 2841 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0)); 2842 } 2843 2844 // Copy extra operands as-is. 2845 for (size_t I = 0; I < NumExtraOps; I++) 2846 Ops.push_back(N->getOperand(OpIdx++)); 2847 2848 // Convert and append the immediate operand 2849 SDValue Imm = N->getOperand(OpIdx); 2850 uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue(); 2851 Ops.push_back(getI32Imm(ImmVal, Loc)); 2852 2853 // Accumulating variants are IT-predicable, add predicate operands. 2854 if (HasAccum) { 2855 SDValue Pred = getAL(CurDAG, Loc); 2856 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2857 Ops.push_back(Pred); 2858 Ops.push_back(PredReg); 2859 } 2860 2861 // Create the CDE intruction 2862 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops); 2863 SDValue ResultPair = SDValue(InstrNode, 0); 2864 2865 // The original intrinsic had two outputs, and the output of the dual-register 2866 // CDE instruction is a register pair. We need to extract the two subregisters 2867 // and replace all uses of the original outputs with the extracted 2868 // subregisters. 
2869 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1}; 2870 if (IsBigEndian) 2871 std::swap(SubRegs[0], SubRegs[1]); 2872 2873 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) { 2874 if (SDValue(N, ResIdx).use_empty()) 2875 continue; 2876 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc, 2877 MVT::i32, ResultPair); 2878 ReplaceUses(SDValue(N, ResIdx), SubReg); 2879 } 2880 2881 CurDAG->RemoveDeadNode(N); 2882 } 2883 2884 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2885 bool isUpdating, unsigned NumVecs, 2886 const uint16_t *DOpcodes, 2887 const uint16_t *QOpcodes0, 2888 const uint16_t *QOpcodes1) { 2889 assert(Subtarget->hasNEON()); 2890 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2891 SDLoc dl(N); 2892 2893 SDValue MemAddr, Align; 2894 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2895 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2896 return; 2897 2898 SDValue Chain = N->getOperand(0); 2899 EVT VT = N->getValueType(0); 2900 bool is64BitVector = VT.is64BitVector(); 2901 2902 unsigned Alignment = 0; 2903 if (NumVecs != 3) { 2904 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2905 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2906 if (Alignment > NumBytes) 2907 Alignment = NumBytes; 2908 if (Alignment < 8 && Alignment < NumBytes) 2909 Alignment = 0; 2910 // Alignment must be a power of two; make sure of that. 
2911 Alignment = (Alignment & -Alignment); 2912 if (Alignment == 1) 2913 Alignment = 0; 2914 } 2915 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2916 2917 unsigned OpcodeIndex; 2918 switch (VT.getSimpleVT().SimpleTy) { 2919 default: llvm_unreachable("unhandled vld-dup type"); 2920 case MVT::v8i8: 2921 case MVT::v16i8: OpcodeIndex = 0; break; 2922 case MVT::v4i16: 2923 case MVT::v8i16: 2924 case MVT::v4f16: 2925 case MVT::v8f16: 2926 OpcodeIndex = 1; break; 2927 case MVT::v2f32: 2928 case MVT::v2i32: 2929 case MVT::v4f32: 2930 case MVT::v4i32: OpcodeIndex = 2; break; 2931 case MVT::v1f64: 2932 case MVT::v1i64: OpcodeIndex = 3; break; 2933 } 2934 2935 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2936 if (!is64BitVector) 2937 ResTyElts *= 2; 2938 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2939 2940 std::vector<EVT> ResTys; 2941 ResTys.push_back(ResTy); 2942 if (isUpdating) 2943 ResTys.push_back(MVT::i32); 2944 ResTys.push_back(MVT::Other); 2945 2946 SDValue Pred = getAL(CurDAG, dl); 2947 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2948 2949 SDNode *VLdDup; 2950 if (is64BitVector || NumVecs == 1) { 2951 SmallVector<SDValue, 6> Ops; 2952 Ops.push_back(MemAddr); 2953 Ops.push_back(Align); 2954 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] : 2955 QOpcodes0[OpcodeIndex]; 2956 if (isUpdating) { 2957 // fixed-stride update instructions don't have an explicit writeback 2958 // operand. It's implicit in the opcode itself. 2959 SDValue Inc = N->getOperand(2); 2960 bool IsImmUpdate = 2961 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2962 if (NumVecs <= 2 && !IsImmUpdate) 2963 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2964 if (!IsImmUpdate) 2965 Ops.push_back(Inc); 2966 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 
2967 else if (NumVecs > 2) 2968 Ops.push_back(Reg0); 2969 } 2970 Ops.push_back(Pred); 2971 Ops.push_back(Reg0); 2972 Ops.push_back(Chain); 2973 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2974 } else if (NumVecs == 2) { 2975 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain }; 2976 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2977 dl, ResTys, OpsA); 2978 2979 Chain = SDValue(VLdA, 1); 2980 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain }; 2981 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 2982 } else { 2983 SDValue ImplDef = 2984 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2985 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain }; 2986 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2987 dl, ResTys, OpsA); 2988 2989 SDValue SuperReg = SDValue(VLdA, 0); 2990 Chain = SDValue(VLdA, 1); 2991 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain }; 2992 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 2993 } 2994 2995 // Transfer memoperands. 2996 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2997 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 2998 2999 // Extract the subregisters. 3000 if (NumVecs == 1) { 3001 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 3002 } else { 3003 SDValue SuperReg = SDValue(VLdDup, 0); 3004 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 3005 unsigned SubIdx = is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0; 3006 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 3007 ReplaceUses(SDValue(N, Vec), 3008 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 3009 } 3010 } 3011 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 3012 if (isUpdating) 3013 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 3014 CurDAG->RemoveDeadNode(N); 3015 } 3016 3017 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3018 if (!Subtarget->hasV6T2Ops()) 3019 return false; 3020 3021 unsigned Opc = isSigned 3022 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3023 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 3024 SDLoc dl(N); 3025 3026 // For unsigned extracts, check for a shift right and mask 3027 unsigned And_imm = 0; 3028 if (N->getOpcode() == ISD::AND) { 3029 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3030 3031 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3032 if (And_imm & (And_imm + 1)) 3033 return false; 3034 3035 unsigned Srl_imm = 0; 3036 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3037 Srl_imm)) { 3038 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3039 3040 // Mask off the unnecessary bits of the AND immediate; normally 3041 // DAGCombine will do this, but that might not happen if 3042 // targetShrinkDemandedConstant chooses a different immediate. 3043 And_imm &= -1U >> Srl_imm; 3044 3045 // Note: The width operand is encoded as width-1. 3046 unsigned Width = countTrailingOnes(And_imm) - 1; 3047 unsigned LSB = Srl_imm; 3048 3049 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3050 3051 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3052 // It's cheaper to use a right shift to extract the top bits. 3053 if (Subtarget->isThumb()) { 3054 Opc = isSigned ? 
ARM::t2ASRri : ARM::t2LSRri; 3055 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3056 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3057 getAL(CurDAG, dl), Reg0, Reg0 }; 3058 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3059 return true; 3060 } 3061 3062 // ARM models shift instructions as MOVsi with shifter operand. 3063 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3064 SDValue ShOpc = 3065 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3066 MVT::i32); 3067 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3068 getAL(CurDAG, dl), Reg0, Reg0 }; 3069 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3070 return true; 3071 } 3072 3073 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3074 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3075 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3076 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3077 getAL(CurDAG, dl), Reg0 }; 3078 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3079 return true; 3080 } 3081 } 3082 return false; 3083 } 3084 3085 // Otherwise, we're looking for a shift of a shift 3086 unsigned Shl_imm = 0; 3087 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3088 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3089 unsigned Srl_imm = 0; 3090 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3091 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3092 // Note: The width operand is encoded as width-1. 
3093 unsigned Width = 32 - Srl_imm - 1; 3094 int LSB = Srl_imm - Shl_imm; 3095 if (LSB < 0) 3096 return false; 3097 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3098 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3099 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3100 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3101 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3102 getAL(CurDAG, dl), Reg0 }; 3103 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3104 return true; 3105 } 3106 } 3107 3108 // Or we are looking for a shift of an and, with a mask operand 3109 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3110 isShiftedMask_32(And_imm)) { 3111 unsigned Srl_imm = 0; 3112 unsigned LSB = countTrailingZeros(And_imm); 3113 // Shift must be the same as the ands lsb 3114 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3115 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3116 unsigned MSB = 31 - countLeadingZeros(And_imm); 3117 // Note: The width operand is encoded as width-1. 
3118 unsigned Width = MSB - LSB; 3119 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3120 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3121 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3122 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3123 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3124 getAL(CurDAG, dl), Reg0 }; 3125 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3126 return true; 3127 } 3128 } 3129 3130 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3131 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3132 unsigned LSB = 0; 3133 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3134 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3135 return false; 3136 3137 if (LSB + Width > 32) 3138 return false; 3139 3140 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3141 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3142 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3143 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3144 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3145 getAL(CurDAG, dl), Reg0 }; 3146 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3147 return true; 3148 } 3149 3150 return false; 3151 } 3152 3153 /// Target-specific DAG combining for ISD::XOR. 3154 /// Target-independent combining lowers SELECT_CC nodes of the form 3155 /// select_cc setg[ge] X, 0, X, -X 3156 /// select_cc setgt X, -1, X, -X 3157 /// select_cc setl[te] X, 0, -X, X 3158 /// select_cc setlt X, 1, -X, X 3159 /// which represent Integer ABS into: 3160 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 3161 /// ARM instruction selection detects the latter and matches it to 3162 /// ARM::ABS or ARM::t2ABS machine node. 
3163 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 3164 SDValue XORSrc0 = N->getOperand(0); 3165 SDValue XORSrc1 = N->getOperand(1); 3166 EVT VT = N->getValueType(0); 3167 3168 if (Subtarget->isThumb1Only()) 3169 return false; 3170 3171 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 3172 return false; 3173 3174 SDValue ADDSrc0 = XORSrc0.getOperand(0); 3175 SDValue ADDSrc1 = XORSrc0.getOperand(1); 3176 SDValue SRASrc0 = XORSrc1.getOperand(0); 3177 SDValue SRASrc1 = XORSrc1.getOperand(1); 3178 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 3179 EVT XType = SRASrc0.getValueType(); 3180 unsigned Size = XType.getSizeInBits() - 1; 3181 3182 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 3183 XType.isInteger() && SRAConstant != nullptr && 3184 Size == SRAConstant->getZExtValue()) { 3185 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 3186 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 3187 return true; 3188 } 3189 3190 return false; 3191 } 3192 3193 /// We've got special pseudo-instructions for these 3194 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3195 unsigned Opcode; 3196 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3197 if (MemTy == MVT::i8) 3198 Opcode = ARM::CMP_SWAP_8; 3199 else if (MemTy == MVT::i16) 3200 Opcode = ARM::CMP_SWAP_16; 3201 else if (MemTy == MVT::i32) 3202 Opcode = ARM::CMP_SWAP_32; 3203 else 3204 llvm_unreachable("Unknown AtomicCmpSwap type"); 3205 3206 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3207 N->getOperand(0)}; 3208 SDNode *CmpSwap = CurDAG->getMachineNode( 3209 Opcode, SDLoc(N), 3210 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3211 3212 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3213 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3214 3215 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3216 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3217 CurDAG->RemoveDeadNode(N); 3218 } 3219 3220 static 
Optional<std::pair<unsigned, unsigned>> 3221 getContiguousRangeOfSetBits(const APInt &A) { 3222 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; 3223 unsigned LastOne = A.countTrailingZeros(); 3224 if (A.countPopulation() != (FirstOne - LastOne + 1)) 3225 return Optional<std::pair<unsigned,unsigned>>(); 3226 return std::make_pair(FirstOne, LastOne); 3227 } 3228 3229 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3230 assert(N->getOpcode() == ARMISD::CMPZ); 3231 SwitchEQNEToPLMI = false; 3232 3233 if (!Subtarget->isThumb()) 3234 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3235 // LSR don't exist as standalone instructions - they need the barrel shifter. 3236 return; 3237 3238 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3239 SDValue And = N->getOperand(0); 3240 if (!And->hasOneUse()) 3241 return; 3242 3243 SDValue Zero = N->getOperand(1); 3244 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 3245 And->getOpcode() != ISD::AND) 3246 return; 3247 SDValue X = And.getOperand(0); 3248 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3249 3250 if (!C) 3251 return; 3252 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3253 if (!Range) 3254 return; 3255 3256 // There are several ways to lower this: 3257 SDNode *NewN; 3258 SDLoc dl(N); 3259 3260 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3261 if (Subtarget->isThumb2()) { 3262 Opc = (Opc == ARM::tLSLri) ? 
ARM::t2LSLri : ARM::t2LSRri; 3263 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3264 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3265 CurDAG->getRegister(0, MVT::i32) }; 3266 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3267 } else { 3268 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3269 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3270 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3271 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3272 } 3273 }; 3274 3275 if (Range->second == 0) { 3276 // 1. Mask includes the LSB -> Simply shift the top N bits off 3277 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3278 ReplaceNode(And.getNode(), NewN); 3279 } else if (Range->first == 31) { 3280 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3281 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3282 ReplaceNode(And.getNode(), NewN); 3283 } else if (Range->first == Range->second) { 3284 // 3. Only one bit is set. We can shift this into the sign bit and use a 3285 // PL/MI comparison. 3286 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3287 ReplaceNode(And.getNode(), NewN); 3288 3289 SwitchEQNEToPLMI = true; 3290 } else if (!Subtarget->hasV6T2Ops()) { 3291 // 4. Do a double shift to clear bottom and top bits, but only in 3292 // thumb-1 mode as in thumb-2 we can use UBFX. 3293 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3294 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3295 Range->second + (31 - Range->first)); 3296 ReplaceNode(And.getNode(), NewN); 3297 } 3298 3299 } 3300 3301 void ARMDAGToDAGISel::Select(SDNode *N) { 3302 SDLoc dl(N); 3303 3304 if (N->isMachineOpcode()) { 3305 N->setNodeId(-1); 3306 return; // Already selected. 3307 } 3308 3309 switch (N->getOpcode()) { 3310 default: break; 3311 case ISD::STORE: { 3312 // For Thumb1, match an sp-relative store in C++. 
This is a little 3313 // unfortunate, but I don't think I can make the chain check work 3314 // otherwise. (The chain of the store has to be the same as the chain 3315 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3316 // a direct reference to "SP".) 3317 // 3318 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3319 // a different addressing mode from other four-byte stores. 3320 // 3321 // This pattern usually comes up with call arguments. 3322 StoreSDNode *ST = cast<StoreSDNode>(N); 3323 SDValue Ptr = ST->getBasePtr(); 3324 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3325 int RHSC = 0; 3326 if (Ptr.getOpcode() == ISD::ADD && 3327 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3328 Ptr = Ptr.getOperand(0); 3329 3330 if (Ptr.getOpcode() == ISD::CopyFromReg && 3331 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3332 Ptr.getOperand(0) == ST->getChain()) { 3333 SDValue Ops[] = {ST->getValue(), 3334 CurDAG->getRegister(ARM::SP, MVT::i32), 3335 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3336 getAL(CurDAG, dl), 3337 CurDAG->getRegister(0, MVT::i32), 3338 ST->getChain()}; 3339 MachineSDNode *ResNode = 3340 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3341 MachineMemOperand *MemOp = ST->getMemOperand(); 3342 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3343 ReplaceNode(N, ResNode); 3344 return; 3345 } 3346 } 3347 break; 3348 } 3349 case ISD::WRITE_REGISTER: 3350 if (tryWriteRegister(N)) 3351 return; 3352 break; 3353 case ISD::READ_REGISTER: 3354 if (tryReadRegister(N)) 3355 return; 3356 break; 3357 case ISD::INLINEASM: 3358 case ISD::INLINEASM_BR: 3359 if (tryInlineAsm(N)) 3360 return; 3361 break; 3362 case ISD::XOR: 3363 // Select special operations if XOR node forms integer ABS pattern 3364 if (tryABSOp(N)) 3365 return; 3366 // Other cases are autogenerated. 
3367 break; 3368 case ISD::Constant: { 3369 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3370 // If we can't materialize the constant we need to use a literal pool 3371 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3372 SDValue CPIdx = CurDAG->getTargetConstantPool( 3373 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3374 TLI->getPointerTy(CurDAG->getDataLayout())); 3375 3376 SDNode *ResNode; 3377 if (Subtarget->isThumb()) { 3378 SDValue Ops[] = { 3379 CPIdx, 3380 getAL(CurDAG, dl), 3381 CurDAG->getRegister(0, MVT::i32), 3382 CurDAG->getEntryNode() 3383 }; 3384 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3385 Ops); 3386 } else { 3387 SDValue Ops[] = { 3388 CPIdx, 3389 CurDAG->getTargetConstant(0, dl, MVT::i32), 3390 getAL(CurDAG, dl), 3391 CurDAG->getRegister(0, MVT::i32), 3392 CurDAG->getEntryNode() 3393 }; 3394 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3395 Ops); 3396 } 3397 // Annotate the Node with memory operand information so that MachineInstr 3398 // queries work properly. This e.g. gives the register allocation the 3399 // required information for rematerialization. 3400 MachineFunction& MF = CurDAG->getMachineFunction(); 3401 MachineMemOperand *MemOp = 3402 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3403 MachineMemOperand::MOLoad, 4, Align(4)); 3404 3405 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3406 3407 ReplaceNode(N, ResNode); 3408 return; 3409 } 3410 3411 // Other cases are autogenerated. 3412 break; 3413 } 3414 case ISD::FrameIndex: { 3415 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3416 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3417 SDValue TFI = CurDAG->getTargetFrameIndex( 3418 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3419 if (Subtarget->isThumb1Only()) { 3420 // Set the alignment of the frame object to 4, to avoid having to generate 3421 // more than one ADD 3422 MachineFrameInfo &MFI = MF->getFrameInfo(); 3423 if (MFI.getObjectAlign(FI) < Align(4)) 3424 MFI.setObjectAlignment(FI, Align(4)); 3425 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3426 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3427 return; 3428 } else { 3429 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3430 ARM::t2ADDri : ARM::ADDri); 3431 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3432 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3433 CurDAG->getRegister(0, MVT::i32) }; 3434 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3435 return; 3436 } 3437 } 3438 case ISD::SRL: 3439 if (tryV6T2BitfieldExtractOp(N, false)) 3440 return; 3441 break; 3442 case ISD::SIGN_EXTEND_INREG: 3443 case ISD::SRA: 3444 if (tryV6T2BitfieldExtractOp(N, true)) 3445 return; 3446 break; 3447 case ISD::MUL: 3448 if (Subtarget->isThumb1Only()) 3449 break; 3450 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3451 unsigned RHSV = C->getZExtValue(); 3452 if (!RHSV) break; 3453 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3454 unsigned ShImm = Log2_32(RHSV-1); 3455 if (ShImm >= 32) 3456 break; 3457 SDValue V = N->getOperand(0); 3458 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3459 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3460 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3461 if (Subtarget->isThumb()) { 3462 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3463 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3464 return; 3465 } else { 3466 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3467 Reg0 }; 3468 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3469 return; 3470 } 3471 } 3472 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3473 unsigned ShImm = Log2_32(RHSV+1); 3474 if (ShImm >= 32) 3475 break; 3476 SDValue V = N->getOperand(0); 3477 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3478 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3479 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3480 if (Subtarget->isThumb()) { 3481 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3482 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3483 return; 3484 } else { 3485 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3486 Reg0 }; 3487 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3488 return; 3489 } 3490 } 3491 } 3492 break; 3493 case ISD::AND: { 3494 // Check for unsigned bitfield extract 3495 if (tryV6T2BitfieldExtractOp(N, false)) 3496 return; 3497 3498 // If an immediate is used in an AND node, it is possible that the immediate 3499 // can be more optimally materialized when negated. If this is the case we 3500 // can negate the immediate and use a BIC instead. 3501 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3502 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3503 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3504 3505 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3506 // immediate can be negated and fit in the immediate operand of 3507 // a t2BIC, don't do any manual transform here as this can be 3508 // handled by the generic ISel machinery. 3509 bool PreferImmediateEncoding = 3510 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3511 if (!PreferImmediateEncoding && 3512 ConstantMaterializationCost(Imm, Subtarget) > 3513 ConstantMaterializationCost(~Imm, Subtarget)) { 3514 // The current immediate costs more to materialize than a negated 3515 // immediate, so negate the immediate and use a BIC. 3516 SDValue NewImm = 3517 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3518 // If the new constant didn't exist before, reposition it in the topological 3519 // ordering so it is just before N. Otherwise, don't touch its location. 3520 if (NewImm->getNodeId() == -1) 3521 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3522 3523 if (!Subtarget->hasThumb2()) { 3524 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3525 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3526 CurDAG->getRegister(0, MVT::i32)}; 3527 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3528 return; 3529 } else { 3530 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3531 CurDAG->getRegister(0, MVT::i32), 3532 CurDAG->getRegister(0, MVT::i32)}; 3533 ReplaceNode(N, 3534 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3535 return; 3536 } 3537 } 3538 } 3539 3540 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3541 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3542 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3543 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3544 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3545 EVT VT = N->getValueType(0); 3546 if (VT != MVT::i32) 3547 break; 3548 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3549 ? 
ARM::t2MOVTi16 3550 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3551 if (!Opc) 3552 break; 3553 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3554 N1C = dyn_cast<ConstantSDNode>(N1); 3555 if (!N1C) 3556 break; 3557 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3558 SDValue N2 = N0.getOperand(1); 3559 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3560 if (!N2C) 3561 break; 3562 unsigned N1CVal = N1C->getZExtValue(); 3563 unsigned N2CVal = N2C->getZExtValue(); 3564 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3565 (N1CVal & 0xffffU) == 0xffffU && 3566 (N2CVal & 0xffffU) == 0x0U) { 3567 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3568 dl, MVT::i32); 3569 SDValue Ops[] = { N0.getOperand(0), Imm16, 3570 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3571 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3572 return; 3573 } 3574 } 3575 3576 break; 3577 } 3578 case ARMISD::UMAAL: { 3579 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3580 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3581 N->getOperand(2), N->getOperand(3), 3582 getAL(CurDAG, dl), 3583 CurDAG->getRegister(0, MVT::i32) }; 3584 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3585 return; 3586 } 3587 case ARMISD::UMLAL:{ 3588 if (Subtarget->isThumb()) { 3589 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3590 N->getOperand(3), getAL(CurDAG, dl), 3591 CurDAG->getRegister(0, MVT::i32)}; 3592 ReplaceNode( 3593 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3594 return; 3595 }else{ 3596 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3597 N->getOperand(3), getAL(CurDAG, dl), 3598 CurDAG->getRegister(0, MVT::i32), 3599 CurDAG->getRegister(0, MVT::i32) }; 3600 ReplaceNode(N, CurDAG->getMachineNode( 3601 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3602 MVT::i32, MVT::i32, Ops)); 3603 return; 3604 } 3605 } 3606 case ARMISD::SMLAL:{ 3607 if (Subtarget->isThumb()) { 3608 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3609 N->getOperand(3), getAL(CurDAG, dl), 3610 CurDAG->getRegister(0, MVT::i32)}; 3611 ReplaceNode( 3612 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3613 return; 3614 }else{ 3615 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3616 N->getOperand(3), getAL(CurDAG, dl), 3617 CurDAG->getRegister(0, MVT::i32), 3618 CurDAG->getRegister(0, MVT::i32) }; 3619 ReplaceNode(N, CurDAG->getMachineNode( 3620 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3621 MVT::i32, MVT::i32, Ops)); 3622 return; 3623 } 3624 } 3625 case ARMISD::SUBE: { 3626 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3627 break; 3628 // Look for a pattern to match SMMLS 3629 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3630 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3631 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3632 !SDValue(N, 1).use_empty()) 3633 break; 3634 3635 if (Subtarget->isThumb()) 3636 assert(Subtarget->hasThumb2() && 3637 "This pattern should not be generated for Thumb"); 3638 3639 SDValue SmulLoHi = N->getOperand(1); 3640 SDValue Subc = N->getOperand(2); 3641 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3642 3643 if (!Zero || Zero->getZExtValue() != 0 || 3644 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3645 N->getOperand(1) != SmulLoHi.getValue(1) || 3646 N->getOperand(2) != Subc.getValue(1)) 3647 break; 3648 3649 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3650 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3651 N->getOperand(0), getAL(CurDAG, dl), 3652 CurDAG->getRegister(0, MVT::i32) }; 3653 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3654 return; 3655 } 3656 case ISD::LOAD: { 3657 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3658 return; 3659 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3660 if (tryT2IndexedLoad(N)) 3661 return; 3662 } else if (Subtarget->isThumb()) { 3663 if (tryT1IndexedLoad(N)) 3664 return; 3665 } else if (tryARMIndexedLoad(N)) 3666 return; 3667 // Other cases are autogenerated. 3668 break; 3669 } 3670 case ISD::MLOAD: 3671 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3672 return; 3673 // Other cases are autogenerated. 3674 break; 3675 case ARMISD::WLS: 3676 case ARMISD::LE: { 3677 SDValue Ops[] = { N->getOperand(1), 3678 N->getOperand(2), 3679 N->getOperand(0) }; 3680 unsigned Opc = N->getOpcode() == ARMISD::WLS ? 3681 ARM::t2WhileLoopStart : ARM::t2LoopEnd; 3682 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3683 ReplaceUses(N, New); 3684 CurDAG->RemoveDeadNode(N); 3685 return; 3686 } 3687 case ARMISD::LDRD: { 3688 if (Subtarget->isThumb2()) 3689 break; // TableGen handles isel in this case. 3690 SDValue Base, RegOffset, ImmOffset; 3691 const SDValue &Chain = N->getOperand(0); 3692 const SDValue &Addr = N->getOperand(1); 3693 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3694 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3695 // The register-offset variant of LDRD mandates that the register 3696 // allocated to RegOffset is not reused in any of the remaining operands. 3697 // This restriction is currently not enforced. Therefore emitting this 3698 // variant is explicitly avoided. 
3699 Base = Addr; 3700 RegOffset = CurDAG->getRegister(0, MVT::i32); 3701 } 3702 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 3703 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 3704 {MVT::Untyped, MVT::Other}, Ops); 3705 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 3706 SDValue(New, 0)); 3707 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 3708 SDValue(New, 0)); 3709 transferMemOperands(N, New); 3710 ReplaceUses(SDValue(N, 0), Lo); 3711 ReplaceUses(SDValue(N, 1), Hi); 3712 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 3713 CurDAG->RemoveDeadNode(N); 3714 return; 3715 } 3716 case ARMISD::STRD: { 3717 if (Subtarget->isThumb2()) 3718 break; // TableGen handles isel in this case. 3719 SDValue Base, RegOffset, ImmOffset; 3720 const SDValue &Chain = N->getOperand(0); 3721 const SDValue &Addr = N->getOperand(3); 3722 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3723 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3724 // The register-offset variant of STRD mandates that the register 3725 // allocated to RegOffset is not reused in any of the remaining operands. 3726 // This restriction is currently not enforced. Therefore emitting this 3727 // variant is explicitly avoided. 
3728 Base = Addr; 3729 RegOffset = CurDAG->getRegister(0, MVT::i32); 3730 } 3731 SDNode *RegPair = 3732 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 3733 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 3734 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 3735 transferMemOperands(N, New); 3736 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 3737 CurDAG->RemoveDeadNode(N); 3738 return; 3739 } 3740 case ARMISD::LOOP_DEC: { 3741 SDValue Ops[] = { N->getOperand(1), 3742 N->getOperand(2), 3743 N->getOperand(0) }; 3744 SDNode *Dec = 3745 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3746 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3747 ReplaceUses(N, Dec); 3748 CurDAG->RemoveDeadNode(N); 3749 return; 3750 } 3751 case ARMISD::BRCOND: { 3752 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3753 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3754 // Pattern complexity = 6 cost = 1 size = 0 3755 3756 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3757 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3758 // Pattern complexity = 6 cost = 1 size = 0 3759 3760 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3761 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3762 // Pattern complexity = 6 cost = 1 size = 0 3763 3764 unsigned Opc = Subtarget->isThumb() ? 3765 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3766 SDValue Chain = N->getOperand(0); 3767 SDValue N1 = N->getOperand(1); 3768 SDValue N2 = N->getOperand(2); 3769 SDValue N3 = N->getOperand(3); 3770 SDValue InFlag = N->getOperand(4); 3771 assert(N1.getOpcode() == ISD::BasicBlock); 3772 assert(N2.getOpcode() == ISD::Constant); 3773 assert(N3.getOpcode() == ISD::Register); 3774 3775 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3776 3777 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3778 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3779 SDValue Int = InFlag.getOperand(0); 3780 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3781 3782 // Handle low-overhead loops. 3783 if (ID == Intrinsic::loop_decrement_reg) { 3784 SDValue Elements = Int.getOperand(2); 3785 SDValue Size = CurDAG->getTargetConstant( 3786 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3787 MVT::i32); 3788 3789 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3790 SDNode *LoopDec = 3791 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3792 CurDAG->getVTList(MVT::i32, MVT::Other), 3793 Args); 3794 ReplaceUses(Int.getNode(), LoopDec); 3795 3796 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3797 SDNode *LoopEnd = 3798 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3799 3800 ReplaceUses(N, LoopEnd); 3801 CurDAG->RemoveDeadNode(N); 3802 CurDAG->RemoveDeadNode(InFlag.getNode()); 3803 CurDAG->RemoveDeadNode(Int.getNode()); 3804 return; 3805 } 3806 } 3807 3808 bool SwitchEQNEToPLMI; 3809 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3810 InFlag = N->getOperand(4); 3811 3812 if (SwitchEQNEToPLMI) { 3813 switch ((ARMCC::CondCodes)CC) { 3814 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3815 case ARMCC::NE: 3816 CC = (unsigned)ARMCC::MI; 3817 break; 3818 case ARMCC::EQ: 3819 CC = (unsigned)ARMCC::PL; 3820 break; 3821 } 3822 } 3823 } 3824 3825 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3826 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3827 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3828 MVT::Glue, Ops); 3829 Chain = SDValue(ResNode, 0); 3830 if (N->getNumValues() == 2) { 3831 InFlag = SDValue(ResNode, 1); 3832 ReplaceUses(SDValue(N, 1), InFlag); 3833 } 3834 ReplaceUses(SDValue(N, 0), 3835 SDValue(Chain.getNode(), Chain.getResNo())); 3836 CurDAG->RemoveDeadNode(N); 3837 return; 3838 } 3839 3840 case ARMISD::CMPZ: { 3841 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3842 // This allows us to avoid materializing the expensive negative constant. 3843 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3844 // for its glue output. 3845 SDValue X = N->getOperand(0); 3846 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3847 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3848 int64_t Addend = -C->getSExtValue(); 3849 3850 SDNode *Add = nullptr; 3851 // ADDS can be better than CMN if the immediate fits in a 3852 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3853 // Outside that range we can just use a CMN which is 32-bit but has a 3854 // 12-bit immediate range. 3855 if (Addend < 1<<8) { 3856 if (Subtarget->isThumb2()) { 3857 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3858 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3859 CurDAG->getRegister(0, MVT::i32) }; 3860 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3861 } else { 3862 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 3863 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3864 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3865 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3866 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3867 } 3868 } 3869 if (Add) { 3870 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3871 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3872 } 3873 } 3874 // Other cases are autogenerated. 3875 break; 3876 } 3877 3878 case ARMISD::CMOV: { 3879 SDValue InFlag = N->getOperand(4); 3880 3881 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3882 bool SwitchEQNEToPLMI; 3883 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3884 3885 if (SwitchEQNEToPLMI) { 3886 SDValue ARMcc = N->getOperand(2); 3887 ARMCC::CondCodes CC = 3888 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3889 3890 switch (CC) { 3891 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3892 case ARMCC::NE: 3893 CC = ARMCC::MI; 3894 break; 3895 case ARMCC::EQ: 3896 CC = ARMCC::PL; 3897 break; 3898 } 3899 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3900 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3901 N->getOperand(3), N->getOperand(4)}; 3902 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3903 } 3904 3905 } 3906 // Other cases are autogenerated. 3907 break; 3908 } 3909 3910 case ARMISD::VZIP: { 3911 unsigned Opc = 0; 3912 EVT VT = N->getValueType(0); 3913 switch (VT.getSimpleVT().SimpleTy) { 3914 default: return; 3915 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3916 case MVT::v4f16: 3917 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3918 case MVT::v2f32: 3919 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3920 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3921 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3922 case MVT::v8f16: 3923 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3924 case MVT::v4f32: 3925 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3926 } 3927 SDValue Pred = getAL(CurDAG, dl); 3928 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3929 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3930 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3931 return; 3932 } 3933 case ARMISD::VUZP: { 3934 unsigned Opc = 0; 3935 EVT VT = N->getValueType(0); 3936 switch (VT.getSimpleVT().SimpleTy) { 3937 default: return; 3938 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3939 case MVT::v4f16: 3940 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3941 case MVT::v2f32: 3942 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3943 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3944 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3945 case MVT::v8f16: 3946 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3947 case MVT::v4f32: 3948 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3949 } 3950 SDValue Pred = getAL(CurDAG, dl); 3951 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3952 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3953 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3954 return; 3955 } 3956 case ARMISD::VTRN: { 3957 unsigned Opc = 0; 3958 EVT VT = N->getValueType(0); 3959 switch (VT.getSimpleVT().SimpleTy) { 3960 default: return; 3961 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3962 case MVT::v4f16: 3963 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3964 case MVT::v2f32: 3965 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3966 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3967 case MVT::v8f16: 3968 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3969 case MVT::v4f32: 3970 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3971 } 3972 SDValue Pred = getAL(CurDAG, dl); 3973 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 3974 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3975 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3976 return; 3977 } 3978 case ARMISD::BUILD_VECTOR: { 3979 EVT VecVT = N->getValueType(0); 3980 EVT EltVT = VecVT.getVectorElementType(); 3981 unsigned NumElts = VecVT.getVectorNumElements(); 3982 if (EltVT == MVT::f64) { 3983 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3984 ReplaceNode( 3985 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3986 return; 3987 } 3988 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3989 if (NumElts == 2) { 3990 ReplaceNode( 3991 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3992 return; 3993 } 3994 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3995 ReplaceNode(N, 3996 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3997 N->getOperand(2), N->getOperand(3))); 3998 return; 3999 } 4000 4001 case ARMISD::VLD1DUP: { 4002 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4003 ARM::VLD1DUPd32 }; 4004 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4005 ARM::VLD1DUPq32 }; 4006 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4007 return; 4008 } 4009 4010 case ARMISD::VLD2DUP: { 4011 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4012 ARM::VLD2DUPd32 }; 4013 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4014 return; 4015 } 4016 4017 case ARMISD::VLD3DUP: { 4018 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4019 ARM::VLD3DUPd16Pseudo, 4020 ARM::VLD3DUPd32Pseudo }; 4021 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4022 return; 4023 } 4024 4025 case ARMISD::VLD4DUP: { 4026 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4027 ARM::VLD4DUPd16Pseudo, 4028 ARM::VLD4DUPd32Pseudo }; 4029 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4030 return; 
4031 } 4032 4033 case ARMISD::VLD1DUP_UPD: { 4034 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4035 ARM::VLD1DUPd16wb_fixed, 4036 ARM::VLD1DUPd32wb_fixed }; 4037 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4038 ARM::VLD1DUPq16wb_fixed, 4039 ARM::VLD1DUPq32wb_fixed }; 4040 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4041 return; 4042 } 4043 4044 case ARMISD::VLD2DUP_UPD: { 4045 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 4046 ARM::VLD2DUPd16wb_fixed, 4047 ARM::VLD2DUPd32wb_fixed }; 4048 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 4049 return; 4050 } 4051 4052 case ARMISD::VLD3DUP_UPD: { 4053 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4054 ARM::VLD3DUPd16Pseudo_UPD, 4055 ARM::VLD3DUPd32Pseudo_UPD }; 4056 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 4057 return; 4058 } 4059 4060 case ARMISD::VLD4DUP_UPD: { 4061 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4062 ARM::VLD4DUPd16Pseudo_UPD, 4063 ARM::VLD4DUPd32Pseudo_UPD }; 4064 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 4065 return; 4066 } 4067 4068 case ARMISD::VLD1_UPD: { 4069 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4070 ARM::VLD1d16wb_fixed, 4071 ARM::VLD1d32wb_fixed, 4072 ARM::VLD1d64wb_fixed }; 4073 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4074 ARM::VLD1q16wb_fixed, 4075 ARM::VLD1q32wb_fixed, 4076 ARM::VLD1q64wb_fixed }; 4077 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4078 return; 4079 } 4080 4081 case ARMISD::VLD2_UPD: { 4082 if (Subtarget->hasNEON()) { 4083 static const uint16_t DOpcodes[] = { 4084 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4085 ARM::VLD1q64wb_fixed}; 4086 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4087 ARM::VLD2q16PseudoWB_fixed, 4088 ARM::VLD2q32PseudoWB_fixed}; 4089 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4090 } else { 4091 static const 
uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4092 ARM::MVE_VLD21_8_wb}; 4093 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4094 ARM::MVE_VLD21_16_wb}; 4095 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4096 ARM::MVE_VLD21_32_wb}; 4097 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4098 SelectMVE_VLD(N, 2, Opcodes, true); 4099 } 4100 return; 4101 } 4102 4103 case ARMISD::VLD3_UPD: { 4104 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4105 ARM::VLD3d16Pseudo_UPD, 4106 ARM::VLD3d32Pseudo_UPD, 4107 ARM::VLD1d64TPseudoWB_fixed}; 4108 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4109 ARM::VLD3q16Pseudo_UPD, 4110 ARM::VLD3q32Pseudo_UPD }; 4111 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4112 ARM::VLD3q16oddPseudo_UPD, 4113 ARM::VLD3q32oddPseudo_UPD }; 4114 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4115 return; 4116 } 4117 4118 case ARMISD::VLD4_UPD: { 4119 if (Subtarget->hasNEON()) { 4120 static const uint16_t DOpcodes[] = { 4121 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4122 ARM::VLD1d64QPseudoWB_fixed}; 4123 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4124 ARM::VLD4q16Pseudo_UPD, 4125 ARM::VLD4q32Pseudo_UPD}; 4126 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4127 ARM::VLD4q16oddPseudo_UPD, 4128 ARM::VLD4q32oddPseudo_UPD}; 4129 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4130 } else { 4131 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4132 ARM::MVE_VLD42_8, 4133 ARM::MVE_VLD43_8_wb}; 4134 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4135 ARM::MVE_VLD42_16, 4136 ARM::MVE_VLD43_16_wb}; 4137 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4138 ARM::MVE_VLD42_32, 4139 ARM::MVE_VLD43_32_wb}; 4140 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4141 SelectMVE_VLD(N, 4, Opcodes, true); 4142 } 
4143 return; 4144 } 4145 4146 case ARMISD::VLD2LN_UPD: { 4147 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4148 ARM::VLD2LNd16Pseudo_UPD, 4149 ARM::VLD2LNd32Pseudo_UPD }; 4150 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4151 ARM::VLD2LNq32Pseudo_UPD }; 4152 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4153 return; 4154 } 4155 4156 case ARMISD::VLD3LN_UPD: { 4157 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4158 ARM::VLD3LNd16Pseudo_UPD, 4159 ARM::VLD3LNd32Pseudo_UPD }; 4160 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4161 ARM::VLD3LNq32Pseudo_UPD }; 4162 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4163 return; 4164 } 4165 4166 case ARMISD::VLD4LN_UPD: { 4167 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4168 ARM::VLD4LNd16Pseudo_UPD, 4169 ARM::VLD4LNd32Pseudo_UPD }; 4170 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4171 ARM::VLD4LNq32Pseudo_UPD }; 4172 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4173 return; 4174 } 4175 4176 case ARMISD::VST1_UPD: { 4177 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4178 ARM::VST1d16wb_fixed, 4179 ARM::VST1d32wb_fixed, 4180 ARM::VST1d64wb_fixed }; 4181 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4182 ARM::VST1q16wb_fixed, 4183 ARM::VST1q32wb_fixed, 4184 ARM::VST1q64wb_fixed }; 4185 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4186 return; 4187 } 4188 4189 case ARMISD::VST2_UPD: { 4190 if (Subtarget->hasNEON()) { 4191 static const uint16_t DOpcodes[] = { 4192 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4193 ARM::VST1q64wb_fixed}; 4194 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4195 ARM::VST2q16PseudoWB_fixed, 4196 ARM::VST2q32PseudoWB_fixed}; 4197 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4198 return; 4199 } 4200 break; 4201 } 4202 4203 case ARMISD::VST3_UPD: { 4204 static const uint16_t DOpcodes[] = { 
ARM::VST3d8Pseudo_UPD, 4205 ARM::VST3d16Pseudo_UPD, 4206 ARM::VST3d32Pseudo_UPD, 4207 ARM::VST1d64TPseudoWB_fixed}; 4208 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4209 ARM::VST3q16Pseudo_UPD, 4210 ARM::VST3q32Pseudo_UPD }; 4211 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4212 ARM::VST3q16oddPseudo_UPD, 4213 ARM::VST3q32oddPseudo_UPD }; 4214 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4215 return; 4216 } 4217 4218 case ARMISD::VST4_UPD: { 4219 if (Subtarget->hasNEON()) { 4220 static const uint16_t DOpcodes[] = { 4221 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4222 ARM::VST1d64QPseudoWB_fixed}; 4223 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4224 ARM::VST4q16Pseudo_UPD, 4225 ARM::VST4q32Pseudo_UPD}; 4226 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4227 ARM::VST4q16oddPseudo_UPD, 4228 ARM::VST4q32oddPseudo_UPD}; 4229 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4230 return; 4231 } 4232 break; 4233 } 4234 4235 case ARMISD::VST2LN_UPD: { 4236 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4237 ARM::VST2LNd16Pseudo_UPD, 4238 ARM::VST2LNd32Pseudo_UPD }; 4239 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4240 ARM::VST2LNq32Pseudo_UPD }; 4241 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4242 return; 4243 } 4244 4245 case ARMISD::VST3LN_UPD: { 4246 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4247 ARM::VST3LNd16Pseudo_UPD, 4248 ARM::VST3LNd32Pseudo_UPD }; 4249 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4250 ARM::VST3LNq32Pseudo_UPD }; 4251 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4252 return; 4253 } 4254 4255 case ARMISD::VST4LN_UPD: { 4256 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4257 ARM::VST4LNd16Pseudo_UPD, 4258 ARM::VST4LNd32Pseudo_UPD }; 4259 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4260 ARM::VST4LNq32Pseudo_UPD 
}; 4261 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4262 return; 4263 } 4264 4265 case ISD::INTRINSIC_VOID: 4266 case ISD::INTRINSIC_W_CHAIN: { 4267 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4268 switch (IntNo) { 4269 default: 4270 break; 4271 4272 case Intrinsic::arm_mrrc: 4273 case Intrinsic::arm_mrrc2: { 4274 SDLoc dl(N); 4275 SDValue Chain = N->getOperand(0); 4276 unsigned Opc; 4277 4278 if (Subtarget->isThumb()) 4279 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4280 else 4281 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4282 4283 SmallVector<SDValue, 5> Ops; 4284 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4285 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4286 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4287 4288 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4289 // instruction will always be '1111' but it is possible in assembly language to specify 4290 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4291 if (Opc != ARM::MRRC2) { 4292 Ops.push_back(getAL(CurDAG, dl)); 4293 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4294 } 4295 4296 Ops.push_back(Chain); 4297 4298 // Writes to two registers. 4299 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4300 4301 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4302 return; 4303 } 4304 case Intrinsic::arm_ldaexd: 4305 case Intrinsic::arm_ldrexd: { 4306 SDLoc dl(N); 4307 SDValue Chain = N->getOperand(0); 4308 SDValue MemAddr = N->getOperand(2); 4309 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4310 4311 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4312 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4313 : (IsAcquire ? 
ARM::LDAEXD : ARM::LDREXD); 4314 4315 // arm_ldrexd returns a i64 value in {i32, i32} 4316 std::vector<EVT> ResTys; 4317 if (isThumb) { 4318 ResTys.push_back(MVT::i32); 4319 ResTys.push_back(MVT::i32); 4320 } else 4321 ResTys.push_back(MVT::Untyped); 4322 ResTys.push_back(MVT::Other); 4323 4324 // Place arguments in the right order. 4325 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4326 CurDAG->getRegister(0, MVT::i32), Chain}; 4327 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4328 // Transfer memoperands. 4329 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4330 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4331 4332 // Remap uses. 4333 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4334 if (!SDValue(N, 0).use_empty()) { 4335 SDValue Result; 4336 if (isThumb) 4337 Result = SDValue(Ld, 0); 4338 else { 4339 SDValue SubRegIdx = 4340 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4341 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4342 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4343 Result = SDValue(ResNode,0); 4344 } 4345 ReplaceUses(SDValue(N, 0), Result); 4346 } 4347 if (!SDValue(N, 1).use_empty()) { 4348 SDValue Result; 4349 if (isThumb) 4350 Result = SDValue(Ld, 1); 4351 else { 4352 SDValue SubRegIdx = 4353 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4354 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4355 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4356 Result = SDValue(ResNode,0); 4357 } 4358 ReplaceUses(SDValue(N, 1), Result); 4359 } 4360 ReplaceUses(SDValue(N, 2), OutChain); 4361 CurDAG->RemoveDeadNode(N); 4362 return; 4363 } 4364 case Intrinsic::arm_stlexd: 4365 case Intrinsic::arm_strexd: { 4366 SDLoc dl(N); 4367 SDValue Chain = N->getOperand(0); 4368 SDValue Val0 = N->getOperand(2); 4369 SDValue Val1 = N->getOperand(3); 4370 SDValue MemAddr = N->getOperand(4); 4371 4372 // Store exclusive double return a i32 value which is the 
return status 4373 // of the issued store. 4374 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4375 4376 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4377 // Place arguments in the right order. 4378 SmallVector<SDValue, 7> Ops; 4379 if (isThumb) { 4380 Ops.push_back(Val0); 4381 Ops.push_back(Val1); 4382 } else 4383 // arm_strexd uses GPRPair. 4384 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4385 Ops.push_back(MemAddr); 4386 Ops.push_back(getAL(CurDAG, dl)); 4387 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4388 Ops.push_back(Chain); 4389 4390 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4391 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 4392 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4393 4394 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4395 // Transfer memoperands. 4396 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4397 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4398 4399 ReplaceNode(N, St); 4400 return; 4401 } 4402 4403 case Intrinsic::arm_neon_vld1: { 4404 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4405 ARM::VLD1d32, ARM::VLD1d64 }; 4406 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4407 ARM::VLD1q32, ARM::VLD1q64}; 4408 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4409 return; 4410 } 4411 4412 case Intrinsic::arm_neon_vld1x2: { 4413 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4414 ARM::VLD1q32, ARM::VLD1q64 }; 4415 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4416 ARM::VLD1d16QPseudo, 4417 ARM::VLD1d32QPseudo, 4418 ARM::VLD1d64QPseudo }; 4419 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4420 return; 4421 } 4422 4423 case Intrinsic::arm_neon_vld1x3: { 4424 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4425 ARM::VLD1d16TPseudo, 4426 ARM::VLD1d32TPseudo, 4427 ARM::VLD1d64TPseudo }; 4428 static const uint16_t QOpcodes0[] = { 
ARM::VLD1q8LowTPseudo_UPD, 4429 ARM::VLD1q16LowTPseudo_UPD, 4430 ARM::VLD1q32LowTPseudo_UPD, 4431 ARM::VLD1q64LowTPseudo_UPD }; 4432 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4433 ARM::VLD1q16HighTPseudo, 4434 ARM::VLD1q32HighTPseudo, 4435 ARM::VLD1q64HighTPseudo }; 4436 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4437 return; 4438 } 4439 4440 case Intrinsic::arm_neon_vld1x4: { 4441 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4442 ARM::VLD1d16QPseudo, 4443 ARM::VLD1d32QPseudo, 4444 ARM::VLD1d64QPseudo }; 4445 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4446 ARM::VLD1q16LowQPseudo_UPD, 4447 ARM::VLD1q32LowQPseudo_UPD, 4448 ARM::VLD1q64LowQPseudo_UPD }; 4449 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4450 ARM::VLD1q16HighQPseudo, 4451 ARM::VLD1q32HighQPseudo, 4452 ARM::VLD1q64HighQPseudo }; 4453 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4454 return; 4455 } 4456 4457 case Intrinsic::arm_neon_vld2: { 4458 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4459 ARM::VLD2d32, ARM::VLD1q64 }; 4460 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4461 ARM::VLD2q32Pseudo }; 4462 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4463 return; 4464 } 4465 4466 case Intrinsic::arm_neon_vld3: { 4467 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4468 ARM::VLD3d16Pseudo, 4469 ARM::VLD3d32Pseudo, 4470 ARM::VLD1d64TPseudo }; 4471 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4472 ARM::VLD3q16Pseudo_UPD, 4473 ARM::VLD3q32Pseudo_UPD }; 4474 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4475 ARM::VLD3q16oddPseudo, 4476 ARM::VLD3q32oddPseudo }; 4477 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4478 return; 4479 } 4480 4481 case Intrinsic::arm_neon_vld4: { 4482 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4483 ARM::VLD4d16Pseudo, 4484 ARM::VLD4d32Pseudo, 4485 ARM::VLD1d64QPseudo }; 4486 
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4487 ARM::VLD4q16Pseudo_UPD, 4488 ARM::VLD4q32Pseudo_UPD }; 4489 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4490 ARM::VLD4q16oddPseudo, 4491 ARM::VLD4q32oddPseudo }; 4492 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4493 return; 4494 } 4495 4496 case Intrinsic::arm_neon_vld2dup: { 4497 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4498 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4499 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4500 ARM::VLD2DUPq16EvenPseudo, 4501 ARM::VLD2DUPq32EvenPseudo }; 4502 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4503 ARM::VLD2DUPq16OddPseudo, 4504 ARM::VLD2DUPq32OddPseudo }; 4505 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4506 DOpcodes, QOpcodes0, QOpcodes1); 4507 return; 4508 } 4509 4510 case Intrinsic::arm_neon_vld3dup: { 4511 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4512 ARM::VLD3DUPd16Pseudo, 4513 ARM::VLD3DUPd32Pseudo, 4514 ARM::VLD1d64TPseudo }; 4515 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4516 ARM::VLD3DUPq16EvenPseudo, 4517 ARM::VLD3DUPq32EvenPseudo }; 4518 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4519 ARM::VLD3DUPq16OddPseudo, 4520 ARM::VLD3DUPq32OddPseudo }; 4521 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4522 DOpcodes, QOpcodes0, QOpcodes1); 4523 return; 4524 } 4525 4526 case Intrinsic::arm_neon_vld4dup: { 4527 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4528 ARM::VLD4DUPd16Pseudo, 4529 ARM::VLD4DUPd32Pseudo, 4530 ARM::VLD1d64QPseudo }; 4531 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4532 ARM::VLD4DUPq16EvenPseudo, 4533 ARM::VLD4DUPq32EvenPseudo }; 4534 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4535 ARM::VLD4DUPq16OddPseudo, 4536 ARM::VLD4DUPq32OddPseudo }; 4537 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4538 DOpcodes, QOpcodes0, QOpcodes1); 4539 
return; 4540 } 4541 4542 case Intrinsic::arm_neon_vld2lane: { 4543 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4544 ARM::VLD2LNd16Pseudo, 4545 ARM::VLD2LNd32Pseudo }; 4546 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4547 ARM::VLD2LNq32Pseudo }; 4548 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4549 return; 4550 } 4551 4552 case Intrinsic::arm_neon_vld3lane: { 4553 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 4554 ARM::VLD3LNd16Pseudo, 4555 ARM::VLD3LNd32Pseudo }; 4556 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 4557 ARM::VLD3LNq32Pseudo }; 4558 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 4559 return; 4560 } 4561 4562 case Intrinsic::arm_neon_vld4lane: { 4563 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 4564 ARM::VLD4LNd16Pseudo, 4565 ARM::VLD4LNd32Pseudo }; 4566 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 4567 ARM::VLD4LNq32Pseudo }; 4568 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 4569 return; 4570 } 4571 4572 case Intrinsic::arm_neon_vst1: { 4573 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 4574 ARM::VST1d32, ARM::VST1d64 }; 4575 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4576 ARM::VST1q32, ARM::VST1q64 }; 4577 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 4578 return; 4579 } 4580 4581 case Intrinsic::arm_neon_vst1x2: { 4582 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4583 ARM::VST1q32, ARM::VST1q64 }; 4584 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 4585 ARM::VST1d16QPseudo, 4586 ARM::VST1d32QPseudo, 4587 ARM::VST1d64QPseudo }; 4588 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4589 return; 4590 } 4591 4592 case Intrinsic::arm_neon_vst1x3: { 4593 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 4594 ARM::VST1d16TPseudo, 4595 ARM::VST1d32TPseudo, 4596 ARM::VST1d64TPseudo }; 4597 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4598 
ARM::VST1q16LowTPseudo_UPD, 4599 ARM::VST1q32LowTPseudo_UPD, 4600 ARM::VST1q64LowTPseudo_UPD }; 4601 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 4602 ARM::VST1q16HighTPseudo, 4603 ARM::VST1q32HighTPseudo, 4604 ARM::VST1q64HighTPseudo }; 4605 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4606 return; 4607 } 4608 4609 case Intrinsic::arm_neon_vst1x4: { 4610 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 4611 ARM::VST1d16QPseudo, 4612 ARM::VST1d32QPseudo, 4613 ARM::VST1d64QPseudo }; 4614 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4615 ARM::VST1q16LowQPseudo_UPD, 4616 ARM::VST1q32LowQPseudo_UPD, 4617 ARM::VST1q64LowQPseudo_UPD }; 4618 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 4619 ARM::VST1q16HighQPseudo, 4620 ARM::VST1q32HighQPseudo, 4621 ARM::VST1q64HighQPseudo }; 4622 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4623 return; 4624 } 4625 4626 case Intrinsic::arm_neon_vst2: { 4627 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 4628 ARM::VST2d32, ARM::VST1q64 }; 4629 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 4630 ARM::VST2q32Pseudo }; 4631 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4632 return; 4633 } 4634 4635 case Intrinsic::arm_neon_vst3: { 4636 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 4637 ARM::VST3d16Pseudo, 4638 ARM::VST3d32Pseudo, 4639 ARM::VST1d64TPseudo }; 4640 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4641 ARM::VST3q16Pseudo_UPD, 4642 ARM::VST3q32Pseudo_UPD }; 4643 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 4644 ARM::VST3q16oddPseudo, 4645 ARM::VST3q32oddPseudo }; 4646 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4647 return; 4648 } 4649 4650 case Intrinsic::arm_neon_vst4: { 4651 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 4652 ARM::VST4d16Pseudo, 4653 ARM::VST4d32Pseudo, 4654 ARM::VST1d64QPseudo }; 4655 static const uint16_t QOpcodes0[] 
= { ARM::VST4q8Pseudo_UPD, 4656 ARM::VST4q16Pseudo_UPD, 4657 ARM::VST4q32Pseudo_UPD }; 4658 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 4659 ARM::VST4q16oddPseudo, 4660 ARM::VST4q32oddPseudo }; 4661 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4662 return; 4663 } 4664 4665 case Intrinsic::arm_neon_vst2lane: { 4666 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4667 ARM::VST2LNd16Pseudo, 4668 ARM::VST2LNd32Pseudo }; 4669 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4670 ARM::VST2LNq32Pseudo }; 4671 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4672 return; 4673 } 4674 4675 case Intrinsic::arm_neon_vst3lane: { 4676 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4677 ARM::VST3LNd16Pseudo, 4678 ARM::VST3LNd32Pseudo }; 4679 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4680 ARM::VST3LNq32Pseudo }; 4681 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4682 return; 4683 } 4684 4685 case Intrinsic::arm_neon_vst4lane: { 4686 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4687 ARM::VST4LNd16Pseudo, 4688 ARM::VST4LNd32Pseudo }; 4689 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4690 ARM::VST4LNq32Pseudo }; 4691 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4692 return; 4693 } 4694 4695 case Intrinsic::arm_mve_vldr_gather_base_wb: 4696 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 4697 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 4698 ARM::MVE_VLDRDU64_qi_pre}; 4699 SelectMVE_WB(N, Opcodes, 4700 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 4701 return; 4702 } 4703 4704 case Intrinsic::arm_mve_vld2q: { 4705 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 4706 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4707 ARM::MVE_VLD21_16}; 4708 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4709 ARM::MVE_VLD21_32}; 4710 static const uint16_t *const Opcodes[] = {Opcodes8, 
Opcodes16, Opcodes32}; 4711 SelectMVE_VLD(N, 2, Opcodes, false); 4712 return; 4713 } 4714 4715 case Intrinsic::arm_mve_vld4q: { 4716 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4717 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 4718 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4719 ARM::MVE_VLD42_16, 4720 ARM::MVE_VLD43_16}; 4721 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4722 ARM::MVE_VLD42_32, 4723 ARM::MVE_VLD43_32}; 4724 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4725 SelectMVE_VLD(N, 4, Opcodes, false); 4726 return; 4727 } 4728 } 4729 break; 4730 } 4731 4732 case ISD::INTRINSIC_WO_CHAIN: { 4733 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4734 switch (IntNo) { 4735 default: 4736 break; 4737 4738 case Intrinsic::arm_mve_urshrl: 4739 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 4740 return; 4741 case Intrinsic::arm_mve_uqshll: 4742 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 4743 return; 4744 case Intrinsic::arm_mve_srshrl: 4745 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 4746 return; 4747 case Intrinsic::arm_mve_sqshll: 4748 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false); 4749 return; 4750 case Intrinsic::arm_mve_uqrshll: 4751 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 4752 return; 4753 case Intrinsic::arm_mve_sqrshrl: 4754 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 4755 return; 4756 4757 case Intrinsic::arm_mve_vadc: 4758 case Intrinsic::arm_mve_vadc_predicated: 4759 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 4760 IntNo == Intrinsic::arm_mve_vadc_predicated); 4761 return; 4762 case Intrinsic::arm_mve_vsbc: 4763 case Intrinsic::arm_mve_vsbc_predicated: 4764 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 4765 IntNo == Intrinsic::arm_mve_vsbc_predicated); 4766 return; 4767 case Intrinsic::arm_mve_vshlc: 4768 case 
Intrinsic::arm_mve_vshlc_predicated: 4769 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 4770 return; 4771 4772 case Intrinsic::arm_mve_vmlldava: 4773 case Intrinsic::arm_mve_vmlldava_predicated: { 4774 static const uint16_t OpcodesU[] = { 4775 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 4776 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 4777 }; 4778 static const uint16_t OpcodesS[] = { 4779 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 4780 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 4781 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 4782 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 4783 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 4784 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 4785 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 4786 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 4787 }; 4788 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 4789 OpcodesS, OpcodesU); 4790 return; 4791 } 4792 4793 case Intrinsic::arm_mve_vrmlldavha: 4794 case Intrinsic::arm_mve_vrmlldavha_predicated: { 4795 static const uint16_t OpcodesU[] = { 4796 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 4797 }; 4798 static const uint16_t OpcodesS[] = { 4799 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 4800 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 4801 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 4802 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 4803 }; 4804 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 4805 OpcodesS, OpcodesU); 4806 return; 4807 } 4808 4809 case Intrinsic::arm_mve_vidup: 4810 case Intrinsic::arm_mve_vidup_predicated: { 4811 static const uint16_t Opcodes[] = { 4812 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 4813 }; 4814 SelectMVE_VxDUP(N, Opcodes, false, 4815 IntNo == Intrinsic::arm_mve_vidup_predicated); 4816 return; 4817 } 4818 4819 case Intrinsic::arm_mve_vddup: 4820 case Intrinsic::arm_mve_vddup_predicated: { 4821 static const uint16_t Opcodes[] = { 4822 
ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 4823 }; 4824 SelectMVE_VxDUP(N, Opcodes, false, 4825 IntNo == Intrinsic::arm_mve_vddup_predicated); 4826 return; 4827 } 4828 4829 case Intrinsic::arm_mve_viwdup: 4830 case Intrinsic::arm_mve_viwdup_predicated: { 4831 static const uint16_t Opcodes[] = { 4832 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 4833 }; 4834 SelectMVE_VxDUP(N, Opcodes, true, 4835 IntNo == Intrinsic::arm_mve_viwdup_predicated); 4836 return; 4837 } 4838 4839 case Intrinsic::arm_mve_vdwdup: 4840 case Intrinsic::arm_mve_vdwdup_predicated: { 4841 static const uint16_t Opcodes[] = { 4842 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 4843 }; 4844 SelectMVE_VxDUP(N, Opcodes, true, 4845 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 4846 return; 4847 } 4848 4849 case Intrinsic::arm_cde_cx1d: 4850 case Intrinsic::arm_cde_cx1da: 4851 case Intrinsic::arm_cde_cx2d: 4852 case Intrinsic::arm_cde_cx2da: 4853 case Intrinsic::arm_cde_cx3d: 4854 case Intrinsic::arm_cde_cx3da: { 4855 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 4856 IntNo == Intrinsic::arm_cde_cx2da || 4857 IntNo == Intrinsic::arm_cde_cx3da; 4858 size_t NumExtraOps; 4859 uint16_t Opcode; 4860 switch (IntNo) { 4861 case Intrinsic::arm_cde_cx1d: 4862 case Intrinsic::arm_cde_cx1da: 4863 NumExtraOps = 0; 4864 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 4865 break; 4866 case Intrinsic::arm_cde_cx2d: 4867 case Intrinsic::arm_cde_cx2da: 4868 NumExtraOps = 1; 4869 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 4870 break; 4871 case Intrinsic::arm_cde_cx3d: 4872 case Intrinsic::arm_cde_cx3da: 4873 NumExtraOps = 2; 4874 Opcode = HasAccum ? 
ARM::CDE_CX3DA : ARM::CDE_CX3D; 4875 break; 4876 default: 4877 llvm_unreachable("Unexpected opcode"); 4878 } 4879 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum); 4880 return; 4881 } 4882 } 4883 break; 4884 } 4885 4886 case ISD::ATOMIC_CMP_SWAP: 4887 SelectCMP_SWAP(N); 4888 return; 4889 } 4890 4891 SelectCode(N); 4892 } 4893 4894 // Inspect a register string of the form 4895 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 4896 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 4897 // and obtain the integer operands from them, adding these operands to the 4898 // provided vector. 4899 static void getIntOperandsFromRegisterString(StringRef RegString, 4900 SelectionDAG *CurDAG, 4901 const SDLoc &DL, 4902 std::vector<SDValue> &Ops) { 4903 SmallVector<StringRef, 5> Fields; 4904 RegString.split(Fields, ':'); 4905 4906 if (Fields.size() > 1) { 4907 bool AllIntFields = true; 4908 4909 for (StringRef Field : Fields) { 4910 // Need to trim out leading 'cp' characters and get the integer field. 4911 unsigned IntField; 4912 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 4913 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 4914 } 4915 4916 assert(AllIntFields && 4917 "Unexpected non-integer value in special register string."); 4918 } 4919 } 4920 4921 // Maps a Banked Register string to its mask value. The mask value returned is 4922 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 4923 // mask operand, which expresses which register is to be used, e.g. r8, and in 4924 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 4925 // was invalid. 
4926 static inline int getBankedRegisterMask(StringRef RegString) { 4927 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 4928 if (!TheReg) 4929 return -1; 4930 return TheReg->Encoding; 4931 } 4932 4933 // The flags here are common to those allowed for apsr in the A class cores and 4934 // those allowed for the special registers in the M class cores. Returns a 4935 // value representing which flags were present, -1 if invalid. 4936 static inline int getMClassFlagsMask(StringRef Flags) { 4937 return StringSwitch<int>(Flags) 4938 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 4939 // correct when flags are not permitted 4940 .Case("g", 0x1) 4941 .Case("nzcvq", 0x2) 4942 .Case("nzcvqg", 0x3) 4943 .Default(-1); 4944 } 4945 4946 // Maps MClass special registers string to its value for use in the 4947 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 4948 // Returns -1 to signify that the string was invalid. 4949 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 4950 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 4951 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 4952 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 4953 return -1; 4954 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 4955 } 4956 4957 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 4958 // The mask operand contains the special register (R Bit) in bit 4, whether 4959 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 4960 // bits 3-0 contains the fields to be accessed in the special register, set by 4961 // the flags provided with the register. 4962 int Mask = 0; 4963 if (Reg == "apsr") { 4964 // The flags permitted for apsr are the same flags that are allowed in 4965 // M class registers. We get the flag value and then shift the flags into 4966 // the correct place to combine with the mask. 
4967 Mask = getMClassFlagsMask(Flags); 4968 if (Mask == -1) 4969 return -1; 4970 return Mask << 2; 4971 } 4972 4973 if (Reg != "cpsr" && Reg != "spsr") { 4974 return -1; 4975 } 4976 4977 // This is the same as if the flags were "fc" 4978 if (Flags.empty() || Flags == "all") 4979 return Mask | 0x9; 4980 4981 // Inspect the supplied flags string and set the bits in the mask for 4982 // the relevant and valid flags allowed for cpsr and spsr. 4983 for (char Flag : Flags) { 4984 int FlagVal; 4985 switch (Flag) { 4986 case 'c': 4987 FlagVal = 0x1; 4988 break; 4989 case 'x': 4990 FlagVal = 0x2; 4991 break; 4992 case 's': 4993 FlagVal = 0x4; 4994 break; 4995 case 'f': 4996 FlagVal = 0x8; 4997 break; 4998 default: 4999 FlagVal = 0; 5000 } 5001 5002 // This avoids allowing strings where the same flag bit appears twice. 5003 if (!FlagVal || (Mask & FlagVal)) 5004 return -1; 5005 Mask |= FlagVal; 5006 } 5007 5008 // If the register is spsr then we need to set the R bit. 5009 if (Reg == "spsr") 5010 Mask |= 0x10; 5011 5012 return Mask; 5013 } 5014 5015 // Lower the read_register intrinsic to ARM specific DAG nodes 5016 // using the supplied metadata string to select the instruction node to use 5017 // and the registers/masks to construct as operands for the node. 5018 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 5019 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 5020 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 5021 bool IsThumb2 = Subtarget->isThumb2(); 5022 SDLoc DL(N); 5023 5024 std::vector<SDValue> Ops; 5025 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5026 5027 if (!Ops.empty()) { 5028 // If the special register string was constructed of fields (as defined 5029 // in the ACLE) then need to lower to MRC node (32 bit) or 5030 // MRRC node(64 bit), we can make the distinction based on the number of 5031 // operands we have. 
5032 unsigned Opcode; 5033 SmallVector<EVT, 3> ResTypes; 5034 if (Ops.size() == 5){ 5035 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 5036 ResTypes.append({ MVT::i32, MVT::Other }); 5037 } else { 5038 assert(Ops.size() == 3 && 5039 "Invalid number of fields in special register string."); 5040 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 5041 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 5042 } 5043 5044 Ops.push_back(getAL(CurDAG, DL)); 5045 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5046 Ops.push_back(N->getOperand(0)); 5047 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 5048 return true; 5049 } 5050 5051 std::string SpecialReg = RegString->getString().lower(); 5052 5053 int BankedReg = getBankedRegisterMask(SpecialReg); 5054 if (BankedReg != -1) { 5055 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 5056 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5057 N->getOperand(0) }; 5058 ReplaceNode( 5059 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 5060 DL, MVT::i32, MVT::Other, Ops)); 5061 return true; 5062 } 5063 5064 // The VFP registers are read by creating SelectionDAG nodes with opcodes 5065 // corresponding to the register that is being read from. So we switch on the 5066 // string to find which opcode we need to use. 5067 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5068 .Case("fpscr", ARM::VMRS) 5069 .Case("fpexc", ARM::VMRS_FPEXC) 5070 .Case("fpsid", ARM::VMRS_FPSID) 5071 .Case("mvfr0", ARM::VMRS_MVFR0) 5072 .Case("mvfr1", ARM::VMRS_MVFR1) 5073 .Case("mvfr2", ARM::VMRS_MVFR2) 5074 .Case("fpinst", ARM::VMRS_FPINST) 5075 .Case("fpinst2", ARM::VMRS_FPINST2) 5076 .Default(0); 5077 5078 // If an opcode was found then we can lower the read to a VFP instruction. 
5079 if (Opcode) { 5080 if (!Subtarget->hasVFP2Base()) 5081 return false; 5082 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 5083 return false; 5084 5085 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5086 N->getOperand(0) }; 5087 ReplaceNode(N, 5088 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 5089 return true; 5090 } 5091 5092 // If the target is M Class then need to validate that the register string 5093 // is an acceptable value, so check that a mask can be constructed from the 5094 // string. 5095 if (Subtarget->isMClass()) { 5096 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5097 if (SYSmValue == -1) 5098 return false; 5099 5100 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5101 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5102 N->getOperand(0) }; 5103 ReplaceNode( 5104 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 5105 return true; 5106 } 5107 5108 // Here we know the target is not M Class so we need to check if it is one 5109 // of the remaining possible values which are apsr, cpsr or spsr. 5110 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 5111 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5112 N->getOperand(0) }; 5113 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 5114 DL, MVT::i32, MVT::Other, Ops)); 5115 return true; 5116 } 5117 5118 if (SpecialReg == "spsr") { 5119 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5120 N->getOperand(0) }; 5121 ReplaceNode( 5122 N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRSsys_AR : ARM::MRSsys, DL, 5123 MVT::i32, MVT::Other, Ops)); 5124 return true; 5125 } 5126 5127 return false; 5128 } 5129 5130 // Lower the write_register intrinsic to ARM specific DAG nodes 5131 // using the supplied metadata string to select the instruction node to use 5132 // and the registers/masks to use in the nodes 5133 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 5134 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 5135 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 5136 bool IsThumb2 = Subtarget->isThumb2(); 5137 SDLoc DL(N); 5138 5139 std::vector<SDValue> Ops; 5140 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5141 5142 if (!Ops.empty()) { 5143 // If the special register string was constructed of fields (as defined 5144 // in the ACLE) then need to lower to MCR node (32 bit) or 5145 // MCRR node(64 bit), we can make the distinction based on the number of 5146 // operands we have. 5147 unsigned Opcode; 5148 if (Ops.size() == 5) { 5149 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 5150 Ops.insert(Ops.begin()+2, N->getOperand(2)); 5151 } else { 5152 assert(Ops.size() == 3 && 5153 "Invalid number of fields in special register string."); 5154 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 5155 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 5156 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 5157 } 5158 5159 Ops.push_back(getAL(CurDAG, DL)); 5160 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5161 Ops.push_back(N->getOperand(0)); 5162 5163 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5164 return true; 5165 } 5166 5167 std::string SpecialReg = RegString->getString().lower(); 5168 int BankedReg = getBankedRegisterMask(SpecialReg); 5169 if (BankedReg != -1) { 5170 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 5171 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5172 N->getOperand(0) }; 5173 ReplaceNode( 5174 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 5175 DL, MVT::Other, Ops)); 5176 return true; 5177 } 5178 5179 // The VFP registers are written to by creating SelectionDAG nodes with 5180 // opcodes corresponding to the register that is being written. So we switch 5181 // on the string to find which opcode we need to use. 5182 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5183 .Case("fpscr", ARM::VMSR) 5184 .Case("fpexc", ARM::VMSR_FPEXC) 5185 .Case("fpsid", ARM::VMSR_FPSID) 5186 .Case("fpinst", ARM::VMSR_FPINST) 5187 .Case("fpinst2", ARM::VMSR_FPINST2) 5188 .Default(0); 5189 5190 if (Opcode) { 5191 if (!Subtarget->hasVFP2Base()) 5192 return false; 5193 Ops = { N->getOperand(2), getAL(CurDAG, DL), 5194 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5195 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5196 return true; 5197 } 5198 5199 std::pair<StringRef, StringRef> Fields; 5200 Fields = StringRef(SpecialReg).rsplit('_'); 5201 std::string Reg = Fields.first.str(); 5202 StringRef Flags = Fields.second; 5203 5204 // If the target was M Class then need to validate the special register value 5205 // and retrieve the mask for use in the instruction node. 
5206 if (Subtarget->isMClass()) { 5207 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5208 if (SYSmValue == -1) 5209 return false; 5210 5211 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5212 N->getOperand(2), getAL(CurDAG, DL), 5213 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5214 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 5215 return true; 5216 } 5217 5218 // We then check to see if a valid mask can be constructed for one of the 5219 // register string values permitted for the A and R class cores. These values 5220 // are apsr, spsr and cpsr; these are also valid on older cores. 5221 int Mask = getARClassRegisterMask(Reg, Flags); 5222 if (Mask != -1) { 5223 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 5224 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5225 N->getOperand(0) }; 5226 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 5227 DL, MVT::Other, Ops)); 5228 return true; 5229 } 5230 5231 return false; 5232 } 5233 5234 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 5235 std::vector<SDValue> AsmNodeOperands; 5236 unsigned Flag, Kind; 5237 bool Changed = false; 5238 unsigned NumOps = N->getNumOperands(); 5239 5240 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 5241 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 5242 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 5243 // respectively. Since there is no constraint to explicitly specify a 5244 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 5245 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 5246 // them into a GPRPair. 5247 5248 SDLoc dl(N); 5249 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 5250 : SDValue(nullptr,0); 5251 5252 SmallVector<bool, 8> OpChanged; 5253 // Glue node will be appended late. 
5254 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5255 SDValue op = N->getOperand(i); 5256 AsmNodeOperands.push_back(op); 5257 5258 if (i < InlineAsm::Op_FirstOperand) 5259 continue; 5260 5261 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 5262 Flag = C->getZExtValue(); 5263 Kind = InlineAsm::getKind(Flag); 5264 } 5265 else 5266 continue; 5267 5268 // Immediate operands to inline asm in the SelectionDAG are modeled with 5269 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 5270 // the second is a constant with the value of the immediate. If we get here 5271 // and we have a Kind_Imm, skip the next operand, and continue. 5272 if (Kind == InlineAsm::Kind_Imm) { 5273 SDValue op = N->getOperand(++i); 5274 AsmNodeOperands.push_back(op); 5275 continue; 5276 } 5277 5278 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 5279 if (NumRegs) 5280 OpChanged.push_back(false); 5281 5282 unsigned DefIdx = 0; 5283 bool IsTiedToChangedOp = false; 5284 // If it's a use that is tied with a previous def, it has no 5285 // reg class constraint. 5286 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 5287 IsTiedToChangedOp = OpChanged[DefIdx]; 5288 5289 // Memory operands to inline asm in the SelectionDAG are modeled with two 5290 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 5291 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 5292 // it doesn't get misinterpreted), and continue. We do this here because 5293 // it's important to update the OpChanged array correctly before moving on. 
5294 if (Kind == InlineAsm::Kind_Mem) { 5295 SDValue op = N->getOperand(++i); 5296 AsmNodeOperands.push_back(op); 5297 continue; 5298 } 5299 5300 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 5301 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 5302 continue; 5303 5304 unsigned RC; 5305 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 5306 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5307 || NumRegs != 2) 5308 continue; 5309 5310 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5311 SDValue V0 = N->getOperand(i+1); 5312 SDValue V1 = N->getOperand(i+2); 5313 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5314 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5315 SDValue PairedReg; 5316 MachineRegisterInfo &MRI = MF->getRegInfo(); 5317 5318 if (Kind == InlineAsm::Kind_RegDef || 5319 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 5320 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5321 // the original GPRs. 5322 5323 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5324 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5325 SDValue Chain = SDValue(N,0); 5326 5327 SDNode *GU = N->getGluedUser(); 5328 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5329 Chain.getValue(1)); 5330 5331 // Extract values from a GPRPair reg and copy to the original GPR reg. 5332 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5333 RegCopy); 5334 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5335 RegCopy); 5336 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5337 RegCopy.getValue(1)); 5338 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5339 5340 // Update the original glue user. 
5341 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5342 Ops.push_back(T1.getValue(1)); 5343 CurDAG->UpdateNodeOperands(GU, Ops); 5344 } 5345 else { 5346 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 5347 // GPRPair and then pass the GPRPair to the inline asm. 5348 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5349 5350 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5351 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5352 Chain.getValue(1)); 5353 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5354 T0.getValue(1)); 5355 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5356 5357 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5358 // i32 VRs of inline asm with it. 5359 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5360 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5361 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5362 5363 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5364 Glue = Chain.getValue(1); 5365 } 5366 5367 Changed = true; 5368 5369 if(PairedReg.getNode()) { 5370 OpChanged[OpChanged.size() -1 ] = true; 5371 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 5372 if (IsTiedToChangedOp) 5373 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 5374 else 5375 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 5376 // Replace the current flag. 5377 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5378 Flag, dl, MVT::i32); 5379 // Add the new register node and skip the original two GPRs. 5380 AsmNodeOperands.push_back(PairedReg); 5381 // Skip the next two GPRs. 
5382 i += 2; 5383 } 5384 } 5385 5386 if (Glue.getNode()) 5387 AsmNodeOperands.push_back(Glue); 5388 if (!Changed) 5389 return false; 5390 5391 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5392 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5393 New->setNodeId(-1); 5394 ReplaceNode(N, New.getNode()); 5395 return true; 5396 } 5397 5398 5399 bool ARMDAGToDAGISel:: 5400 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 5401 std::vector<SDValue> &OutOps) { 5402 switch(ConstraintID) { 5403 default: 5404 llvm_unreachable("Unexpected asm memory constraint"); 5405 case InlineAsm::Constraint_m: 5406 case InlineAsm::Constraint_o: 5407 case InlineAsm::Constraint_Q: 5408 case InlineAsm::Constraint_Um: 5409 case InlineAsm::Constraint_Un: 5410 case InlineAsm::Constraint_Uq: 5411 case InlineAsm::Constraint_Us: 5412 case InlineAsm::Constraint_Ut: 5413 case InlineAsm::Constraint_Uv: 5414 case InlineAsm::Constraint_Uy: 5415 // Require the address to be in a register. That is safe for all ARM 5416 // variants and it is hard to do anything much smarter without knowing 5417 // how the operand is used. 5418 OutOps.push_back(Op); 5419 return false; 5420 } 5421 return true; 5422 } 5423 5424 /// createARMISelDag - This pass converts a legalized DAG into a 5425 /// ARM-specific DAG, ready for instruction scheduling. 5426 /// 5427 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5428 CodeGenOpt::Level OptLevel) { 5429 return new ARMDAGToDAGISel(TM, OptLevel); 5430 } 5431