//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

// Debugging knob: when set, none of the shifter-operand forms below are
// matched, so shifts are selected as separate instructions instead of being
// folded into the using instruction's operand.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    // Only match when the shifted value has a single use, so folding it into
    // the consuming instruction cannot duplicate work.
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Split a CMOV predicate operand into the condition-code constant and the
  // CPSR flags register it reads.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Predicates over immediates: whether Imm (or its complement) is encodable
  // as an ARM / Thumb-2 modified-immediate ("so_imm") operand.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///      the accumulator and the immediate operand, i.e. 0
  ///      for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
364 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 365 return isInt32Immediate(N.getNode(), Imm); 366 } 367 368 // isOpcWithIntImmediate - This method tests to see if the node is a specific 369 // opcode and that it has a immediate integer right operand. 370 // If so Imm will receive the 32 bit value. 371 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 372 return N->getOpcode() == Opc && 373 isInt32Immediate(N->getOperand(1).getNode(), Imm); 374 } 375 376 /// Check whether a particular node is a constant value representable as 377 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 378 /// 379 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 380 static bool isScaledConstantInRange(SDValue Node, int Scale, 381 int RangeMin, int RangeMax, 382 int &ScaledConstant) { 383 assert(Scale > 0 && "Invalid scale!"); 384 385 // Check that this is a constant. 386 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 387 if (!C) 388 return false; 389 390 ScaledConstant = (int) C->getZExtValue(); 391 if ((ScaledConstant % Scale) != 0) 392 return false; 393 394 ScaledConstant /= Scale; 395 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 396 } 397 398 void ARMDAGToDAGISel::PreprocessISelDAG() { 399 if (!Subtarget->hasV6T2Ops()) 400 return; 401 402 bool isThumb2 = Subtarget->isThumb(); 403 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 404 E = CurDAG->allnodes_end(); I != E; ) { 405 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 406 407 if (N->getOpcode() != ISD::ADD) 408 continue; 409 410 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 411 // leading zeros, followed by consecutive set bits, followed by 1 or 2 412 // trailing zeros, e.g. 1020. 413 // Transform the expression to 414 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 415 // of trailing zeros of c2. 
The left shift would be folded as an shifter 416 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 417 // node (UBFX). 418 419 SDValue N0 = N->getOperand(0); 420 SDValue N1 = N->getOperand(1); 421 unsigned And_imm = 0; 422 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 423 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 424 std::swap(N0, N1); 425 } 426 if (!And_imm) 427 continue; 428 429 // Check if the AND mask is an immediate of the form: 000.....1111111100 430 unsigned TZ = countTrailingZeros(And_imm); 431 if (TZ != 1 && TZ != 2) 432 // Be conservative here. Shifter operands aren't always free. e.g. On 433 // Swift, left shifter operand of 1 / 2 for free but others are not. 434 // e.g. 435 // ubfx r3, r1, #16, #8 436 // ldr.w r3, [r0, r3, lsl #2] 437 // vs. 438 // mov.w r9, #1020 439 // and.w r2, r9, r1, lsr #14 440 // ldr r2, [r0, r2] 441 continue; 442 And_imm >>= TZ; 443 if (And_imm & (And_imm + 1)) 444 continue; 445 446 // Look for (and (srl X, c1), c2). 447 SDValue Srl = N1.getOperand(0); 448 unsigned Srl_imm = 0; 449 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 450 (Srl_imm <= 2)) 451 continue; 452 453 // Make sure first operand is not a shifter operand which would prevent 454 // folding of the left shift. 455 SDValue CPTmp0; 456 SDValue CPTmp1; 457 SDValue CPTmp2; 458 if (isThumb2) { 459 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 460 continue; 461 } else { 462 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 463 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 464 continue; 465 } 466 467 // Now make the transformation. 
468 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 469 Srl.getOperand(0), 470 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 471 MVT::i32)); 472 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 473 Srl, 474 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 475 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 476 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 477 CurDAG->UpdateNodeOperands(N, N0, N1); 478 } 479 } 480 481 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 482 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 483 /// least on current ARM implementations) which should be avoidded. 484 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 485 if (OptLevel == CodeGenOpt::None) 486 return true; 487 488 if (!Subtarget->hasVMLxHazards()) 489 return true; 490 491 if (!N->hasOneUse()) 492 return false; 493 494 SDNode *Use = *N->use_begin(); 495 if (Use->getOpcode() == ISD::CopyToReg) 496 return true; 497 if (Use->isMachineOpcode()) { 498 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 499 CurDAG->getSubtarget().getInstrInfo()); 500 501 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 502 if (MCID.mayStore()) 503 return true; 504 unsigned Opcode = MCID.getOpcode(); 505 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 506 return true; 507 // vmlx feeding into another vmlx. We actually want to unfold 508 // the use later in the MLxExpansion pass. e.g. 509 // vmla 510 // vmla (stall 8 cycles) 511 // 512 // vmul (5 cycles) 513 // vadd (5 cycles) 514 // vmla 515 // This adds up to about 18 - 19 cycles. 516 // 517 // vmla 518 // vmul (stall 4 cycles) 519 // vadd adds up to about 14 cycles. 
520 return TII->isFpMLxInstruction(Opcode); 521 } 522 523 return false; 524 } 525 526 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 527 ARM_AM::ShiftOpc ShOpcVal, 528 unsigned ShAmt) { 529 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 530 return true; 531 if (Shift.hasOneUse()) 532 return true; 533 // R << 2 is free. 534 return ShOpcVal == ARM_AM::lsl && 535 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 536 } 537 538 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 539 unsigned MaxShift, 540 unsigned &PowerOfTwo, 541 SDValue &NewMulConst) const { 542 assert(N.getOpcode() == ISD::MUL); 543 assert(MaxShift > 0); 544 545 // If the multiply is used in more than one place then changing the constant 546 // will make other uses incorrect, so don't. 547 if (!N.hasOneUse()) return false; 548 // Check if the multiply is by a constant 549 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 550 if (!MulConst) return false; 551 // If the constant is used in more than one place then modifying it will mean 552 // we need to materialize two constants instead of one, which is a bad idea. 
553 if (!MulConst->hasOneUse()) return false; 554 unsigned MulConstVal = MulConst->getZExtValue(); 555 if (MulConstVal == 0) return false; 556 557 // Find the largest power of 2 that MulConstVal is a multiple of 558 PowerOfTwo = MaxShift; 559 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 560 --PowerOfTwo; 561 if (PowerOfTwo == 0) return false; 562 } 563 564 // Only optimise if the new cost is better 565 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 566 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 567 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 568 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 569 return NewCost < OldCost; 570 } 571 572 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 573 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 574 ReplaceUses(N, M); 575 } 576 577 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 578 SDValue &BaseReg, 579 SDValue &Opc, 580 bool CheckProfitability) { 581 if (DisableShifterOp) 582 return false; 583 584 // If N is a multiply-by-constant and it's profitable to extract a shift and 585 // use it in a shifted operand do so. 586 if (N.getOpcode() == ISD::MUL) { 587 unsigned PowerOfTwo = 0; 588 SDValue NewMulConst; 589 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 590 HandleSDNode Handle(N); 591 SDLoc Loc(N); 592 replaceDAGValue(N.getOperand(1), NewMulConst); 593 BaseReg = Handle.getValue(); 594 Opc = CurDAG->getTargetConstant( 595 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 596 return true; 597 } 598 } 599 600 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 601 602 // Don't match base register only case. That is matched to a separate 603 // lower complexity pattern with explicit register operand. 
604 if (ShOpcVal == ARM_AM::no_shift) return false; 605 606 BaseReg = N.getOperand(0); 607 unsigned ShImmVal = 0; 608 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 609 if (!RHS) return false; 610 ShImmVal = RHS->getZExtValue() & 31; 611 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 612 SDLoc(N), MVT::i32); 613 return true; 614 } 615 616 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 617 SDValue &BaseReg, 618 SDValue &ShReg, 619 SDValue &Opc, 620 bool CheckProfitability) { 621 if (DisableShifterOp) 622 return false; 623 624 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 625 626 // Don't match base register only case. That is matched to a separate 627 // lower complexity pattern with explicit register operand. 628 if (ShOpcVal == ARM_AM::no_shift) return false; 629 630 BaseReg = N.getOperand(0); 631 unsigned ShImmVal = 0; 632 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 633 if (RHS) return false; 634 635 ShReg = N.getOperand(1); 636 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 637 return false; 638 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 639 SDLoc(N), MVT::i32); 640 return true; 641 } 642 643 // Determine whether an ISD::OR's operands are suitable to turn the operation 644 // into an addition, which often has more compact encodings. 645 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 646 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 647 Out = N; 648 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 649 } 650 651 652 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 653 SDValue &Base, 654 SDValue &OffImm) { 655 // Match simple R + imm12 operands. 656 657 // Base only. 658 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 659 !CurDAG->isBaseWithConstantOffset(N)) { 660 if (N.getOpcode() == ISD::FrameIndex) { 661 // Match frame index. 
662 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 663 Base = CurDAG->getTargetFrameIndex( 664 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 665 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 666 return true; 667 } 668 669 if (N.getOpcode() == ARMISD::Wrapper && 670 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 671 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 672 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 673 Base = N.getOperand(0); 674 } else 675 Base = N; 676 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 677 return true; 678 } 679 680 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 681 int RHSC = (int)RHS->getSExtValue(); 682 if (N.getOpcode() == ISD::SUB) 683 RHSC = -RHSC; 684 685 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 686 Base = N.getOperand(0); 687 if (Base.getOpcode() == ISD::FrameIndex) { 688 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 689 Base = CurDAG->getTargetFrameIndex( 690 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 691 } 692 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 693 return true; 694 } 695 } 696 697 // Base only. 698 Base = N; 699 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 700 return true; 701 } 702 703 704 705 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 706 SDValue &Opc) { 707 if (N.getOpcode() == ISD::MUL && 708 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 709 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 710 // X * [3,5,9] -> X + X * [2,4,8] etc. 
711 int RHSC = (int)RHS->getZExtValue(); 712 if (RHSC & 1) { 713 RHSC = RHSC & ~1; 714 ARM_AM::AddrOpc AddSub = ARM_AM::add; 715 if (RHSC < 0) { 716 AddSub = ARM_AM::sub; 717 RHSC = - RHSC; 718 } 719 if (isPowerOf2_32(RHSC)) { 720 unsigned ShAmt = Log2_32(RHSC); 721 Base = Offset = N.getOperand(0); 722 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 723 ARM_AM::lsl), 724 SDLoc(N), MVT::i32); 725 return true; 726 } 727 } 728 } 729 } 730 731 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 732 // ISD::OR that is equivalent to an ISD::ADD. 733 !CurDAG->isBaseWithConstantOffset(N)) 734 return false; 735 736 // Leave simple R +/- imm12 operands for LDRi12 737 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 738 int RHSC; 739 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 740 -0x1000+1, 0x1000, RHSC)) // 12 bits. 741 return false; 742 } 743 744 // Otherwise this is R +/- [possibly shifted] R. 745 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 746 ARM_AM::ShiftOpc ShOpcVal = 747 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 748 unsigned ShAmt = 0; 749 750 Base = N.getOperand(0); 751 Offset = N.getOperand(1); 752 753 if (ShOpcVal != ARM_AM::no_shift) { 754 // Check to see if the RHS of the shift is a constant, if not, we can't fold 755 // it. 756 if (ConstantSDNode *Sh = 757 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 758 ShAmt = Sh->getZExtValue(); 759 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 760 Offset = N.getOperand(1).getOperand(0); 761 else { 762 ShAmt = 0; 763 ShOpcVal = ARM_AM::no_shift; 764 } 765 } else { 766 ShOpcVal = ARM_AM::no_shift; 767 } 768 } 769 770 // Try matching (R shl C) + (R). 
771 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 772 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 773 N.getOperand(0).hasOneUse())) { 774 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 775 if (ShOpcVal != ARM_AM::no_shift) { 776 // Check to see if the RHS of the shift is a constant, if not, we can't 777 // fold it. 778 if (ConstantSDNode *Sh = 779 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 780 ShAmt = Sh->getZExtValue(); 781 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 782 Offset = N.getOperand(0).getOperand(0); 783 Base = N.getOperand(1); 784 } else { 785 ShAmt = 0; 786 ShOpcVal = ARM_AM::no_shift; 787 } 788 } else { 789 ShOpcVal = ARM_AM::no_shift; 790 } 791 } 792 } 793 794 // If Offset is a multiply-by-constant and it's profitable to extract a shift 795 // and use it in a shifted operand do so. 796 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 797 unsigned PowerOfTwo = 0; 798 SDValue NewMulConst; 799 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 800 HandleSDNode Handle(Offset); 801 replaceDAGValue(Offset.getOperand(1), NewMulConst); 802 Offset = Handle.getValue(); 803 ShAmt = PowerOfTwo; 804 ShOpcVal = ARM_AM::lsl; 805 } 806 } 807 808 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 809 SDLoc(N), MVT::i32); 810 return true; 811 } 812 813 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 814 SDValue &Offset, SDValue &Opc) { 815 unsigned Opcode = Op->getOpcode(); 816 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 817 ? cast<LoadSDNode>(Op)->getAddressingMode() 818 : cast<StoreSDNode>(Op)->getAddressingMode(); 819 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 820 ? 
ARM_AM::add : ARM_AM::sub; 821 int Val; 822 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 823 return false; 824 825 Offset = N; 826 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 827 unsigned ShAmt = 0; 828 if (ShOpcVal != ARM_AM::no_shift) { 829 // Check to see if the RHS of the shift is a constant, if not, we can't fold 830 // it. 831 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 832 ShAmt = Sh->getZExtValue(); 833 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 834 Offset = N.getOperand(0); 835 else { 836 ShAmt = 0; 837 ShOpcVal = ARM_AM::no_shift; 838 } 839 } else { 840 ShOpcVal = ARM_AM::no_shift; 841 } 842 } 843 844 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 845 SDLoc(N), MVT::i32); 846 return true; 847 } 848 849 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 850 SDValue &Offset, SDValue &Opc) { 851 unsigned Opcode = Op->getOpcode(); 852 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 853 ? cast<LoadSDNode>(Op)->getAddressingMode() 854 : cast<StoreSDNode>(Op)->getAddressingMode(); 855 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 856 ? ARM_AM::add : ARM_AM::sub; 857 int Val; 858 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 859 if (AddSub == ARM_AM::sub) Val *= -1; 860 Offset = CurDAG->getRegister(0, MVT::i32); 861 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 862 return true; 863 } 864 865 return false; 866 } 867 868 869 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 870 SDValue &Offset, SDValue &Opc) { 871 unsigned Opcode = Op->getOpcode(); 872 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 873 ? cast<LoadSDNode>(Op)->getAddressingMode() 874 : cast<StoreSDNode>(Op)->getAddressingMode(); 875 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 876 ? 
ARM_AM::add : ARM_AM::sub; 877 int Val; 878 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 879 Offset = CurDAG->getRegister(0, MVT::i32); 880 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 881 ARM_AM::no_shift), 882 SDLoc(Op), MVT::i32); 883 return true; 884 } 885 886 return false; 887 } 888 889 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 890 Base = N; 891 return true; 892 } 893 894 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 895 SDValue &Base, SDValue &Offset, 896 SDValue &Opc) { 897 if (N.getOpcode() == ISD::SUB) { 898 // X - C is canonicalize to X + -C, no need to handle it here. 899 Base = N.getOperand(0); 900 Offset = N.getOperand(1); 901 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 902 MVT::i32); 903 return true; 904 } 905 906 if (!CurDAG->isBaseWithConstantOffset(N)) { 907 Base = N; 908 if (N.getOpcode() == ISD::FrameIndex) { 909 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 910 Base = CurDAG->getTargetFrameIndex( 911 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 912 } 913 Offset = CurDAG->getRegister(0, MVT::i32); 914 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 915 MVT::i32); 916 return true; 917 } 918 919 // If the RHS is +/- imm8, fold into addr mode. 920 int RHSC; 921 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 922 -256 + 1, 256, RHSC)) { // 8 bits. 
    // Fold the +/- imm8 into the addressing mode; the register-offset slot
    // is left empty (reg0).
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // The sign is encoded separately (add/sub); the stored immediate is
    // always non-negative.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // No foldable constant: fall back to the [reg, reg] form.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

/// Select the offset operand of a pre/post-indexed addrmode3 load/store.
/// The add/sub direction is derived from the parent memory node's
/// addressing mode (PRE_INC/POST_INC -> add, otherwise sub).
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  // Register-offset form.
  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

/// Shared worker for addrmode5 (VFP load/store) and its FP16 variant:
/// matches [reg] or [reg, +/-imm8 * Scale], where Scale is 2 for FP16 and
/// 4 otherwise.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  // FP16 loads/stores scale the imm8 by 2; the 32/64-bit forms scale by 4.
  const int Scale = FP16 ?
      2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Encode the sign separately; the immediate itself is non-negative.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Offset out of range: use the whole address as the base, zero offset.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

/// Select an addrmode6 (NEON vector load/store) address: always the raw
/// pointer plus an alignment operand derived from the memory operand.
/// Note the SDValue parameter named Align shadows the llvm::Align type
/// inside this function.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    // NOTE(review): MemSDNode::getAlignment() is the legacy unsigned API;
    // consider getAlign() — confirm against the tree's LLVM version.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

/// Select the post-increment offset of an addrmode6 load/store. A constant
/// increment equal to the access size is encoded as reg0 (implicit bump);
/// anything else is passed through as a register increment.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

/// Match a single-use PIC_ADD node, splitting it into its base operand and
/// the constant-pool label id.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

/// Match a [reg + reg] Thumb1 address. A lone zero constant is also
/// accepted and selected as Base == Offset == N.
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  // Adds of small negative constants are better selected as the ri form
  // with a zero offset (becomes a sub); see shouldUseZeroOffsetLdSt.
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

/// Match a [reg + imm5 * Scale] Thumb1 address (callers pass Scale of 1, 2
/// or 4). Returns false when the register-offset form should be preferred.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
1164 int RHSC; 1165 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1166 Base = N.getOperand(0); 1167 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1168 return true; 1169 } 1170 1171 // Offset is too large, so use register offset instead. 1172 return false; 1173 } 1174 1175 bool 1176 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1177 SDValue &OffImm) { 1178 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1179 } 1180 1181 bool 1182 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1183 SDValue &OffImm) { 1184 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1185 } 1186 1187 bool 1188 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1189 SDValue &OffImm) { 1190 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1191 } 1192 1193 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1194 SDValue &Base, SDValue &OffImm) { 1195 if (N.getOpcode() == ISD::FrameIndex) { 1196 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1197 // Only multiples of 4 are allowed for the offset, so the frame object 1198 // alignment must be at least 4. 1199 MachineFrameInfo &MFI = MF->getFrameInfo(); 1200 if (MFI.getObjectAlign(FI) < Align(4)) 1201 MFI.setObjectAlignment(FI, Align(4)); 1202 Base = CurDAG->getTargetFrameIndex( 1203 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1204 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1205 return true; 1206 } 1207 1208 if (!CurDAG->isBaseWithConstantOffset(N)) 1209 return false; 1210 1211 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1212 // If the RHS is + imm8 * scale, fold into addr mode. 1213 int RHSC; 1214 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1215 Base = N.getOperand(0); 1216 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1217 // Make sure the offset is inside the object, or we might fail to 1218 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1219 // it could show up anyway.) 1220 MachineFrameInfo &MFI = MF->getFrameInfo(); 1221 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1222 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1223 // indexed by the LHS must be 4-byte aligned. 1224 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1225 MFI.setObjectAlignment(FI, Align(4)); 1226 if (MFI.getObjectAlign(FI) >= Align(4)) { 1227 Base = CurDAG->getTargetFrameIndex( 1228 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1229 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1230 return true; 1231 } 1232 } 1233 } 1234 } 1235 1236 return false; 1237 } 1238 1239 template <unsigned Shift> 1240 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1241 SDValue &OffImm) { 1242 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1243 int RHSC; 1244 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1245 RHSC)) { 1246 Base = N.getOperand(0); 1247 if (N.getOpcode() == ISD::SUB) 1248 RHSC = -RHSC; 1249 OffImm = 1250 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1251 return true; 1252 } 1253 } 1254 1255 // Base only. 1256 Base = N; 1257 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1258 return true; 1259 } 1260 1261 1262 //===----------------------------------------------------------------------===// 1263 // Thumb 2 Addressing Modes 1264 //===----------------------------------------------------------------------===// 1265 1266 1267 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1268 SDValue &Base, SDValue &OffImm) { 1269 // Match simple R + imm12 operands. 1270 1271 // Base only. 1272 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1273 !CurDAG->isBaseWithConstantOffset(N)) { 1274 if (N.getOpcode() == ISD::FrameIndex) { 1275 // Match frame index. 
1276 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1277 Base = CurDAG->getTargetFrameIndex( 1278 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1279 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1280 return true; 1281 } 1282 1283 if (N.getOpcode() == ARMISD::Wrapper && 1284 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1285 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1286 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1287 Base = N.getOperand(0); 1288 if (Base.getOpcode() == ISD::TargetConstantPool) 1289 return false; // We want to select t2LDRpci instead. 1290 } else 1291 Base = N; 1292 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1293 return true; 1294 } 1295 1296 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1297 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1298 // Let t2LDRi8 handle (R - imm8). 1299 return false; 1300 1301 int RHSC = (int)RHS->getZExtValue(); 1302 if (N.getOpcode() == ISD::SUB) 1303 RHSC = -RHSC; 1304 1305 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1306 Base = N.getOperand(0); 1307 if (Base.getOpcode() == ISD::FrameIndex) { 1308 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1309 Base = CurDAG->getTargetFrameIndex( 1310 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1311 } 1312 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1313 return true; 1314 } 1315 } 1316 1317 // Base only. 
1318 Base = N; 1319 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1320 return true; 1321 } 1322 1323 template <unsigned Shift> 1324 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1325 SDValue &OffImm) { 1326 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1327 int RHSC; 1328 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1329 Base = N.getOperand(0); 1330 if (Base.getOpcode() == ISD::FrameIndex) { 1331 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1332 Base = CurDAG->getTargetFrameIndex( 1333 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1334 } 1335 1336 if (N.getOpcode() == ISD::SUB) 1337 RHSC = -RHSC; 1338 OffImm = 1339 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1340 return true; 1341 } 1342 } 1343 1344 // Base only. 1345 Base = N; 1346 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1347 return true; 1348 } 1349 1350 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1351 SDValue &Base, SDValue &OffImm) { 1352 // Match simple R - imm8 operands. 
1353 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1354 !CurDAG->isBaseWithConstantOffset(N)) 1355 return false; 1356 1357 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1358 int RHSC = (int)RHS->getSExtValue(); 1359 if (N.getOpcode() == ISD::SUB) 1360 RHSC = -RHSC; 1361 1362 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1363 Base = N.getOperand(0); 1364 if (Base.getOpcode() == ISD::FrameIndex) { 1365 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1366 Base = CurDAG->getTargetFrameIndex( 1367 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1368 } 1369 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1370 return true; 1371 } 1372 } 1373 1374 return false; 1375 } 1376 1377 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1378 SDValue &OffImm){ 1379 unsigned Opcode = Op->getOpcode(); 1380 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1381 ? cast<LoadSDNode>(Op)->getAddressingMode() 1382 : cast<StoreSDNode>(Op)->getAddressingMode(); 1383 int RHSC; 1384 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1385 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1386 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1387 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1388 return true; 1389 } 1390 1391 return false; 1392 } 1393 1394 template <unsigned Shift> 1395 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1396 SDValue &OffImm) { 1397 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1398 int RHSC; 1399 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1400 RHSC)) { 1401 Base = N.getOperand(0); 1402 if (Base.getOpcode() == ISD::FrameIndex) { 1403 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1404 Base = CurDAG->getTargetFrameIndex( 1405 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1406 } 1407 1408 if (N.getOpcode() == ISD::SUB) 1409 RHSC = -RHSC; 1410 OffImm = 1411 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1412 return true; 1413 } 1414 } 1415 1416 // Base only. 1417 Base = N; 1418 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1419 return true; 1420 } 1421 1422 template <unsigned Shift> 1423 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1424 SDValue &OffImm) { 1425 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1426 } 1427 1428 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1429 SDValue &OffImm, 1430 unsigned Shift) { 1431 unsigned Opcode = Op->getOpcode(); 1432 ISD::MemIndexedMode AM; 1433 switch (Opcode) { 1434 case ISD::LOAD: 1435 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1436 break; 1437 case ISD::STORE: 1438 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1439 break; 1440 case ISD::MLOAD: 1441 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1442 break; 1443 case ISD::MSTORE: 1444 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1445 break; 1446 default: 1447 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1448 } 1449 1450 int RHSC; 1451 // 7 bit constant, shifted by Shift. 
1452 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1453 OffImm = 1454 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1455 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1456 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1457 MVT::i32); 1458 return true; 1459 } 1460 return false; 1461 } 1462 1463 template <int Min, int Max> 1464 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1465 int Val; 1466 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1467 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); 1468 return true; 1469 } 1470 return false; 1471 } 1472 1473 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1474 SDValue &Base, 1475 SDValue &OffReg, SDValue &ShImm) { 1476 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1477 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1478 return false; 1479 1480 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1481 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1482 int RHSC = (int)RHS->getZExtValue(); 1483 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1484 return false; 1485 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1486 return false; 1487 } 1488 1489 // Look for (R + R) or (R + (R << [1,2,3])). 1490 unsigned ShAmt = 0; 1491 Base = N.getOperand(0); 1492 OffReg = N.getOperand(1); 1493 1494 // Swap if it is ((R << c) + R). 1495 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1496 if (ShOpcVal != ARM_AM::lsl) { 1497 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1498 if (ShOpcVal == ARM_AM::lsl) 1499 std::swap(Base, OffReg); 1500 } 1501 1502 if (ShOpcVal == ARM_AM::lsl) { 1503 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1504 // it. 
1505 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1506 ShAmt = Sh->getZExtValue(); 1507 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1508 OffReg = OffReg.getOperand(0); 1509 else { 1510 ShAmt = 0; 1511 } 1512 } 1513 } 1514 1515 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1516 // and use it in a shifted operand do so. 1517 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1518 unsigned PowerOfTwo = 0; 1519 SDValue NewMulConst; 1520 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1521 HandleSDNode Handle(OffReg); 1522 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1523 OffReg = Handle.getValue(); 1524 ShAmt = PowerOfTwo; 1525 } 1526 } 1527 1528 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1529 1530 return true; 1531 } 1532 1533 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1534 SDValue &OffImm) { 1535 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1536 // instructions. 1537 Base = N; 1538 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1539 1540 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1541 return true; 1542 1543 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1544 if (!RHS) 1545 return true; 1546 1547 uint32_t RHSC = (int)RHS->getZExtValue(); 1548 if (RHSC > 1020 || RHSC % 4 != 0) 1549 return true; 1550 1551 Base = N.getOperand(0); 1552 if (Base.getOpcode() == ISD::FrameIndex) { 1553 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1554 Base = CurDAG->getTargetFrameIndex( 1555 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1556 } 1557 1558 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1559 return true; 1560 } 1561 1562 //===--------------------------------------------------------------------===// 1563 1564 /// getAL - Returns a ARMCC::AL immediate node. 
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// Copy the MachineMemOperand of the original memory node onto the newly
/// selected machine node.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// Try to select a pre/post-indexed ARM-mode load for N. Returns false if
/// no indexed form matched, so other selection paths can run.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // i32 loads prefer the immediate forms and fall back to register offsets;
  // i16 and sext-i8 use addrmode3, zext/any-ext i8/i1 use addrmode2.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ?
ARM::LDRSB_PRE : ARM::LDRSB_POST; 1609 } 1610 } else { 1611 if (isPre && 1612 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1613 Match = true; 1614 Opcode = ARM::LDRB_PRE_IMM; 1615 } else if (!isPre && 1616 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1617 Match = true; 1618 Opcode = ARM::LDRB_POST_IMM; 1619 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1620 Match = true; 1621 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1622 } 1623 } 1624 } 1625 1626 if (Match) { 1627 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1628 SDValue Chain = LD->getChain(); 1629 SDValue Base = LD->getBasePtr(); 1630 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1631 CurDAG->getRegister(0, MVT::i32), Chain }; 1632 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1633 MVT::Other, Ops); 1634 transferMemOperands(N, New); 1635 ReplaceNode(N, New); 1636 return true; 1637 } else { 1638 SDValue Chain = LD->getChain(); 1639 SDValue Base = LD->getBasePtr(); 1640 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1641 CurDAG->getRegister(0, MVT::i32), Chain }; 1642 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1643 MVT::Other, Ops); 1644 transferMemOperands(N, New); 1645 ReplaceNode(N, New); 1646 return true; 1647 } 1648 } 1649 1650 return false; 1651 } 1652 1653 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1654 LoadSDNode *LD = cast<LoadSDNode>(N); 1655 EVT LoadedVT = LD->getMemoryVT(); 1656 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1657 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1658 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1659 return false; 1660 1661 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1662 if (!COffs || COffs->getZExtValue() != 4) 1663 return false; 1664 1665 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 
1666 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1667 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1668 // ISel. 1669 SDValue Chain = LD->getChain(); 1670 SDValue Base = LD->getBasePtr(); 1671 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1672 CurDAG->getRegister(0, MVT::i32), Chain }; 1673 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1674 MVT::i32, MVT::Other, Ops); 1675 transferMemOperands(N, New); 1676 ReplaceNode(N, New); 1677 return true; 1678 } 1679 1680 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1681 LoadSDNode *LD = cast<LoadSDNode>(N); 1682 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1683 if (AM == ISD::UNINDEXED) 1684 return false; 1685 1686 EVT LoadedVT = LD->getMemoryVT(); 1687 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1688 SDValue Offset; 1689 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1690 unsigned Opcode = 0; 1691 bool Match = false; 1692 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1693 switch (LoadedVT.getSimpleVT().SimpleTy) { 1694 case MVT::i32: 1695 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1696 break; 1697 case MVT::i16: 1698 if (isSExtLd) 1699 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1700 else 1701 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1702 break; 1703 case MVT::i8: 1704 case MVT::i1: 1705 if (isSExtLd) 1706 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1707 else 1708 Opcode = isPre ? 
                     ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    // Operands: base, offset, condition code (AL), cc register, chain.
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// Try to select a pre/post-indexed (writeback) MVE vector load for N, which
/// may be a plain indexed LoadSDNode or an indexed MaskedLoadSDNode. Returns
/// true and replaces N when a VLDR{B,H,W} opcode with an imm7 offset can be
/// selected, false otherwise.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // Plain indexed vector load: selected unpredicated (ARMVCC::None).
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    // Masked indexed load: selected as a "then"-predicated load, with the
    // mask supplying the predicate register.
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Pick an opcode: extending loads first (they fix the element size), then
  // full-width loads, each guarded by the alignment the encoding needs and by
  // whether an imm7 offset of the right scale can be selected.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // The machine node's results are (writeback i32, loaded value, chain) while
  // the ISD node's are (loaded value, writeback, chain) — swap the first two.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1851 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1852 SDLoc dl(V0.getNode()); 1853 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1854 MVT::i32); 1855 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1856 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1857 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1858 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1859 } 1860 1861 /// Form 4 consecutive D registers from a pair of Q registers. 1862 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1863 SDLoc dl(V0.getNode()); 1864 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1865 MVT::i32); 1866 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1867 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1868 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1869 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1870 } 1871 1872 /// Form 4 consecutive S registers. 1873 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1874 SDValue V2, SDValue V3) { 1875 SDLoc dl(V0.getNode()); 1876 SDValue RegClass = 1877 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1878 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1879 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1880 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1881 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1882 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1883 V2, SubReg2, V3, SubReg3 }; 1884 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1885 } 1886 1887 /// Form 4 consecutive D registers. 
1888 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1889 SDValue V2, SDValue V3) { 1890 SDLoc dl(V0.getNode()); 1891 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1892 MVT::i32); 1893 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1894 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1895 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1896 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1897 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1898 V2, SubReg2, V3, SubReg3 }; 1899 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1900 } 1901 1902 /// Form 4 consecutive Q registers. 1903 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1904 SDValue V2, SDValue V3) { 1905 SDLoc dl(V0.getNode()); 1906 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1907 MVT::i32); 1908 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1909 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1910 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1911 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1912 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1913 V2, SubReg2, V3, SubReg3 }; 1914 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1915 } 1916 1917 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1918 /// of a NEON VLD or VST instruction. The supported values depend on the 1919 /// number of registers being loaded. 
1920 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1921 unsigned NumVecs, bool is64BitVector) { 1922 unsigned NumRegs = NumVecs; 1923 if (!is64BitVector && NumVecs < 3) 1924 NumRegs *= 2; 1925 1926 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1927 if (Alignment >= 32 && NumRegs == 4) 1928 Alignment = 32; 1929 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1930 Alignment = 16; 1931 else if (Alignment >= 8) 1932 Alignment = 8; 1933 else 1934 Alignment = 0; 1935 1936 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1937 } 1938 1939 static bool isVLDfixed(unsigned Opc) 1940 { 1941 switch (Opc) { 1942 default: return false; 1943 case ARM::VLD1d8wb_fixed : return true; 1944 case ARM::VLD1d16wb_fixed : return true; 1945 case ARM::VLD1d64Qwb_fixed : return true; 1946 case ARM::VLD1d32wb_fixed : return true; 1947 case ARM::VLD1d64wb_fixed : return true; 1948 case ARM::VLD1d8TPseudoWB_fixed : return true; 1949 case ARM::VLD1d16TPseudoWB_fixed : return true; 1950 case ARM::VLD1d32TPseudoWB_fixed : return true; 1951 case ARM::VLD1d64TPseudoWB_fixed : return true; 1952 case ARM::VLD1d8QPseudoWB_fixed : return true; 1953 case ARM::VLD1d16QPseudoWB_fixed : return true; 1954 case ARM::VLD1d32QPseudoWB_fixed : return true; 1955 case ARM::VLD1d64QPseudoWB_fixed : return true; 1956 case ARM::VLD1q8wb_fixed : return true; 1957 case ARM::VLD1q16wb_fixed : return true; 1958 case ARM::VLD1q32wb_fixed : return true; 1959 case ARM::VLD1q64wb_fixed : return true; 1960 case ARM::VLD1DUPd8wb_fixed : return true; 1961 case ARM::VLD1DUPd16wb_fixed : return true; 1962 case ARM::VLD1DUPd32wb_fixed : return true; 1963 case ARM::VLD1DUPq8wb_fixed : return true; 1964 case ARM::VLD1DUPq16wb_fixed : return true; 1965 case ARM::VLD1DUPq32wb_fixed : return true; 1966 case ARM::VLD2d8wb_fixed : return true; 1967 case ARM::VLD2d16wb_fixed : return true; 1968 case ARM::VLD2d32wb_fixed : return true; 1969 case ARM::VLD2q8PseudoWB_fixed : 
return true; 1970 case ARM::VLD2q16PseudoWB_fixed : return true; 1971 case ARM::VLD2q32PseudoWB_fixed : return true; 1972 case ARM::VLD2DUPd8wb_fixed : return true; 1973 case ARM::VLD2DUPd16wb_fixed : return true; 1974 case ARM::VLD2DUPd32wb_fixed : return true; 1975 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true; 1976 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true; 1977 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true; 1978 } 1979 } 1980 1981 static bool isVSTfixed(unsigned Opc) 1982 { 1983 switch (Opc) { 1984 default: return false; 1985 case ARM::VST1d8wb_fixed : return true; 1986 case ARM::VST1d16wb_fixed : return true; 1987 case ARM::VST1d32wb_fixed : return true; 1988 case ARM::VST1d64wb_fixed : return true; 1989 case ARM::VST1q8wb_fixed : return true; 1990 case ARM::VST1q16wb_fixed : return true; 1991 case ARM::VST1q32wb_fixed : return true; 1992 case ARM::VST1q64wb_fixed : return true; 1993 case ARM::VST1d8TPseudoWB_fixed : return true; 1994 case ARM::VST1d16TPseudoWB_fixed : return true; 1995 case ARM::VST1d32TPseudoWB_fixed : return true; 1996 case ARM::VST1d64TPseudoWB_fixed : return true; 1997 case ARM::VST1d8QPseudoWB_fixed : return true; 1998 case ARM::VST1d16QPseudoWB_fixed : return true; 1999 case ARM::VST1d32QPseudoWB_fixed : return true; 2000 case ARM::VST1d64QPseudoWB_fixed : return true; 2001 case ARM::VST2d8wb_fixed : return true; 2002 case ARM::VST2d16wb_fixed : return true; 2003 case ARM::VST2d32wb_fixed : return true; 2004 case ARM::VST2q8PseudoWB_fixed : return true; 2005 case ARM::VST2q16PseudoWB_fixed : return true; 2006 case ARM::VST2q32PseudoWB_fixed : return true; 2007 } 2008 } 2009 2010 // Get the register stride update opcode of a VLD/VST instruction that 2011 // is otherwise equivalent to the given fixed stride updating instruction. 
/// Map a fixed-stride writeback VLD/VST opcode to its register-stride
/// writeback equivalent. Opcodes without a register-stride form are returned
/// unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

/// Select a NEON VLD1-4 (or updating variant) for N. DOpcodes holds the
/// per-element-size opcodes for 64-bit vectors, QOpcodes0/QOpcodes1 the
/// opcodes for the single (or even/odd halves of a paired) 128-bit load.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ?
                                    2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // The machine instruction yields one wide super-register (vld3 pads to a
  // power-of-two register count); individual vectors are extracted below.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Select a NEON VST1-4 (or updating variant) for N; opcode tables mirror
/// those of SelectVLD.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ?
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

/// Select a NEON single-lane VLD2-4 (IsLoad) or VST2-4 lane access, optionally
/// with address writeback.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to what the lane access can actually use
  // (no alignment is encodable for 3-element accesses).
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    // Loads produce one wide super-register (vld3 pads to four subregs).
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
      isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Gather the source/destination vectors into one super-register operand.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Append a "then"-predicated MVE predicate operand pair (VCC code + mask).
template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
}

/// As above, but also append the value supplying the inactive lanes.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(Inactive);
}

/// Append an unpredicated MVE predicate operand pair (VCC::None + no reg).
template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
}

/// As above, but also append an IMPLICIT_DEF of InactiveTy for the inactive
/// lanes operand.
template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

/// Select an MVE writeback gather/scatter-style node. Opcodes[0]/Opcodes[1]
/// are the 32-bit and 64-bit element variants.
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  // The machine node's first two results are swapped relative to N, hence
  // the crossed ReplaceUses below.
  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}

/// Select an MVE 64-bit scalar shift (value passed as two 32-bit halves),
/// with an immediate or register shift count and an optional saturation
/// operand.
void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
    int SatBit = (SatOp == 64 ?
0 : 1); 2612 Ops.push_back(getI32Imm(SatBit, Loc)); 2613 } 2614 2615 // MVE scalar shifts are IT-predicable, so include the standard 2616 // predicate arguments. 2617 Ops.push_back(getAL(CurDAG, Loc)); 2618 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2619 2620 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2621 } 2622 2623 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2624 uint16_t OpcodeWithNoCarry, 2625 bool Add, bool Predicated) { 2626 SDLoc Loc(N); 2627 SmallVector<SDValue, 8> Ops; 2628 uint16_t Opcode; 2629 2630 unsigned FirstInputOp = Predicated ? 2 : 1; 2631 2632 // Two input vectors and the input carry flag 2633 Ops.push_back(N->getOperand(FirstInputOp)); 2634 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2635 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2636 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2637 uint32_t CarryMask = 1 << 29; 2638 uint32_t CarryExpected = Add ? 0 : CarryMask; 2639 if (CarryInConstant && 2640 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2641 Opcode = OpcodeWithNoCarry; 2642 } else { 2643 Ops.push_back(CarryIn); 2644 Opcode = OpcodeWithCarry; 2645 } 2646 2647 if (Predicated) 2648 AddMVEPredicateToOps(Ops, Loc, 2649 N->getOperand(FirstInputOp + 3), // predicate 2650 N->getOperand(FirstInputOp - 1)); // inactive 2651 else 2652 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2653 2654 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2655 } 2656 2657 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2658 SDLoc Loc(N); 2659 SmallVector<SDValue, 8> Ops; 2660 2661 // One vector input, followed by a 32-bit word of bits to shift in 2662 // and then an immediate shift count 2663 Ops.push_back(N->getOperand(1)); 2664 Ops.push_back(N->getOperand(2)); 2665 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2666 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate 
shift count 2667 2668 if (Predicated) 2669 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2670 else 2671 AddEmptyMVEPredicateToOps(Ops, Loc); 2672 2673 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops)); 2674 } 2675 2676 static bool SDValueToConstBool(SDValue SDVal) { 2677 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2678 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2679 uint64_t Value = SDValConstant->getZExtValue(); 2680 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2681 return Value; 2682 } 2683 2684 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2685 const uint16_t *OpcodesS, 2686 const uint16_t *OpcodesU, 2687 size_t Stride, size_t TySize) { 2688 assert(TySize < Stride && "Invalid TySize"); 2689 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2690 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2691 bool IsExchange = SDValueToConstBool(N->getOperand(3)); 2692 if (IsUnsigned) { 2693 assert(!IsSub && 2694 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2695 assert(!IsExchange && 2696 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2697 } 2698 2699 auto OpIsZero = [N](size_t OpNo) { 2700 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo))) 2701 if (OpConst->getZExtValue() == 0) 2702 return true; 2703 return false; 2704 }; 2705 2706 // If the input accumulator value is not zero, select an instruction with 2707 // accumulator, otherwise select an instruction without accumulator 2708 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2709 2710 const uint16_t *Opcodes = IsUnsigned ? 
OpcodesU : OpcodesS; 2711 if (IsSub) 2712 Opcodes += 4 * Stride; 2713 if (IsExchange) 2714 Opcodes += 2 * Stride; 2715 if (IsAccum) 2716 Opcodes += Stride; 2717 uint16_t Opcode = Opcodes[TySize]; 2718 2719 SDLoc Loc(N); 2720 SmallVector<SDValue, 8> Ops; 2721 // Push the accumulator operands, if they are used 2722 if (IsAccum) { 2723 Ops.push_back(N->getOperand(4)); 2724 Ops.push_back(N->getOperand(5)); 2725 } 2726 // Push the two vector operands 2727 Ops.push_back(N->getOperand(6)); 2728 Ops.push_back(N->getOperand(7)); 2729 2730 if (Predicated) 2731 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2732 else 2733 AddEmptyMVEPredicateToOps(Ops, Loc); 2734 2735 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2736 } 2737 2738 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2739 const uint16_t *OpcodesS, 2740 const uint16_t *OpcodesU) { 2741 EVT VecTy = N->getOperand(6).getValueType(); 2742 size_t SizeIndex; 2743 switch (VecTy.getVectorElementType().getSizeInBits()) { 2744 case 16: 2745 SizeIndex = 0; 2746 break; 2747 case 32: 2748 SizeIndex = 1; 2749 break; 2750 default: 2751 llvm_unreachable("bad vector element size"); 2752 } 2753 2754 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2755 } 2756 2757 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2758 const uint16_t *OpcodesS, 2759 const uint16_t *OpcodesU) { 2760 assert( 2761 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2762 32 && 2763 "bad vector element size"); 2764 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2765 } 2766 2767 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2768 const uint16_t *const *Opcodes, 2769 bool HasWriteback) { 2770 EVT VT = N->getValueType(0); 2771 SDLoc Loc(N); 2772 2773 const uint16_t *OurOpcodes; 2774 switch (VT.getVectorElementType().getSizeInBits()) { 2775 case 8: 2776 OurOpcodes = Opcodes[0]; 2777 break; 2778 case 16: 2779 OurOpcodes 
= Opcodes[1]; 2780 break; 2781 case 32: 2782 OurOpcodes = Opcodes[2]; 2783 break; 2784 default: 2785 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2786 } 2787 2788 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2789 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2790 unsigned PtrOperand = HasWriteback ? 1 : 2; 2791 2792 auto Data = SDValue( 2793 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2794 SDValue Chain = N->getOperand(0); 2795 // Add a MVE_VLDn instruction for each Vec, except the last 2796 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2797 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2798 auto LoadInst = 2799 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2800 Data = SDValue(LoadInst, 0); 2801 Chain = SDValue(LoadInst, 1); 2802 transferMemOperands(N, LoadInst); 2803 } 2804 // The last may need a writeback on it 2805 if (HasWriteback) 2806 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2807 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2808 auto LoadInst = 2809 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2810 transferMemOperands(N, LoadInst); 2811 2812 unsigned i; 2813 for (i = 0; i < NumVecs; i++) 2814 ReplaceUses(SDValue(N, i), 2815 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2816 SDValue(LoadInst, 0))); 2817 if (HasWriteback) 2818 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2819 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 
2 : 1));
  CurDAG->RemoveDeadNode(N);
}

/// Select an MVE VIDUP/VDDUP (or wrapping VIWDUP/VDWDUP) incrementing-dup
/// intrinsic. Opcodes holds one machine opcode per element size (8/16/32).
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Pick the opcode variant matching the vector element width.
  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  // Predicated forms carry the inactive-lanes value first.
  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  // The step must be an immediate; convert it to a target constant.
  SDValue ImmOp = N->getOperand(OpIdx++); // step
  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

/// Select a dual-register CDE (Custom Datapath Extension) instruction.
/// NumExtraOps operands are forwarded unchanged; HasAccum variants take a
/// 64-bit accumulator passed as two 32-bit halves.
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    // On big-endian targets the register pair holds the halves in the
    // opposite order.
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
2914 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1}; 2915 if (IsBigEndian) 2916 std::swap(SubRegs[0], SubRegs[1]); 2917 2918 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) { 2919 if (SDValue(N, ResIdx).use_empty()) 2920 continue; 2921 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc, 2922 MVT::i32, ResultPair); 2923 ReplaceUses(SDValue(N, ResIdx), SubReg); 2924 } 2925 2926 CurDAG->RemoveDeadNode(N); 2927 } 2928 2929 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2930 bool isUpdating, unsigned NumVecs, 2931 const uint16_t *DOpcodes, 2932 const uint16_t *QOpcodes0, 2933 const uint16_t *QOpcodes1) { 2934 assert(Subtarget->hasNEON()); 2935 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2936 SDLoc dl(N); 2937 2938 SDValue MemAddr, Align; 2939 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2940 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2941 return; 2942 2943 SDValue Chain = N->getOperand(0); 2944 EVT VT = N->getValueType(0); 2945 bool is64BitVector = VT.is64BitVector(); 2946 2947 unsigned Alignment = 0; 2948 if (NumVecs != 3) { 2949 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2950 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2951 if (Alignment > NumBytes) 2952 Alignment = NumBytes; 2953 if (Alignment < 8 && Alignment < NumBytes) 2954 Alignment = 0; 2955 // Alignment must be a power of two; make sure of that. 
    // Alignment & -Alignment keeps only the lowest set bit, rounding the
    // value down to a power of two.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
    OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // NumVecs == 3 still uses a 4-element result type; quad-register vectors
  // take twice as many i64 container elements.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32); // writeback result
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      // Fixed-offset opcodes encode the increment implicitly.
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      // Switch to the register-update form for a register increment.
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
3017 } else if (NumVecs == 2) { 3018 const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain}; 3019 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy, 3020 MVT::Other, OpsA); 3021 Chain = SDValue(VLdA, 1); 3022 } else { 3023 SDValue ImplDef = SDValue( 3024 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 3025 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain}; 3026 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy, 3027 MVT::Other, OpsA); 3028 Ops.push_back(SDValue(VLdA, 0)); 3029 Chain = SDValue(VLdA, 1); 3030 } 3031 3032 Ops.push_back(Pred); 3033 Ops.push_back(Reg0); 3034 Ops.push_back(Chain); 3035 3036 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 3037 3038 // Transfer memoperands. 3039 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3040 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 3041 3042 // Extract the subregisters. 3043 if (NumVecs == 1) { 3044 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 3045 } else { 3046 SDValue SuperReg = SDValue(VLdDup, 0); 3047 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 3048 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 3049 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 3050 ReplaceUses(SDValue(N, Vec), 3051 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 3052 } 3053 } 3054 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 3055 if (isUpdating) 3056 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 3057 CurDAG->RemoveDeadNode(N); 3058 } 3059 3060 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { 3061 if (!Subtarget->hasMVEIntegerOps()) 3062 return false; 3063 3064 SDLoc dl(N); 3065 3066 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and 3067 // extracts of v8f16 and v8i16 vectors. 
Check that we have two adjacent 3068 // inserts of the correct type: 3069 SDValue Ins1 = SDValue(N, 0); 3070 SDValue Ins2 = N->getOperand(0); 3071 EVT VT = Ins1.getValueType(); 3072 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() || 3073 !isa<ConstantSDNode>(Ins1.getOperand(2)) || 3074 !isa<ConstantSDNode>(Ins2.getOperand(2)) || 3075 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT)) 3076 return false; 3077 3078 unsigned Lane1 = Ins1.getConstantOperandVal(2); 3079 unsigned Lane2 = Ins2.getConstantOperandVal(2); 3080 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1) 3081 return false; 3082 3083 // If the inserted values will be able to use T/B already, leave it to the 3084 // existing tablegen patterns. For example VCVTT/VCVTB. 3085 SDValue Val1 = Ins1.getOperand(1); 3086 SDValue Val2 = Ins2.getOperand(1); 3087 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND) 3088 return false; 3089 3090 // Check if the inserted values are both extracts. 3091 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3092 Val1.getOpcode() == ARMISD::VGETLANEu) && 3093 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3094 Val2.getOpcode() == ARMISD::VGETLANEu) && 3095 isa<ConstantSDNode>(Val1.getOperand(1)) && 3096 isa<ConstantSDNode>(Val2.getOperand(1)) && 3097 (Val1.getOperand(0).getValueType() == MVT::v8f16 || 3098 Val1.getOperand(0).getValueType() == MVT::v8i16) && 3099 (Val2.getOperand(0).getValueType() == MVT::v8f16 || 3100 Val2.getOperand(0).getValueType() == MVT::v8i16)) { 3101 unsigned ExtractLane1 = Val1.getConstantOperandVal(1); 3102 unsigned ExtractLane2 = Val2.getConstantOperandVal(1); 3103 3104 // If the two extracted lanes are from the same place and adjacent, this 3105 // simplifies into a f32 lane move. 
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      // Move the pair of f16 lanes as one f32 subregister copy.
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with an optional vmovx
    // for extracting odd lanes.
    if (VT == MVT::v8i16) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      // Odd lanes need a VMOVX to move the top half of the s-register down.
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}

/// Try to select SBFX/UBFX (or t2SBFX/t2UBFX) bitfield-extract instructions
/// for AND/shift/SIGN_EXTEND_INREG patterns. Requires ARMv6T2.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
                     ? (Subtarget->isThumb() ?
ARM::t2SBFX : ARM::SBFX) 3157 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 3158 SDLoc dl(N); 3159 3160 // For unsigned extracts, check for a shift right and mask 3161 unsigned And_imm = 0; 3162 if (N->getOpcode() == ISD::AND) { 3163 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3164 3165 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3166 if (And_imm & (And_imm + 1)) 3167 return false; 3168 3169 unsigned Srl_imm = 0; 3170 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3171 Srl_imm)) { 3172 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3173 3174 // Mask off the unnecessary bits of the AND immediate; normally 3175 // DAGCombine will do this, but that might not happen if 3176 // targetShrinkDemandedConstant chooses a different immediate. 3177 And_imm &= -1U >> Srl_imm; 3178 3179 // Note: The width operand is encoded as width-1. 3180 unsigned Width = countTrailingOnes(And_imm) - 1; 3181 unsigned LSB = Srl_imm; 3182 3183 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3184 3185 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3186 // It's cheaper to use a right shift to extract the top bits. 3187 if (Subtarget->isThumb()) { 3188 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3189 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3190 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3191 getAL(CurDAG, dl), Reg0, Reg0 }; 3192 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3193 return true; 3194 } 3195 3196 // ARM models shift instructions as MOVsi with shifter operand. 
3197 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3198 SDValue ShOpc = 3199 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3200 MVT::i32); 3201 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3202 getAL(CurDAG, dl), Reg0, Reg0 }; 3203 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3204 return true; 3205 } 3206 3207 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3208 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3209 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3210 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3211 getAL(CurDAG, dl), Reg0 }; 3212 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3213 return true; 3214 } 3215 } 3216 return false; 3217 } 3218 3219 // Otherwise, we're looking for a shift of a shift 3220 unsigned Shl_imm = 0; 3221 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3222 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3223 unsigned Srl_imm = 0; 3224 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3225 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3226 // Note: The width operand is encoded as width-1. 
3227 unsigned Width = 32 - Srl_imm - 1; 3228 int LSB = Srl_imm - Shl_imm; 3229 if (LSB < 0) 3230 return false; 3231 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3232 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3233 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3234 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3235 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3236 getAL(CurDAG, dl), Reg0 }; 3237 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3238 return true; 3239 } 3240 } 3241 3242 // Or we are looking for a shift of an and, with a mask operand 3243 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3244 isShiftedMask_32(And_imm)) { 3245 unsigned Srl_imm = 0; 3246 unsigned LSB = countTrailingZeros(And_imm); 3247 // Shift must be the same as the ands lsb 3248 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3249 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3250 unsigned MSB = 31 - countLeadingZeros(And_imm); 3251 // Note: The width operand is encoded as width-1. 
3252 unsigned Width = MSB - LSB; 3253 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3254 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3255 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3256 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3257 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3258 getAL(CurDAG, dl), Reg0 }; 3259 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3260 return true; 3261 } 3262 } 3263 3264 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3265 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3266 unsigned LSB = 0; 3267 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3268 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3269 return false; 3270 3271 if (LSB + Width > 32) 3272 return false; 3273 3274 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3275 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3276 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3277 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3278 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3279 getAL(CurDAG, dl), Reg0 }; 3280 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3281 return true; 3282 } 3283 3284 return false; 3285 } 3286 3287 /// Target-specific DAG combining for ISD::XOR. 3288 /// Target-independent combining lowers SELECT_CC nodes of the form 3289 /// select_cc setg[ge] X, 0, X, -X 3290 /// select_cc setgt X, -1, X, -X 3291 /// select_cc setl[te] X, 0, -X, X 3292 /// select_cc setlt X, 1, -X, X 3293 /// which represent Integer ABS into: 3294 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 3295 /// ARM instruction selection detects the latter and matches it to 3296 /// ARM::ABS or ARM::t2ABS machine node. 
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  // N is an ISD::XOR; look for xor (add X, Y), Y where Y = sra X, size-1,
  // i.e. the canonical integer-abs expansion (see the comment above).
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // ABS/t2ABS pseudos are not available in Thumb1.
  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  // The sign-extending shift amount must be the type width minus one.
  unsigned Size = XType.getSizeInBits() - 1;

  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  // Pick the CMP_SWAP pseudo matching the access width and ISA.
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ?
ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Operands: pointer, expected value, new value, chain.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  // Preserve the memory operand so later passes see the atomic access.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  // Result 1 of the pseudo is a scratch; the chain is result 2.
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

/// If the set bits of A form one contiguous run, return the (most
/// significant, least significant) bit indices of that run; otherwise return
/// an empty Optional.
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  // Contiguous iff the population count equals the span of the run.
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

/// Try to turn (cmpz (and X, C), #0) into cheaper flag-setting shifts,
/// possibly asking the caller (via SwitchEQNEToPLMI) to flip EQ/NE into
/// PL/MI when only the sign bit ends up tested.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
3370 return; 3371 3372 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3373 SDValue And = N->getOperand(0); 3374 if (!And->hasOneUse()) 3375 return; 3376 3377 SDValue Zero = N->getOperand(1); 3378 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 3379 And->getOpcode() != ISD::AND) 3380 return; 3381 SDValue X = And.getOperand(0); 3382 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3383 3384 if (!C) 3385 return; 3386 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3387 if (!Range) 3388 return; 3389 3390 // There are several ways to lower this: 3391 SDNode *NewN; 3392 SDLoc dl(N); 3393 3394 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3395 if (Subtarget->isThumb2()) { 3396 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3397 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3398 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3399 CurDAG->getRegister(0, MVT::i32) }; 3400 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3401 } else { 3402 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3403 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3404 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3405 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3406 } 3407 }; 3408 3409 if (Range->second == 0) { 3410 // 1. Mask includes the LSB -> Simply shift the top N bits off 3411 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3412 ReplaceNode(And.getNode(), NewN); 3413 } else if (Range->first == 31) { 3414 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3415 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3416 ReplaceNode(And.getNode(), NewN); 3417 } else if (Range->first == Range->second) { 3418 // 3. Only one bit is set. We can shift this into the sign bit and use a 3419 // PL/MI comparison. 
3420 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3421 ReplaceNode(And.getNode(), NewN); 3422 3423 SwitchEQNEToPLMI = true; 3424 } else if (!Subtarget->hasV6T2Ops()) { 3425 // 4. Do a double shift to clear bottom and top bits, but only in 3426 // thumb-1 mode as in thumb-2 we can use UBFX. 3427 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3428 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3429 Range->second + (31 - Range->first)); 3430 ReplaceNode(And.getNode(), NewN); 3431 } 3432 3433 } 3434 3435 void ARMDAGToDAGISel::Select(SDNode *N) { 3436 SDLoc dl(N); 3437 3438 if (N->isMachineOpcode()) { 3439 N->setNodeId(-1); 3440 return; // Already selected. 3441 } 3442 3443 switch (N->getOpcode()) { 3444 default: break; 3445 case ISD::STORE: { 3446 // For Thumb1, match an sp-relative store in C++. This is a little 3447 // unfortunate, but I don't think I can make the chain check work 3448 // otherwise. (The chain of the store has to be the same as the chain 3449 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3450 // a direct reference to "SP".) 3451 // 3452 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3453 // a different addressing mode from other four-byte stores. 3454 // 3455 // This pattern usually comes up with call arguments. 
3456 StoreSDNode *ST = cast<StoreSDNode>(N); 3457 SDValue Ptr = ST->getBasePtr(); 3458 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3459 int RHSC = 0; 3460 if (Ptr.getOpcode() == ISD::ADD && 3461 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3462 Ptr = Ptr.getOperand(0); 3463 3464 if (Ptr.getOpcode() == ISD::CopyFromReg && 3465 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3466 Ptr.getOperand(0) == ST->getChain()) { 3467 SDValue Ops[] = {ST->getValue(), 3468 CurDAG->getRegister(ARM::SP, MVT::i32), 3469 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3470 getAL(CurDAG, dl), 3471 CurDAG->getRegister(0, MVT::i32), 3472 ST->getChain()}; 3473 MachineSDNode *ResNode = 3474 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3475 MachineMemOperand *MemOp = ST->getMemOperand(); 3476 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3477 ReplaceNode(N, ResNode); 3478 return; 3479 } 3480 } 3481 break; 3482 } 3483 case ISD::WRITE_REGISTER: 3484 if (tryWriteRegister(N)) 3485 return; 3486 break; 3487 case ISD::READ_REGISTER: 3488 if (tryReadRegister(N)) 3489 return; 3490 break; 3491 case ISD::INLINEASM: 3492 case ISD::INLINEASM_BR: 3493 if (tryInlineAsm(N)) 3494 return; 3495 break; 3496 case ISD::XOR: 3497 // Select special operations if XOR node forms integer ABS pattern 3498 if (tryABSOp(N)) 3499 return; 3500 // Other cases are autogenerated. 
3501 break; 3502 case ISD::Constant: { 3503 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3504 // If we can't materialize the constant we need to use a literal pool 3505 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3506 SDValue CPIdx = CurDAG->getTargetConstantPool( 3507 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3508 TLI->getPointerTy(CurDAG->getDataLayout())); 3509 3510 SDNode *ResNode; 3511 if (Subtarget->isThumb()) { 3512 SDValue Ops[] = { 3513 CPIdx, 3514 getAL(CurDAG, dl), 3515 CurDAG->getRegister(0, MVT::i32), 3516 CurDAG->getEntryNode() 3517 }; 3518 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3519 Ops); 3520 } else { 3521 SDValue Ops[] = { 3522 CPIdx, 3523 CurDAG->getTargetConstant(0, dl, MVT::i32), 3524 getAL(CurDAG, dl), 3525 CurDAG->getRegister(0, MVT::i32), 3526 CurDAG->getEntryNode() 3527 }; 3528 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3529 Ops); 3530 } 3531 // Annotate the Node with memory operand information so that MachineInstr 3532 // queries work properly. This e.g. gives the register allocation the 3533 // required information for rematerialization. 3534 MachineFunction& MF = CurDAG->getMachineFunction(); 3535 MachineMemOperand *MemOp = 3536 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3537 MachineMemOperand::MOLoad, 4, Align(4)); 3538 3539 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3540 3541 ReplaceNode(N, ResNode); 3542 return; 3543 } 3544 3545 // Other cases are autogenerated. 3546 break; 3547 } 3548 case ISD::FrameIndex: { 3549 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3550 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3551 SDValue TFI = CurDAG->getTargetFrameIndex( 3552 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3553 if (Subtarget->isThumb1Only()) { 3554 // Set the alignment of the frame object to 4, to avoid having to generate 3555 // more than one ADD 3556 MachineFrameInfo &MFI = MF->getFrameInfo(); 3557 if (MFI.getObjectAlign(FI) < Align(4)) 3558 MFI.setObjectAlignment(FI, Align(4)); 3559 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3560 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3561 return; 3562 } else { 3563 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3564 ARM::t2ADDri : ARM::ADDri); 3565 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3566 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3567 CurDAG->getRegister(0, MVT::i32) }; 3568 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3569 return; 3570 } 3571 } 3572 case ISD::INSERT_VECTOR_ELT: { 3573 if (tryInsertVectorElt(N)) 3574 return; 3575 break; 3576 } 3577 case ISD::SRL: 3578 if (tryV6T2BitfieldExtractOp(N, false)) 3579 return; 3580 break; 3581 case ISD::SIGN_EXTEND_INREG: 3582 case ISD::SRA: 3583 if (tryV6T2BitfieldExtractOp(N, true)) 3584 return; 3585 break; 3586 case ISD::MUL: 3587 if (Subtarget->isThumb1Only()) 3588 break; 3589 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3590 unsigned RHSV = C->getZExtValue(); 3591 if (!RHSV) break; 3592 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3593 unsigned ShImm = Log2_32(RHSV-1); 3594 if (ShImm >= 32) 3595 break; 3596 SDValue V = N->getOperand(0); 3597 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3598 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3599 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3600 if (Subtarget->isThumb()) { 3601 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3602 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3603 return; 3604 } else { 3605 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3606 Reg0 }; 3607 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3608 return; 3609 } 3610 } 3611 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3612 unsigned ShImm = Log2_32(RHSV+1); 3613 if (ShImm >= 32) 3614 break; 3615 SDValue V = N->getOperand(0); 3616 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3617 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3618 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3619 if (Subtarget->isThumb()) { 3620 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3621 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3622 return; 3623 } else { 3624 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3625 Reg0 }; 3626 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3627 return; 3628 } 3629 } 3630 } 3631 break; 3632 case ISD::AND: { 3633 // Check for unsigned bitfield extract 3634 if (tryV6T2BitfieldExtractOp(N, false)) 3635 return; 3636 3637 // If an immediate is used in an AND node, it is possible that the immediate 3638 // can be more optimally materialized when negated. If this is the case we 3639 // can negate the immediate and use a BIC instead. 3640 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3641 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3642 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3643 3644 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3645 // immediate can be negated and fit in the immediate operand of 3646 // a t2BIC, don't do any manual transform here as this can be 3647 // handled by the generic ISel machinery. 3648 bool PreferImmediateEncoding = 3649 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3650 if (!PreferImmediateEncoding && 3651 ConstantMaterializationCost(Imm, Subtarget) > 3652 ConstantMaterializationCost(~Imm, Subtarget)) { 3653 // The current immediate costs more to materialize than a negated 3654 // immediate, so negate the immediate and use a BIC. 3655 SDValue NewImm = 3656 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3657 // If the new constant didn't exist before, reposition it in the topological 3658 // ordering so it is just before N. Otherwise, don't touch its location. 3659 if (NewImm->getNodeId() == -1) 3660 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3661 3662 if (!Subtarget->hasThumb2()) { 3663 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3664 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3665 CurDAG->getRegister(0, MVT::i32)}; 3666 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3667 return; 3668 } else { 3669 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3670 CurDAG->getRegister(0, MVT::i32), 3671 CurDAG->getRegister(0, MVT::i32)}; 3672 ReplaceNode(N, 3673 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3674 return; 3675 } 3676 } 3677 } 3678 3679 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3680 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3681 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3682 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3683 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3684 EVT VT = N->getValueType(0); 3685 if (VT != MVT::i32) 3686 break; 3687 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3688 ? 
ARM::t2MOVTi16 3689 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3690 if (!Opc) 3691 break; 3692 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3693 N1C = dyn_cast<ConstantSDNode>(N1); 3694 if (!N1C) 3695 break; 3696 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3697 SDValue N2 = N0.getOperand(1); 3698 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3699 if (!N2C) 3700 break; 3701 unsigned N1CVal = N1C->getZExtValue(); 3702 unsigned N2CVal = N2C->getZExtValue(); 3703 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3704 (N1CVal & 0xffffU) == 0xffffU && 3705 (N2CVal & 0xffffU) == 0x0U) { 3706 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3707 dl, MVT::i32); 3708 SDValue Ops[] = { N0.getOperand(0), Imm16, 3709 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3710 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3711 return; 3712 } 3713 } 3714 3715 break; 3716 } 3717 case ARMISD::UMAAL: { 3718 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3719 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3720 N->getOperand(2), N->getOperand(3), 3721 getAL(CurDAG, dl), 3722 CurDAG->getRegister(0, MVT::i32) }; 3723 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3724 return; 3725 } 3726 case ARMISD::UMLAL:{ 3727 if (Subtarget->isThumb()) { 3728 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3729 N->getOperand(3), getAL(CurDAG, dl), 3730 CurDAG->getRegister(0, MVT::i32)}; 3731 ReplaceNode( 3732 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3733 return; 3734 }else{ 3735 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3736 N->getOperand(3), getAL(CurDAG, dl), 3737 CurDAG->getRegister(0, MVT::i32), 3738 CurDAG->getRegister(0, MVT::i32) }; 3739 ReplaceNode(N, CurDAG->getMachineNode( 3740 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3741 MVT::i32, MVT::i32, Ops)); 3742 return; 3743 } 3744 } 3745 case ARMISD::SMLAL:{ 3746 if (Subtarget->isThumb()) { 3747 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3748 N->getOperand(3), getAL(CurDAG, dl), 3749 CurDAG->getRegister(0, MVT::i32)}; 3750 ReplaceNode( 3751 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3752 return; 3753 }else{ 3754 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3755 N->getOperand(3), getAL(CurDAG, dl), 3756 CurDAG->getRegister(0, MVT::i32), 3757 CurDAG->getRegister(0, MVT::i32) }; 3758 ReplaceNode(N, CurDAG->getMachineNode( 3759 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3760 MVT::i32, MVT::i32, Ops)); 3761 return; 3762 } 3763 } 3764 case ARMISD::SUBE: { 3765 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3766 break; 3767 // Look for a pattern to match SMMLS 3768 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3769 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3770 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3771 !SDValue(N, 1).use_empty()) 3772 break; 3773 3774 if (Subtarget->isThumb()) 3775 assert(Subtarget->hasThumb2() && 3776 "This pattern should not be generated for Thumb"); 3777 3778 SDValue SmulLoHi = N->getOperand(1); 3779 SDValue Subc = N->getOperand(2); 3780 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3781 3782 if (!Zero || Zero->getZExtValue() != 0 || 3783 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3784 N->getOperand(1) != SmulLoHi.getValue(1) || 3785 N->getOperand(2) != Subc.getValue(1)) 3786 break; 3787 3788 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3789 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3790 N->getOperand(0), getAL(CurDAG, dl), 3791 CurDAG->getRegister(0, MVT::i32) }; 3792 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3793 return; 3794 } 3795 case ISD::LOAD: { 3796 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3797 return; 3798 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3799 if (tryT2IndexedLoad(N)) 3800 return; 3801 } else if (Subtarget->isThumb()) { 3802 if (tryT1IndexedLoad(N)) 3803 return; 3804 } else if (tryARMIndexedLoad(N)) 3805 return; 3806 // Other cases are autogenerated. 3807 break; 3808 } 3809 case ISD::MLOAD: 3810 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3811 return; 3812 // Other cases are autogenerated. 3813 break; 3814 case ARMISD::WLSSETUP: { 3815 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 3816 N->getOperand(0)); 3817 ReplaceUses(N, New); 3818 CurDAG->RemoveDeadNode(N); 3819 return; 3820 } 3821 case ARMISD::WLS: { 3822 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 3823 N->getOperand(1), N->getOperand(2), 3824 N->getOperand(0)); 3825 ReplaceUses(N, New); 3826 CurDAG->RemoveDeadNode(N); 3827 return; 3828 } 3829 case ARMISD::LE: { 3830 SDValue Ops[] = { N->getOperand(1), 3831 N->getOperand(2), 3832 N->getOperand(0) }; 3833 unsigned Opc = ARM::t2LoopEnd; 3834 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3835 ReplaceUses(N, New); 3836 CurDAG->RemoveDeadNode(N); 3837 return; 3838 } 3839 case ARMISD::LDRD: { 3840 if (Subtarget->isThumb2()) 3841 break; // TableGen handles isel in this case. 
3842 SDValue Base, RegOffset, ImmOffset; 3843 const SDValue &Chain = N->getOperand(0); 3844 const SDValue &Addr = N->getOperand(1); 3845 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3846 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3847 // The register-offset variant of LDRD mandates that the register 3848 // allocated to RegOffset is not reused in any of the remaining operands. 3849 // This restriction is currently not enforced. Therefore emitting this 3850 // variant is explicitly avoided. 3851 Base = Addr; 3852 RegOffset = CurDAG->getRegister(0, MVT::i32); 3853 } 3854 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 3855 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 3856 {MVT::Untyped, MVT::Other}, Ops); 3857 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 3858 SDValue(New, 0)); 3859 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 3860 SDValue(New, 0)); 3861 transferMemOperands(N, New); 3862 ReplaceUses(SDValue(N, 0), Lo); 3863 ReplaceUses(SDValue(N, 1), Hi); 3864 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 3865 CurDAG->RemoveDeadNode(N); 3866 return; 3867 } 3868 case ARMISD::STRD: { 3869 if (Subtarget->isThumb2()) 3870 break; // TableGen handles isel in this case. 3871 SDValue Base, RegOffset, ImmOffset; 3872 const SDValue &Chain = N->getOperand(0); 3873 const SDValue &Addr = N->getOperand(3); 3874 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3875 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3876 // The register-offset variant of STRD mandates that the register 3877 // allocated to RegOffset is not reused in any of the remaining operands. 3878 // This restriction is currently not enforced. Therefore emitting this 3879 // variant is explicitly avoided. 
3880 Base = Addr; 3881 RegOffset = CurDAG->getRegister(0, MVT::i32); 3882 } 3883 SDNode *RegPair = 3884 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 3885 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 3886 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 3887 transferMemOperands(N, New); 3888 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 3889 CurDAG->RemoveDeadNode(N); 3890 return; 3891 } 3892 case ARMISD::LOOP_DEC: { 3893 SDValue Ops[] = { N->getOperand(1), 3894 N->getOperand(2), 3895 N->getOperand(0) }; 3896 SDNode *Dec = 3897 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3898 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3899 ReplaceUses(N, Dec); 3900 CurDAG->RemoveDeadNode(N); 3901 return; 3902 } 3903 case ARMISD::BRCOND: { 3904 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3905 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3906 // Pattern complexity = 6 cost = 1 size = 0 3907 3908 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3909 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3910 // Pattern complexity = 6 cost = 1 size = 0 3911 3912 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3913 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3914 // Pattern complexity = 6 cost = 1 size = 0 3915 3916 unsigned Opc = Subtarget->isThumb() ? 3917 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3918 SDValue Chain = N->getOperand(0); 3919 SDValue N1 = N->getOperand(1); 3920 SDValue N2 = N->getOperand(2); 3921 SDValue N3 = N->getOperand(3); 3922 SDValue InFlag = N->getOperand(4); 3923 assert(N1.getOpcode() == ISD::BasicBlock); 3924 assert(N2.getOpcode() == ISD::Constant); 3925 assert(N3.getOpcode() == ISD::Register); 3926 3927 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3928 3929 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3930 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3931 SDValue Int = InFlag.getOperand(0); 3932 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3933 3934 // Handle low-overhead loops. 3935 if (ID == Intrinsic::loop_decrement_reg) { 3936 SDValue Elements = Int.getOperand(2); 3937 SDValue Size = CurDAG->getTargetConstant( 3938 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3939 MVT::i32); 3940 3941 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3942 SDNode *LoopDec = 3943 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3944 CurDAG->getVTList(MVT::i32, MVT::Other), 3945 Args); 3946 ReplaceUses(Int.getNode(), LoopDec); 3947 3948 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3949 SDNode *LoopEnd = 3950 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3951 3952 ReplaceUses(N, LoopEnd); 3953 CurDAG->RemoveDeadNode(N); 3954 CurDAG->RemoveDeadNode(InFlag.getNode()); 3955 CurDAG->RemoveDeadNode(Int.getNode()); 3956 return; 3957 } 3958 } 3959 3960 bool SwitchEQNEToPLMI; 3961 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3962 InFlag = N->getOperand(4); 3963 3964 if (SwitchEQNEToPLMI) { 3965 switch ((ARMCC::CondCodes)CC) { 3966 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3967 case ARMCC::NE: 3968 CC = (unsigned)ARMCC::MI; 3969 break; 3970 case ARMCC::EQ: 3971 CC = (unsigned)ARMCC::PL; 3972 break; 3973 } 3974 } 3975 } 3976 3977 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3978 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3979 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3980 MVT::Glue, Ops); 3981 Chain = SDValue(ResNode, 0); 3982 if (N->getNumValues() == 2) { 3983 InFlag = SDValue(ResNode, 1); 3984 ReplaceUses(SDValue(N, 1), InFlag); 3985 } 3986 ReplaceUses(SDValue(N, 0), 3987 SDValue(Chain.getNode(), Chain.getResNo())); 3988 CurDAG->RemoveDeadNode(N); 3989 return; 3990 } 3991 3992 case ARMISD::CMPZ: { 3993 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3994 // This allows us to avoid materializing the expensive negative constant. 3995 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3996 // for its glue output. 3997 SDValue X = N->getOperand(0); 3998 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3999 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4000 int64_t Addend = -C->getSExtValue(); 4001 4002 SDNode *Add = nullptr; 4003 // ADDS can be better than CMN if the immediate fits in a 4004 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4005 // Outside that range we can just use a CMN which is 32-bit but has a 4006 // 12-bit immediate range. 4007 if (Addend < 1<<8) { 4008 if (Subtarget->isThumb2()) { 4009 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4010 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4011 CurDAG->getRegister(0, MVT::i32) }; 4012 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4013 } else { 4014 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 4015 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4016 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4017 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4018 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4019 } 4020 } 4021 if (Add) { 4022 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4023 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4024 } 4025 } 4026 // Other cases are autogenerated. 4027 break; 4028 } 4029 4030 case ARMISD::CMOV: { 4031 SDValue InFlag = N->getOperand(4); 4032 4033 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4034 bool SwitchEQNEToPLMI; 4035 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4036 4037 if (SwitchEQNEToPLMI) { 4038 SDValue ARMcc = N->getOperand(2); 4039 ARMCC::CondCodes CC = 4040 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 4041 4042 switch (CC) { 4043 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4044 case ARMCC::NE: 4045 CC = ARMCC::MI; 4046 break; 4047 case ARMCC::EQ: 4048 CC = ARMCC::PL; 4049 break; 4050 } 4051 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4052 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4053 N->getOperand(3), N->getOperand(4)}; 4054 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4055 } 4056 4057 } 4058 // Other cases are autogenerated. 4059 break; 4060 } 4061 4062 case ARMISD::VZIP: { 4063 unsigned Opc = 0; 4064 EVT VT = N->getValueType(0); 4065 switch (VT.getSimpleVT().SimpleTy) { 4066 default: return; 4067 case MVT::v8i8: Opc = ARM::VZIPd8; break; 4068 case MVT::v4f16: 4069 case MVT::v4i16: Opc = ARM::VZIPd16; break; 4070 case MVT::v2f32: 4071 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
4072 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4073 case MVT::v16i8: Opc = ARM::VZIPq8; break; 4074 case MVT::v8f16: 4075 case MVT::v8i16: Opc = ARM::VZIPq16; break; 4076 case MVT::v4f32: 4077 case MVT::v4i32: Opc = ARM::VZIPq32; break; 4078 } 4079 SDValue Pred = getAL(CurDAG, dl); 4080 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4081 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4082 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4083 return; 4084 } 4085 case ARMISD::VUZP: { 4086 unsigned Opc = 0; 4087 EVT VT = N->getValueType(0); 4088 switch (VT.getSimpleVT().SimpleTy) { 4089 default: return; 4090 case MVT::v8i8: Opc = ARM::VUZPd8; break; 4091 case MVT::v4f16: 4092 case MVT::v4i16: Opc = ARM::VUZPd16; break; 4093 case MVT::v2f32: 4094 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4095 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4096 case MVT::v16i8: Opc = ARM::VUZPq8; break; 4097 case MVT::v8f16: 4098 case MVT::v8i16: Opc = ARM::VUZPq16; break; 4099 case MVT::v4f32: 4100 case MVT::v4i32: Opc = ARM::VUZPq32; break; 4101 } 4102 SDValue Pred = getAL(CurDAG, dl); 4103 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4104 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4105 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4106 return; 4107 } 4108 case ARMISD::VTRN: { 4109 unsigned Opc = 0; 4110 EVT VT = N->getValueType(0); 4111 switch (VT.getSimpleVT().SimpleTy) { 4112 default: return; 4113 case MVT::v8i8: Opc = ARM::VTRNd8; break; 4114 case MVT::v4f16: 4115 case MVT::v4i16: Opc = ARM::VTRNd16; break; 4116 case MVT::v2f32: 4117 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4118 case MVT::v16i8: Opc = ARM::VTRNq8; break; 4119 case MVT::v8f16: 4120 case MVT::v8i16: Opc = ARM::VTRNq16; break; 4121 case MVT::v4f32: 4122 case MVT::v4i32: Opc = ARM::VTRNq32; break; 4123 } 4124 SDValue Pred = getAL(CurDAG, dl); 4125 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 4126 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4127 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4128 return; 4129 } 4130 case ARMISD::BUILD_VECTOR: { 4131 EVT VecVT = N->getValueType(0); 4132 EVT EltVT = VecVT.getVectorElementType(); 4133 unsigned NumElts = VecVT.getVectorNumElements(); 4134 if (EltVT == MVT::f64) { 4135 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4136 ReplaceNode( 4137 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4138 return; 4139 } 4140 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4141 if (NumElts == 2) { 4142 ReplaceNode( 4143 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4144 return; 4145 } 4146 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4147 ReplaceNode(N, 4148 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4149 N->getOperand(2), N->getOperand(3))); 4150 return; 4151 } 4152 4153 case ARMISD::VLD1DUP: { 4154 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4155 ARM::VLD1DUPd32 }; 4156 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4157 ARM::VLD1DUPq32 }; 4158 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4159 return; 4160 } 4161 4162 case ARMISD::VLD2DUP: { 4163 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4164 ARM::VLD2DUPd32 }; 4165 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4166 return; 4167 } 4168 4169 case ARMISD::VLD3DUP: { 4170 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4171 ARM::VLD3DUPd16Pseudo, 4172 ARM::VLD3DUPd32Pseudo }; 4173 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4174 return; 4175 } 4176 4177 case ARMISD::VLD4DUP: { 4178 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4179 ARM::VLD4DUPd16Pseudo, 4180 ARM::VLD4DUPd32Pseudo }; 4181 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4182 return; 
4183 } 4184 4185 case ARMISD::VLD1DUP_UPD: { 4186 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4187 ARM::VLD1DUPd16wb_fixed, 4188 ARM::VLD1DUPd32wb_fixed }; 4189 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4190 ARM::VLD1DUPq16wb_fixed, 4191 ARM::VLD1DUPq32wb_fixed }; 4192 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4193 return; 4194 } 4195 4196 case ARMISD::VLD2DUP_UPD: { 4197 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4198 ARM::VLD2DUPd16wb_fixed, 4199 ARM::VLD2DUPd32wb_fixed, 4200 ARM::VLD1q64wb_fixed }; 4201 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4202 ARM::VLD2DUPq16EvenPseudo, 4203 ARM::VLD2DUPq32EvenPseudo }; 4204 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4205 ARM::VLD2DUPq16OddPseudoWB_fixed, 4206 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4207 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4208 return; 4209 } 4210 4211 case ARMISD::VLD3DUP_UPD: { 4212 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4213 ARM::VLD3DUPd16Pseudo_UPD, 4214 ARM::VLD3DUPd32Pseudo_UPD, 4215 ARM::VLD1d64TPseudoWB_fixed }; 4216 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4217 ARM::VLD3DUPq16EvenPseudo, 4218 ARM::VLD3DUPq32EvenPseudo }; 4219 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4220 ARM::VLD3DUPq16OddPseudo_UPD, 4221 ARM::VLD3DUPq32OddPseudo_UPD }; 4222 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4223 return; 4224 } 4225 4226 case ARMISD::VLD4DUP_UPD: { 4227 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4228 ARM::VLD4DUPd16Pseudo_UPD, 4229 ARM::VLD4DUPd32Pseudo_UPD, 4230 ARM::VLD1d64QPseudoWB_fixed }; 4231 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4232 ARM::VLD4DUPq16EvenPseudo, 4233 ARM::VLD4DUPq32EvenPseudo }; 4234 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 
4235 ARM::VLD4DUPq16OddPseudo_UPD, 4236 ARM::VLD4DUPq32OddPseudo_UPD }; 4237 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4238 return; 4239 } 4240 4241 case ARMISD::VLD1_UPD: { 4242 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4243 ARM::VLD1d16wb_fixed, 4244 ARM::VLD1d32wb_fixed, 4245 ARM::VLD1d64wb_fixed }; 4246 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4247 ARM::VLD1q16wb_fixed, 4248 ARM::VLD1q32wb_fixed, 4249 ARM::VLD1q64wb_fixed }; 4250 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4251 return; 4252 } 4253 4254 case ARMISD::VLD2_UPD: { 4255 if (Subtarget->hasNEON()) { 4256 static const uint16_t DOpcodes[] = { 4257 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4258 ARM::VLD1q64wb_fixed}; 4259 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4260 ARM::VLD2q16PseudoWB_fixed, 4261 ARM::VLD2q32PseudoWB_fixed}; 4262 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4263 } else { 4264 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4265 ARM::MVE_VLD21_8_wb}; 4266 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4267 ARM::MVE_VLD21_16_wb}; 4268 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4269 ARM::MVE_VLD21_32_wb}; 4270 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4271 SelectMVE_VLD(N, 2, Opcodes, true); 4272 } 4273 return; 4274 } 4275 4276 case ARMISD::VLD3_UPD: { 4277 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4278 ARM::VLD3d16Pseudo_UPD, 4279 ARM::VLD3d32Pseudo_UPD, 4280 ARM::VLD1d64TPseudoWB_fixed}; 4281 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4282 ARM::VLD3q16Pseudo_UPD, 4283 ARM::VLD3q32Pseudo_UPD }; 4284 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4285 ARM::VLD3q16oddPseudo_UPD, 4286 ARM::VLD3q32oddPseudo_UPD }; 4287 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4288 return; 4289 } 4290 4291 case ARMISD::VLD4_UPD: { 4292 if 
(Subtarget->hasNEON()) { 4293 static const uint16_t DOpcodes[] = { 4294 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4295 ARM::VLD1d64QPseudoWB_fixed}; 4296 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4297 ARM::VLD4q16Pseudo_UPD, 4298 ARM::VLD4q32Pseudo_UPD}; 4299 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4300 ARM::VLD4q16oddPseudo_UPD, 4301 ARM::VLD4q32oddPseudo_UPD}; 4302 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4303 } else { 4304 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4305 ARM::MVE_VLD42_8, 4306 ARM::MVE_VLD43_8_wb}; 4307 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4308 ARM::MVE_VLD42_16, 4309 ARM::MVE_VLD43_16_wb}; 4310 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4311 ARM::MVE_VLD42_32, 4312 ARM::MVE_VLD43_32_wb}; 4313 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4314 SelectMVE_VLD(N, 4, Opcodes, true); 4315 } 4316 return; 4317 } 4318 4319 case ARMISD::VLD1x2_UPD: { 4320 if (Subtarget->hasNEON()) { 4321 static const uint16_t DOpcodes[] = { 4322 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4323 ARM::VLD1q64wb_fixed}; 4324 static const uint16_t QOpcodes[] = { 4325 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4326 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4327 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4328 return; 4329 } 4330 break; 4331 } 4332 4333 case ARMISD::VLD1x3_UPD: { 4334 if (Subtarget->hasNEON()) { 4335 static const uint16_t DOpcodes[] = { 4336 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4337 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4338 static const uint16_t QOpcodes0[] = { 4339 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4340 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4341 static const uint16_t QOpcodes1[] = { 4342 
ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4343 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4344 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4345 return; 4346 } 4347 break; 4348 } 4349 4350 case ARMISD::VLD1x4_UPD: { 4351 if (Subtarget->hasNEON()) { 4352 static const uint16_t DOpcodes[] = { 4353 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4354 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4355 static const uint16_t QOpcodes0[] = { 4356 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4357 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4358 static const uint16_t QOpcodes1[] = { 4359 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4360 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4361 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4362 return; 4363 } 4364 break; 4365 } 4366 4367 case ARMISD::VLD2LN_UPD: { 4368 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4369 ARM::VLD2LNd16Pseudo_UPD, 4370 ARM::VLD2LNd32Pseudo_UPD }; 4371 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4372 ARM::VLD2LNq32Pseudo_UPD }; 4373 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4374 return; 4375 } 4376 4377 case ARMISD::VLD3LN_UPD: { 4378 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4379 ARM::VLD3LNd16Pseudo_UPD, 4380 ARM::VLD3LNd32Pseudo_UPD }; 4381 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4382 ARM::VLD3LNq32Pseudo_UPD }; 4383 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4384 return; 4385 } 4386 4387 case ARMISD::VLD4LN_UPD: { 4388 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4389 ARM::VLD4LNd16Pseudo_UPD, 4390 ARM::VLD4LNd32Pseudo_UPD }; 4391 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4392 ARM::VLD4LNq32Pseudo_UPD }; 4393 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4394 return; 4395 } 4396 4397 case ARMISD::VST1_UPD: { 4398 static const uint16_t 
DOpcodes[] = { ARM::VST1d8wb_fixed, 4399 ARM::VST1d16wb_fixed, 4400 ARM::VST1d32wb_fixed, 4401 ARM::VST1d64wb_fixed }; 4402 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4403 ARM::VST1q16wb_fixed, 4404 ARM::VST1q32wb_fixed, 4405 ARM::VST1q64wb_fixed }; 4406 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4407 return; 4408 } 4409 4410 case ARMISD::VST2_UPD: { 4411 if (Subtarget->hasNEON()) { 4412 static const uint16_t DOpcodes[] = { 4413 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4414 ARM::VST1q64wb_fixed}; 4415 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4416 ARM::VST2q16PseudoWB_fixed, 4417 ARM::VST2q32PseudoWB_fixed}; 4418 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4419 return; 4420 } 4421 break; 4422 } 4423 4424 case ARMISD::VST3_UPD: { 4425 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4426 ARM::VST3d16Pseudo_UPD, 4427 ARM::VST3d32Pseudo_UPD, 4428 ARM::VST1d64TPseudoWB_fixed}; 4429 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4430 ARM::VST3q16Pseudo_UPD, 4431 ARM::VST3q32Pseudo_UPD }; 4432 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4433 ARM::VST3q16oddPseudo_UPD, 4434 ARM::VST3q32oddPseudo_UPD }; 4435 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4436 return; 4437 } 4438 4439 case ARMISD::VST4_UPD: { 4440 if (Subtarget->hasNEON()) { 4441 static const uint16_t DOpcodes[] = { 4442 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4443 ARM::VST1d64QPseudoWB_fixed}; 4444 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4445 ARM::VST4q16Pseudo_UPD, 4446 ARM::VST4q32Pseudo_UPD}; 4447 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4448 ARM::VST4q16oddPseudo_UPD, 4449 ARM::VST4q32oddPseudo_UPD}; 4450 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4451 return; 4452 } 4453 break; 4454 } 4455 4456 case ARMISD::VST1x2_UPD: { 4457 if (Subtarget->hasNEON()) { 4458 static const uint16_t DOpcodes[] 
= { ARM::VST1q8wb_fixed, 4459 ARM::VST1q16wb_fixed, 4460 ARM::VST1q32wb_fixed, 4461 ARM::VST1q64wb_fixed}; 4462 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4463 ARM::VST1d16QPseudoWB_fixed, 4464 ARM::VST1d32QPseudoWB_fixed, 4465 ARM::VST1d64QPseudoWB_fixed }; 4466 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4467 return; 4468 } 4469 break; 4470 } 4471 4472 case ARMISD::VST1x3_UPD: { 4473 if (Subtarget->hasNEON()) { 4474 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4475 ARM::VST1d16TPseudoWB_fixed, 4476 ARM::VST1d32TPseudoWB_fixed, 4477 ARM::VST1d64TPseudoWB_fixed }; 4478 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4479 ARM::VST1q16LowTPseudo_UPD, 4480 ARM::VST1q32LowTPseudo_UPD, 4481 ARM::VST1q64LowTPseudo_UPD }; 4482 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4483 ARM::VST1q16HighTPseudo_UPD, 4484 ARM::VST1q32HighTPseudo_UPD, 4485 ARM::VST1q64HighTPseudo_UPD }; 4486 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4487 return; 4488 } 4489 break; 4490 } 4491 4492 case ARMISD::VST1x4_UPD: { 4493 if (Subtarget->hasNEON()) { 4494 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4495 ARM::VST1d16QPseudoWB_fixed, 4496 ARM::VST1d32QPseudoWB_fixed, 4497 ARM::VST1d64QPseudoWB_fixed }; 4498 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4499 ARM::VST1q16LowQPseudo_UPD, 4500 ARM::VST1q32LowQPseudo_UPD, 4501 ARM::VST1q64LowQPseudo_UPD }; 4502 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4503 ARM::VST1q16HighQPseudo_UPD, 4504 ARM::VST1q32HighQPseudo_UPD, 4505 ARM::VST1q64HighQPseudo_UPD }; 4506 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4507 return; 4508 } 4509 break; 4510 } 4511 case ARMISD::VST2LN_UPD: { 4512 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4513 ARM::VST2LNd16Pseudo_UPD, 4514 ARM::VST2LNd32Pseudo_UPD }; 4515 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4516 
ARM::VST2LNq32Pseudo_UPD }; 4517 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4518 return; 4519 } 4520 4521 case ARMISD::VST3LN_UPD: { 4522 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4523 ARM::VST3LNd16Pseudo_UPD, 4524 ARM::VST3LNd32Pseudo_UPD }; 4525 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4526 ARM::VST3LNq32Pseudo_UPD }; 4527 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4528 return; 4529 } 4530 4531 case ARMISD::VST4LN_UPD: { 4532 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4533 ARM::VST4LNd16Pseudo_UPD, 4534 ARM::VST4LNd32Pseudo_UPD }; 4535 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4536 ARM::VST4LNq32Pseudo_UPD }; 4537 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4538 return; 4539 } 4540 4541 case ISD::INTRINSIC_VOID: 4542 case ISD::INTRINSIC_W_CHAIN: { 4543 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4544 switch (IntNo) { 4545 default: 4546 break; 4547 4548 case Intrinsic::arm_mrrc: 4549 case Intrinsic::arm_mrrc2: { 4550 SDLoc dl(N); 4551 SDValue Chain = N->getOperand(0); 4552 unsigned Opc; 4553 4554 if (Subtarget->isThumb()) 4555 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4556 else 4557 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4558 4559 SmallVector<SDValue, 5> Ops; 4560 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4561 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4562 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4563 4564 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4565 // instruction will always be '1111' but it is possible in assembly language to specify 4566 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 
4567 if (Opc != ARM::MRRC2) { 4568 Ops.push_back(getAL(CurDAG, dl)); 4569 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4570 } 4571 4572 Ops.push_back(Chain); 4573 4574 // Writes to two registers. 4575 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4576 4577 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4578 return; 4579 } 4580 case Intrinsic::arm_ldaexd: 4581 case Intrinsic::arm_ldrexd: { 4582 SDLoc dl(N); 4583 SDValue Chain = N->getOperand(0); 4584 SDValue MemAddr = N->getOperand(2); 4585 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4586 4587 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4588 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4589 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4590 4591 // arm_ldrexd returns a i64 value in {i32, i32} 4592 std::vector<EVT> ResTys; 4593 if (isThumb) { 4594 ResTys.push_back(MVT::i32); 4595 ResTys.push_back(MVT::i32); 4596 } else 4597 ResTys.push_back(MVT::Untyped); 4598 ResTys.push_back(MVT::Other); 4599 4600 // Place arguments in the right order. 4601 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4602 CurDAG->getRegister(0, MVT::i32), Chain}; 4603 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4604 // Transfer memoperands. 4605 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4606 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4607 4608 // Remap uses. 4609 SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); 4610 if (!SDValue(N, 0).use_empty()) { 4611 SDValue Result; 4612 if (isThumb) 4613 Result = SDValue(Ld, 0); 4614 else { 4615 SDValue SubRegIdx = 4616 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4617 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4618 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4619 Result = SDValue(ResNode,0); 4620 } 4621 ReplaceUses(SDValue(N, 0), Result); 4622 } 4623 if (!SDValue(N, 1).use_empty()) { 4624 SDValue Result; 4625 if (isThumb) 4626 Result = SDValue(Ld, 1); 4627 else { 4628 SDValue SubRegIdx = 4629 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4630 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4631 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4632 Result = SDValue(ResNode,0); 4633 } 4634 ReplaceUses(SDValue(N, 1), Result); 4635 } 4636 ReplaceUses(SDValue(N, 2), OutChain); 4637 CurDAG->RemoveDeadNode(N); 4638 return; 4639 } 4640 case Intrinsic::arm_stlexd: 4641 case Intrinsic::arm_strexd: { 4642 SDLoc dl(N); 4643 SDValue Chain = N->getOperand(0); 4644 SDValue Val0 = N->getOperand(2); 4645 SDValue Val1 = N->getOperand(3); 4646 SDValue MemAddr = N->getOperand(4); 4647 4648 // Store exclusive double return a i32 value which is the return status 4649 // of the issued store. 4650 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4651 4652 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4653 // Place arguments in the right order. 4654 SmallVector<SDValue, 7> Ops; 4655 if (isThumb) { 4656 Ops.push_back(Val0); 4657 Ops.push_back(Val1); 4658 } else 4659 // arm_strexd uses GPRPair. 4660 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4661 Ops.push_back(MemAddr); 4662 Ops.push_back(getAL(CurDAG, dl)); 4663 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4664 Ops.push_back(Chain); 4665 4666 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4667 unsigned NewOpc = isThumb ? (IsRelease ? 
ARM::t2STLEXD : ARM::t2STREXD) 4668 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4669 4670 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4671 // Transfer memoperands. 4672 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4673 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4674 4675 ReplaceNode(N, St); 4676 return; 4677 } 4678 4679 case Intrinsic::arm_neon_vld1: { 4680 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4681 ARM::VLD1d32, ARM::VLD1d64 }; 4682 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4683 ARM::VLD1q32, ARM::VLD1q64}; 4684 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4685 return; 4686 } 4687 4688 case Intrinsic::arm_neon_vld1x2: { 4689 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4690 ARM::VLD1q32, ARM::VLD1q64 }; 4691 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4692 ARM::VLD1d16QPseudo, 4693 ARM::VLD1d32QPseudo, 4694 ARM::VLD1d64QPseudo }; 4695 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4696 return; 4697 } 4698 4699 case Intrinsic::arm_neon_vld1x3: { 4700 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4701 ARM::VLD1d16TPseudo, 4702 ARM::VLD1d32TPseudo, 4703 ARM::VLD1d64TPseudo }; 4704 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 4705 ARM::VLD1q16LowTPseudo_UPD, 4706 ARM::VLD1q32LowTPseudo_UPD, 4707 ARM::VLD1q64LowTPseudo_UPD }; 4708 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4709 ARM::VLD1q16HighTPseudo, 4710 ARM::VLD1q32HighTPseudo, 4711 ARM::VLD1q64HighTPseudo }; 4712 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4713 return; 4714 } 4715 4716 case Intrinsic::arm_neon_vld1x4: { 4717 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4718 ARM::VLD1d16QPseudo, 4719 ARM::VLD1d32QPseudo, 4720 ARM::VLD1d64QPseudo }; 4721 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4722 ARM::VLD1q16LowQPseudo_UPD, 4723 ARM::VLD1q32LowQPseudo_UPD, 4724 
ARM::VLD1q64LowQPseudo_UPD }; 4725 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4726 ARM::VLD1q16HighQPseudo, 4727 ARM::VLD1q32HighQPseudo, 4728 ARM::VLD1q64HighQPseudo }; 4729 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4730 return; 4731 } 4732 4733 case Intrinsic::arm_neon_vld2: { 4734 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4735 ARM::VLD2d32, ARM::VLD1q64 }; 4736 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4737 ARM::VLD2q32Pseudo }; 4738 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4739 return; 4740 } 4741 4742 case Intrinsic::arm_neon_vld3: { 4743 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4744 ARM::VLD3d16Pseudo, 4745 ARM::VLD3d32Pseudo, 4746 ARM::VLD1d64TPseudo }; 4747 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4748 ARM::VLD3q16Pseudo_UPD, 4749 ARM::VLD3q32Pseudo_UPD }; 4750 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4751 ARM::VLD3q16oddPseudo, 4752 ARM::VLD3q32oddPseudo }; 4753 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4754 return; 4755 } 4756 4757 case Intrinsic::arm_neon_vld4: { 4758 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4759 ARM::VLD4d16Pseudo, 4760 ARM::VLD4d32Pseudo, 4761 ARM::VLD1d64QPseudo }; 4762 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4763 ARM::VLD4q16Pseudo_UPD, 4764 ARM::VLD4q32Pseudo_UPD }; 4765 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4766 ARM::VLD4q16oddPseudo, 4767 ARM::VLD4q32oddPseudo }; 4768 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4769 return; 4770 } 4771 4772 case Intrinsic::arm_neon_vld2dup: { 4773 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4774 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4775 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4776 ARM::VLD2DUPq16EvenPseudo, 4777 ARM::VLD2DUPq32EvenPseudo }; 4778 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4779 
ARM::VLD2DUPq16OddPseudo, 4780 ARM::VLD2DUPq32OddPseudo }; 4781 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4782 DOpcodes, QOpcodes0, QOpcodes1); 4783 return; 4784 } 4785 4786 case Intrinsic::arm_neon_vld3dup: { 4787 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4788 ARM::VLD3DUPd16Pseudo, 4789 ARM::VLD3DUPd32Pseudo, 4790 ARM::VLD1d64TPseudo }; 4791 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4792 ARM::VLD3DUPq16EvenPseudo, 4793 ARM::VLD3DUPq32EvenPseudo }; 4794 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4795 ARM::VLD3DUPq16OddPseudo, 4796 ARM::VLD3DUPq32OddPseudo }; 4797 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4798 DOpcodes, QOpcodes0, QOpcodes1); 4799 return; 4800 } 4801 4802 case Intrinsic::arm_neon_vld4dup: { 4803 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4804 ARM::VLD4DUPd16Pseudo, 4805 ARM::VLD4DUPd32Pseudo, 4806 ARM::VLD1d64QPseudo }; 4807 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4808 ARM::VLD4DUPq16EvenPseudo, 4809 ARM::VLD4DUPq32EvenPseudo }; 4810 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4811 ARM::VLD4DUPq16OddPseudo, 4812 ARM::VLD4DUPq32OddPseudo }; 4813 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4814 DOpcodes, QOpcodes0, QOpcodes1); 4815 return; 4816 } 4817 4818 case Intrinsic::arm_neon_vld2lane: { 4819 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4820 ARM::VLD2LNd16Pseudo, 4821 ARM::VLD2LNd32Pseudo }; 4822 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4823 ARM::VLD2LNq32Pseudo }; 4824 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4825 return; 4826 } 4827 4828 case Intrinsic::arm_neon_vld3lane: { 4829 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 4830 ARM::VLD3LNd16Pseudo, 4831 ARM::VLD3LNd32Pseudo }; 4832 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 4833 ARM::VLD3LNq32Pseudo }; 4834 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 4835 return; 
4836 } 4837 4838 case Intrinsic::arm_neon_vld4lane: { 4839 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 4840 ARM::VLD4LNd16Pseudo, 4841 ARM::VLD4LNd32Pseudo }; 4842 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 4843 ARM::VLD4LNq32Pseudo }; 4844 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 4845 return; 4846 } 4847 4848 case Intrinsic::arm_neon_vst1: { 4849 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 4850 ARM::VST1d32, ARM::VST1d64 }; 4851 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4852 ARM::VST1q32, ARM::VST1q64 }; 4853 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 4854 return; 4855 } 4856 4857 case Intrinsic::arm_neon_vst1x2: { 4858 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4859 ARM::VST1q32, ARM::VST1q64 }; 4860 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 4861 ARM::VST1d16QPseudo, 4862 ARM::VST1d32QPseudo, 4863 ARM::VST1d64QPseudo }; 4864 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4865 return; 4866 } 4867 4868 case Intrinsic::arm_neon_vst1x3: { 4869 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 4870 ARM::VST1d16TPseudo, 4871 ARM::VST1d32TPseudo, 4872 ARM::VST1d64TPseudo }; 4873 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4874 ARM::VST1q16LowTPseudo_UPD, 4875 ARM::VST1q32LowTPseudo_UPD, 4876 ARM::VST1q64LowTPseudo_UPD }; 4877 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 4878 ARM::VST1q16HighTPseudo, 4879 ARM::VST1q32HighTPseudo, 4880 ARM::VST1q64HighTPseudo }; 4881 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4882 return; 4883 } 4884 4885 case Intrinsic::arm_neon_vst1x4: { 4886 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 4887 ARM::VST1d16QPseudo, 4888 ARM::VST1d32QPseudo, 4889 ARM::VST1d64QPseudo }; 4890 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4891 ARM::VST1q16LowQPseudo_UPD, 4892 ARM::VST1q32LowQPseudo_UPD, 4893 ARM::VST1q64LowQPseudo_UPD 
}; 4894 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 4895 ARM::VST1q16HighQPseudo, 4896 ARM::VST1q32HighQPseudo, 4897 ARM::VST1q64HighQPseudo }; 4898 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4899 return; 4900 } 4901 4902 case Intrinsic::arm_neon_vst2: { 4903 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 4904 ARM::VST2d32, ARM::VST1q64 }; 4905 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 4906 ARM::VST2q32Pseudo }; 4907 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4908 return; 4909 } 4910 4911 case Intrinsic::arm_neon_vst3: { 4912 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 4913 ARM::VST3d16Pseudo, 4914 ARM::VST3d32Pseudo, 4915 ARM::VST1d64TPseudo }; 4916 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4917 ARM::VST3q16Pseudo_UPD, 4918 ARM::VST3q32Pseudo_UPD }; 4919 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 4920 ARM::VST3q16oddPseudo, 4921 ARM::VST3q32oddPseudo }; 4922 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4923 return; 4924 } 4925 4926 case Intrinsic::arm_neon_vst4: { 4927 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 4928 ARM::VST4d16Pseudo, 4929 ARM::VST4d32Pseudo, 4930 ARM::VST1d64QPseudo }; 4931 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 4932 ARM::VST4q16Pseudo_UPD, 4933 ARM::VST4q32Pseudo_UPD }; 4934 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 4935 ARM::VST4q16oddPseudo, 4936 ARM::VST4q32oddPseudo }; 4937 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4938 return; 4939 } 4940 4941 case Intrinsic::arm_neon_vst2lane: { 4942 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4943 ARM::VST2LNd16Pseudo, 4944 ARM::VST2LNd32Pseudo }; 4945 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4946 ARM::VST2LNq32Pseudo }; 4947 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4948 return; 4949 } 4950 4951 case Intrinsic::arm_neon_vst3lane: { 4952 static 
const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4953 ARM::VST3LNd16Pseudo, 4954 ARM::VST3LNd32Pseudo }; 4955 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4956 ARM::VST3LNq32Pseudo }; 4957 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4958 return; 4959 } 4960 4961 case Intrinsic::arm_neon_vst4lane: { 4962 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4963 ARM::VST4LNd16Pseudo, 4964 ARM::VST4LNd32Pseudo }; 4965 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4966 ARM::VST4LNq32Pseudo }; 4967 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4968 return; 4969 } 4970 4971 case Intrinsic::arm_mve_vldr_gather_base_wb: 4972 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 4973 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 4974 ARM::MVE_VLDRDU64_qi_pre}; 4975 SelectMVE_WB(N, Opcodes, 4976 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 4977 return; 4978 } 4979 4980 case Intrinsic::arm_mve_vld2q: { 4981 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 4982 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4983 ARM::MVE_VLD21_16}; 4984 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4985 ARM::MVE_VLD21_32}; 4986 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4987 SelectMVE_VLD(N, 2, Opcodes, false); 4988 return; 4989 } 4990 4991 case Intrinsic::arm_mve_vld4q: { 4992 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4993 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 4994 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4995 ARM::MVE_VLD42_16, 4996 ARM::MVE_VLD43_16}; 4997 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4998 ARM::MVE_VLD42_32, 4999 ARM::MVE_VLD43_32}; 5000 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5001 SelectMVE_VLD(N, 4, Opcodes, false); 5002 return; 5003 } 5004 } 5005 break; 5006 } 5007 5008 case 
ISD::INTRINSIC_WO_CHAIN: { 5009 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 5010 switch (IntNo) { 5011 default: 5012 break; 5013 5014 // Scalar f32 -> bf16 5015 case Intrinsic::arm_neon_vcvtbfp2bf: { 5016 SDLoc dl(N); 5017 const SDValue &Src = N->getOperand(1); 5018 llvm::EVT DestTy = N->getValueType(0); 5019 SDValue Pred = getAL(CurDAG, dl); 5020 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5021 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 5022 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 5023 return; 5024 } 5025 5026 // Vector v4f32 -> v4bf16 5027 case Intrinsic::arm_neon_vcvtfp2bf: { 5028 SDLoc dl(N); 5029 const SDValue &Src = N->getOperand(1); 5030 SDValue Pred = getAL(CurDAG, dl); 5031 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5032 SDValue Ops[] = { Src, Pred, Reg0 }; 5033 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 5034 return; 5035 } 5036 5037 case Intrinsic::arm_mve_urshrl: 5038 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 5039 return; 5040 case Intrinsic::arm_mve_uqshll: 5041 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 5042 return; 5043 case Intrinsic::arm_mve_srshrl: 5044 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 5045 return; 5046 case Intrinsic::arm_mve_sqshll: 5047 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false); 5048 return; 5049 case Intrinsic::arm_mve_uqrshll: 5050 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 5051 return; 5052 case Intrinsic::arm_mve_sqrshrl: 5053 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 5054 return; 5055 5056 case Intrinsic::arm_mve_vadc: 5057 case Intrinsic::arm_mve_vadc_predicated: 5058 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 5059 IntNo == Intrinsic::arm_mve_vadc_predicated); 5060 return; 5061 case Intrinsic::arm_mve_vsbc: 5062 case Intrinsic::arm_mve_vsbc_predicated: 5063 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 5064 IntNo == Intrinsic::arm_mve_vsbc_predicated); 5065 return; 
5066 case Intrinsic::arm_mve_vshlc: 5067 case Intrinsic::arm_mve_vshlc_predicated: 5068 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 5069 return; 5070 5071 case Intrinsic::arm_mve_vmlldava: 5072 case Intrinsic::arm_mve_vmlldava_predicated: { 5073 static const uint16_t OpcodesU[] = { 5074 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 5075 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 5076 }; 5077 static const uint16_t OpcodesS[] = { 5078 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 5079 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 5080 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 5081 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 5082 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 5083 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 5084 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 5085 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 5086 }; 5087 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 5088 OpcodesS, OpcodesU); 5089 return; 5090 } 5091 5092 case Intrinsic::arm_mve_vrmlldavha: 5093 case Intrinsic::arm_mve_vrmlldavha_predicated: { 5094 static const uint16_t OpcodesU[] = { 5095 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 5096 }; 5097 static const uint16_t OpcodesS[] = { 5098 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 5099 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 5100 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 5101 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 5102 }; 5103 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 5104 OpcodesS, OpcodesU); 5105 return; 5106 } 5107 5108 case Intrinsic::arm_mve_vidup: 5109 case Intrinsic::arm_mve_vidup_predicated: { 5110 static const uint16_t Opcodes[] = { 5111 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 5112 }; 5113 SelectMVE_VxDUP(N, Opcodes, false, 5114 IntNo == Intrinsic::arm_mve_vidup_predicated); 5115 return; 5116 } 5117 5118 case Intrinsic::arm_mve_vddup: 5119 case Intrinsic::arm_mve_vddup_predicated: { 5120 
static const uint16_t Opcodes[] = { 5121 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 5122 }; 5123 SelectMVE_VxDUP(N, Opcodes, false, 5124 IntNo == Intrinsic::arm_mve_vddup_predicated); 5125 return; 5126 } 5127 5128 case Intrinsic::arm_mve_viwdup: 5129 case Intrinsic::arm_mve_viwdup_predicated: { 5130 static const uint16_t Opcodes[] = { 5131 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 5132 }; 5133 SelectMVE_VxDUP(N, Opcodes, true, 5134 IntNo == Intrinsic::arm_mve_viwdup_predicated); 5135 return; 5136 } 5137 5138 case Intrinsic::arm_mve_vdwdup: 5139 case Intrinsic::arm_mve_vdwdup_predicated: { 5140 static const uint16_t Opcodes[] = { 5141 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 5142 }; 5143 SelectMVE_VxDUP(N, Opcodes, true, 5144 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 5145 return; 5146 } 5147 5148 case Intrinsic::arm_cde_cx1d: 5149 case Intrinsic::arm_cde_cx1da: 5150 case Intrinsic::arm_cde_cx2d: 5151 case Intrinsic::arm_cde_cx2da: 5152 case Intrinsic::arm_cde_cx3d: 5153 case Intrinsic::arm_cde_cx3da: { 5154 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 5155 IntNo == Intrinsic::arm_cde_cx2da || 5156 IntNo == Intrinsic::arm_cde_cx3da; 5157 size_t NumExtraOps; 5158 uint16_t Opcode; 5159 switch (IntNo) { 5160 case Intrinsic::arm_cde_cx1d: 5161 case Intrinsic::arm_cde_cx1da: 5162 NumExtraOps = 0; 5163 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 5164 break; 5165 case Intrinsic::arm_cde_cx2d: 5166 case Intrinsic::arm_cde_cx2da: 5167 NumExtraOps = 1; 5168 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 5169 break; 5170 case Intrinsic::arm_cde_cx3d: 5171 case Intrinsic::arm_cde_cx3da: 5172 NumExtraOps = 2; 5173 Opcode = HasAccum ? 
                   ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  // No manual selection matched; fall back to the TableGen-generated
  // instruction selector.
  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
// and obtain the integer operands from them, adding these operands to the
// provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  // A single field (no ':' separators) means this is not an ACLE-style
  // coprocessor register string; leave Ops empty so the caller can try the
  // named-register paths instead.
  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      // NOTE(review): on a parse failure IntField is left uninitialized and
      // still pushed; only the assert below flags this, in asserts builds.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  // Lookup table is TableGen-generated; names are matched lower-cased.
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps MClass special registers string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  // Reject both unknown register names and registers that require features
  // the current subtarget does not have.
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

// Maps an A/R-class special register name (apsr/cpsr/spsr) and its flags
// string to the MRS/MSR mask operand. Returns -1 if the combination is
// invalid.
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contains the fields to be accessed in the special register, set by
  // the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    // Shift into bits 3-2 (apsr uses nzcvq/g field positions).
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // NOTE(review): the dyn_cast results are dereferenced without null checks;
  // operand 1 is expected to always be the metadata register string here.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
5331 unsigned Opcode; 5332 SmallVector<EVT, 3> ResTypes; 5333 if (Ops.size() == 5){ 5334 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 5335 ResTypes.append({ MVT::i32, MVT::Other }); 5336 } else { 5337 assert(Ops.size() == 3 && 5338 "Invalid number of fields in special register string."); 5339 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 5340 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 5341 } 5342 5343 Ops.push_back(getAL(CurDAG, DL)); 5344 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5345 Ops.push_back(N->getOperand(0)); 5346 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 5347 return true; 5348 } 5349 5350 std::string SpecialReg = RegString->getString().lower(); 5351 5352 int BankedReg = getBankedRegisterMask(SpecialReg); 5353 if (BankedReg != -1) { 5354 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 5355 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5356 N->getOperand(0) }; 5357 ReplaceNode( 5358 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 5359 DL, MVT::i32, MVT::Other, Ops)); 5360 return true; 5361 } 5362 5363 // The VFP registers are read by creating SelectionDAG nodes with opcodes 5364 // corresponding to the register that is being read from. So we switch on the 5365 // string to find which opcode we need to use. 5366 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5367 .Case("fpscr", ARM::VMRS) 5368 .Case("fpexc", ARM::VMRS_FPEXC) 5369 .Case("fpsid", ARM::VMRS_FPSID) 5370 .Case("mvfr0", ARM::VMRS_MVFR0) 5371 .Case("mvfr1", ARM::VMRS_MVFR1) 5372 .Case("mvfr2", ARM::VMRS_MVFR2) 5373 .Case("fpinst", ARM::VMRS_FPINST) 5374 .Case("fpinst2", ARM::VMRS_FPINST2) 5375 .Default(0); 5376 5377 // If an opcode was found then we can lower the read to a VFP instruction. 
5378 if (Opcode) { 5379 if (!Subtarget->hasVFP2Base()) 5380 return false; 5381 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 5382 return false; 5383 5384 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5385 N->getOperand(0) }; 5386 ReplaceNode(N, 5387 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 5388 return true; 5389 } 5390 5391 // If the target is M Class then need to validate that the register string 5392 // is an acceptable value, so check that a mask can be constructed from the 5393 // string. 5394 if (Subtarget->isMClass()) { 5395 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5396 if (SYSmValue == -1) 5397 return false; 5398 5399 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5400 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5401 N->getOperand(0) }; 5402 ReplaceNode( 5403 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 5404 return true; 5405 } 5406 5407 // Here we know the target is not M Class so we need to check if it is one 5408 // of the remaining possible values which are apsr, cpsr or spsr. 5409 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 5410 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5411 N->getOperand(0) }; 5412 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 5413 DL, MVT::i32, MVT::Other, Ops)); 5414 return true; 5415 } 5416 5417 if (SpecialReg == "spsr") { 5418 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5419 N->getOperand(0) }; 5420 ReplaceNode( 5421 N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRSsys_AR : ARM::MRSsys, DL, 5422 MVT::i32, MVT::Other, Ops)); 5423 return true; 5424 } 5425 5426 return false; 5427 } 5428 5429 // Lower the write_register intrinsic to ARM specific DAG nodes 5430 // using the supplied metadata string to select the instruction node to use 5431 // and the registers/masks to use in the nodes 5432 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 5433 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 5434 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 5435 bool IsThumb2 = Subtarget->isThumb2(); 5436 SDLoc DL(N); 5437 5438 std::vector<SDValue> Ops; 5439 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5440 5441 if (!Ops.empty()) { 5442 // If the special register string was constructed of fields (as defined 5443 // in the ACLE) then need to lower to MCR node (32 bit) or 5444 // MCRR node(64 bit), we can make the distinction based on the number of 5445 // operands we have. 5446 unsigned Opcode; 5447 if (Ops.size() == 5) { 5448 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 5449 Ops.insert(Ops.begin()+2, N->getOperand(2)); 5450 } else { 5451 assert(Ops.size() == 3 && 5452 "Invalid number of fields in special register string."); 5453 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 5454 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 5455 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 5456 } 5457 5458 Ops.push_back(getAL(CurDAG, DL)); 5459 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5460 Ops.push_back(N->getOperand(0)); 5461 5462 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5463 return true; 5464 } 5465 5466 std::string SpecialReg = RegString->getString().lower(); 5467 int BankedReg = getBankedRegisterMask(SpecialReg); 5468 if (BankedReg != -1) { 5469 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 5470 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5471 N->getOperand(0) }; 5472 ReplaceNode( 5473 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 5474 DL, MVT::Other, Ops)); 5475 return true; 5476 } 5477 5478 // The VFP registers are written to by creating SelectionDAG nodes with 5479 // opcodes corresponding to the register that is being written. So we switch 5480 // on the string to find which opcode we need to use. 5481 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5482 .Case("fpscr", ARM::VMSR) 5483 .Case("fpexc", ARM::VMSR_FPEXC) 5484 .Case("fpsid", ARM::VMSR_FPSID) 5485 .Case("fpinst", ARM::VMSR_FPINST) 5486 .Case("fpinst2", ARM::VMSR_FPINST2) 5487 .Default(0); 5488 5489 if (Opcode) { 5490 if (!Subtarget->hasVFP2Base()) 5491 return false; 5492 Ops = { N->getOperand(2), getAL(CurDAG, DL), 5493 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5494 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5495 return true; 5496 } 5497 5498 std::pair<StringRef, StringRef> Fields; 5499 Fields = StringRef(SpecialReg).rsplit('_'); 5500 std::string Reg = Fields.first.str(); 5501 StringRef Flags = Fields.second; 5502 5503 // If the target was M Class then need to validate the special register value 5504 // and retrieve the mask for use in the instruction node. 
5505 if (Subtarget->isMClass()) { 5506 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5507 if (SYSmValue == -1) 5508 return false; 5509 5510 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5511 N->getOperand(2), getAL(CurDAG, DL), 5512 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5513 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 5514 return true; 5515 } 5516 5517 // We then check to see if a valid mask can be constructed for one of the 5518 // register string values permitted for the A and R class cores. These values 5519 // are apsr, spsr and cpsr; these are also valid on older cores. 5520 int Mask = getARClassRegisterMask(Reg, Flags); 5521 if (Mask != -1) { 5522 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 5523 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5524 N->getOperand(0) }; 5525 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 5526 DL, MVT::Other, Ops)); 5527 return true; 5528 } 5529 5530 return false; 5531 } 5532 5533 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 5534 std::vector<SDValue> AsmNodeOperands; 5535 unsigned Flag, Kind; 5536 bool Changed = false; 5537 unsigned NumOps = N->getNumOperands(); 5538 5539 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 5540 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 5541 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 5542 // respectively. Since there is no constraint to explicitly specify a 5543 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 5544 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 5545 // them into a GPRPair. 5546 5547 SDLoc dl(N); 5548 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 5549 : SDValue(nullptr,0); 5550 5551 SmallVector<bool, 8> OpChanged; 5552 // Glue node will be appended late. 
5553 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5554 SDValue op = N->getOperand(i); 5555 AsmNodeOperands.push_back(op); 5556 5557 if (i < InlineAsm::Op_FirstOperand) 5558 continue; 5559 5560 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 5561 Flag = C->getZExtValue(); 5562 Kind = InlineAsm::getKind(Flag); 5563 } 5564 else 5565 continue; 5566 5567 // Immediate operands to inline asm in the SelectionDAG are modeled with 5568 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 5569 // the second is a constant with the value of the immediate. If we get here 5570 // and we have a Kind_Imm, skip the next operand, and continue. 5571 if (Kind == InlineAsm::Kind_Imm) { 5572 SDValue op = N->getOperand(++i); 5573 AsmNodeOperands.push_back(op); 5574 continue; 5575 } 5576 5577 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 5578 if (NumRegs) 5579 OpChanged.push_back(false); 5580 5581 unsigned DefIdx = 0; 5582 bool IsTiedToChangedOp = false; 5583 // If it's a use that is tied with a previous def, it has no 5584 // reg class constraint. 5585 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 5586 IsTiedToChangedOp = OpChanged[DefIdx]; 5587 5588 // Memory operands to inline asm in the SelectionDAG are modeled with two 5589 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 5590 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 5591 // it doesn't get misinterpreted), and continue. We do this here because 5592 // it's important to update the OpChanged array correctly before moving on. 
5593 if (Kind == InlineAsm::Kind_Mem) { 5594 SDValue op = N->getOperand(++i); 5595 AsmNodeOperands.push_back(op); 5596 continue; 5597 } 5598 5599 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 5600 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 5601 continue; 5602 5603 unsigned RC; 5604 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 5605 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5606 || NumRegs != 2) 5607 continue; 5608 5609 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5610 SDValue V0 = N->getOperand(i+1); 5611 SDValue V1 = N->getOperand(i+2); 5612 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5613 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5614 SDValue PairedReg; 5615 MachineRegisterInfo &MRI = MF->getRegInfo(); 5616 5617 if (Kind == InlineAsm::Kind_RegDef || 5618 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 5619 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5620 // the original GPRs. 5621 5622 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5623 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5624 SDValue Chain = SDValue(N,0); 5625 5626 SDNode *GU = N->getGluedUser(); 5627 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5628 Chain.getValue(1)); 5629 5630 // Extract values from a GPRPair reg and copy to the original GPR reg. 5631 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5632 RegCopy); 5633 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5634 RegCopy); 5635 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5636 RegCopy.getValue(1)); 5637 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5638 5639 // Update the original glue user. 
5640 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5641 Ops.push_back(T1.getValue(1)); 5642 CurDAG->UpdateNodeOperands(GU, Ops); 5643 } 5644 else { 5645 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 5646 // GPRPair and then pass the GPRPair to the inline asm. 5647 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5648 5649 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5650 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5651 Chain.getValue(1)); 5652 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5653 T0.getValue(1)); 5654 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5655 5656 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5657 // i32 VRs of inline asm with it. 5658 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5659 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5660 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5661 5662 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5663 Glue = Chain.getValue(1); 5664 } 5665 5666 Changed = true; 5667 5668 if(PairedReg.getNode()) { 5669 OpChanged[OpChanged.size() -1 ] = true; 5670 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 5671 if (IsTiedToChangedOp) 5672 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 5673 else 5674 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 5675 // Replace the current flag. 5676 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5677 Flag, dl, MVT::i32); 5678 // Add the new register node and skip the original two GPRs. 5679 AsmNodeOperands.push_back(PairedReg); 5680 // Skip the next two GPRs. 
5681 i += 2; 5682 } 5683 } 5684 5685 if (Glue.getNode()) 5686 AsmNodeOperands.push_back(Glue); 5687 if (!Changed) 5688 return false; 5689 5690 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5691 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5692 New->setNodeId(-1); 5693 ReplaceNode(N, New.getNode()); 5694 return true; 5695 } 5696 5697 5698 bool ARMDAGToDAGISel:: 5699 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 5700 std::vector<SDValue> &OutOps) { 5701 switch(ConstraintID) { 5702 default: 5703 llvm_unreachable("Unexpected asm memory constraint"); 5704 case InlineAsm::Constraint_m: 5705 case InlineAsm::Constraint_o: 5706 case InlineAsm::Constraint_Q: 5707 case InlineAsm::Constraint_Um: 5708 case InlineAsm::Constraint_Un: 5709 case InlineAsm::Constraint_Uq: 5710 case InlineAsm::Constraint_Us: 5711 case InlineAsm::Constraint_Ut: 5712 case InlineAsm::Constraint_Uv: 5713 case InlineAsm::Constraint_Uy: 5714 // Require the address to be in a register. That is safe for all ARM 5715 // variants and it is hard to do anything much smarter without knowing 5716 // how the operand is used. 5717 OutOps.push_back(Op); 5718 return false; 5719 } 5720 return true; 5721 } 5722 5723 /// createARMISelDag - This pass converts a legalized DAG into a 5724 /// ARM-specific DAG, ready for instruction scheduling. 5725 /// 5726 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5727 CodeGenOpt::Level OptLevel) { 5728 return new ARMDAGToDAGISel(TM, OptLevel); 5729 } 5730