//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

// Debugging knob: when set, none of the Select*ShifterOperand hooks match, so
// shifter operands are never folded into data-processing instructions.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true if it's desirable to select a FP MLA / MLS node (see the
  /// definition below for the hazard rationale).
  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  /// Split a predicate constant N into a (predicate-imm, CPSR) operand pair
  /// for a conditional-move pattern.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Predicates used by the tblgen-generated matcher: does the immediate have a
  // valid ARM / Thumb-2 modified-immediate ("so_imm") encoding, directly or
  // after bitwise negation?
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// Copy memory operands from Src to Dst so alias analysis and scheduling
  /// still see the memory access after node replacement.
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///           the accumulator and the immediate operand, i.e. 0
  ///           for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has a immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  // Reject values that are not an exact multiple of the scale.
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The rewrite below produces a UBFX-shaped pattern, which needs v6T2.
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Normalize so the AND-with-immediate (if any) ends up in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros the mask must be all-ones
    // (i.e. a contiguous run of set bits).
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // Only worth analyzing when the result has exactly one use.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

// Decide whether folding a shift of the given kind/amount into another
// instruction is profitable on the current subtarget.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  // Shifter operands are free except on Cortex-A9-like cores and Swift.
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M before N in the topological order so N's replacement is selected.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

// Match N as a register shifted by a constant, producing the base register and
// the encoded shift-op immediate.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Immediate-shifted form requires a constant shift amount.
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Match N as a register shifted by another register, producing base register,
// shift-amount register and the encoded shift-op immediate.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Constant shift amounts are handled by SelectImmShifterOperand instead.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}


bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}



bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Select the register-offset operand of a pre/post-indexed load or store,
// possibly folding a constant shift of the offset register.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // Immediate offsets are matched by the *OffsetImm variants instead.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Select an imm12 offset for a pre-indexed load/store, producing a signed
// plain immediate (no addressing-mode encoding).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


// Select an imm12 offset for a post-indexed load/store, producing an AM2
// encoded immediate operand.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

// Match any address as a bare base register with no offset.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Fall back to register + register.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
954 Offset = CurDAG->getRegister(0, MVT::i32); 955 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 956 MVT::i32); 957 return true; 958 } 959 960 Offset = N; 961 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 962 MVT::i32); 963 return true; 964 } 965 966 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 967 bool FP16) { 968 if (!CurDAG->isBaseWithConstantOffset(N)) { 969 Base = N; 970 if (N.getOpcode() == ISD::FrameIndex) { 971 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 972 Base = CurDAG->getTargetFrameIndex( 973 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 974 } else if (N.getOpcode() == ARMISD::Wrapper && 975 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 976 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 977 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 978 Base = N.getOperand(0); 979 } 980 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 981 SDLoc(N), MVT::i32); 982 return true; 983 } 984 985 // If the RHS is +/- imm8, fold into addr mode. 986 int RHSC; 987 const int Scale = FP16 ? 
2 : 4; 988 989 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 990 Base = N.getOperand(0); 991 if (Base.getOpcode() == ISD::FrameIndex) { 992 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 993 Base = CurDAG->getTargetFrameIndex( 994 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 995 } 996 997 ARM_AM::AddrOpc AddSub = ARM_AM::add; 998 if (RHSC < 0) { 999 AddSub = ARM_AM::sub; 1000 RHSC = -RHSC; 1001 } 1002 1003 if (FP16) 1004 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 1005 SDLoc(N), MVT::i32); 1006 else 1007 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1008 SDLoc(N), MVT::i32); 1009 1010 return true; 1011 } 1012 1013 Base = N; 1014 1015 if (FP16) 1016 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 1017 SDLoc(N), MVT::i32); 1018 else 1019 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1020 SDLoc(N), MVT::i32); 1021 1022 return true; 1023 } 1024 1025 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1026 SDValue &Base, SDValue &Offset) { 1027 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 1028 } 1029 1030 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 1031 SDValue &Base, SDValue &Offset) { 1032 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 1033 } 1034 1035 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1036 SDValue &Align) { 1037 Addr = N; 1038 1039 unsigned Alignment = 0; 1040 1041 MemSDNode *MemN = cast<MemSDNode>(Parent); 1042 1043 if (isa<LSBaseSDNode>(MemN) || 1044 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1045 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1046 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1047 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1048 // The maximum alignment is equal to the memory size being referenced. 
1049 unsigned MMOAlign = MemN->getAlignment(); 1050 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1051 if (MMOAlign >= MemSize && MemSize > 1) 1052 Alignment = MemSize; 1053 } else { 1054 // All other uses of addrmode6 are for intrinsics. For now just record 1055 // the raw alignment value; it will be refined later based on the legal 1056 // alignment operands for the intrinsic. 1057 Alignment = MemN->getAlignment(); 1058 } 1059 1060 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1061 return true; 1062 } 1063 1064 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1065 SDValue &Offset) { 1066 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1067 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1068 if (AM != ISD::POST_INC) 1069 return false; 1070 Offset = N; 1071 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1072 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1073 Offset = CurDAG->getRegister(0, MVT::i32); 1074 } 1075 return true; 1076 } 1077 1078 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1079 SDValue &Offset, SDValue &Label) { 1080 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1081 Offset = N.getOperand(0); 1082 SDValue N1 = N.getOperand(1); 1083 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1084 SDLoc(N), MVT::i32); 1085 return true; 1086 } 1087 1088 return false; 1089 } 1090 1091 1092 //===----------------------------------------------------------------------===// 1093 // Thumb Addressing Modes 1094 //===----------------------------------------------------------------------===// 1095 1096 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1097 // Negative numbers are difficult to materialise in thumb1. If we are 1098 // selecting the add of a negative, instead try to select ri with a zero 1099 // offset, so create the add node directly which will become a sub. 
1100 if (N.getOpcode() != ISD::ADD) 1101 return false; 1102 1103 // Look for an imm which is not legal for ld/st, but is legal for sub. 1104 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1105 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1106 1107 return false; 1108 } 1109 1110 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1111 SDValue &Offset) { 1112 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1113 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1114 if (!NC || !NC->isNullValue()) 1115 return false; 1116 1117 Base = Offset = N; 1118 return true; 1119 } 1120 1121 Base = N.getOperand(0); 1122 Offset = N.getOperand(1); 1123 return true; 1124 } 1125 1126 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1127 SDValue &Offset) { 1128 if (shouldUseZeroOffsetLdSt(N)) 1129 return false; // Select ri instead 1130 return SelectThumbAddrModeRRSext(N, Base, Offset); 1131 } 1132 1133 bool 1134 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1135 SDValue &Base, SDValue &OffImm) { 1136 if (shouldUseZeroOffsetLdSt(N)) { 1137 Base = N; 1138 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1139 return true; 1140 } 1141 1142 if (!CurDAG->isBaseWithConstantOffset(N)) { 1143 if (N.getOpcode() == ISD::ADD) { 1144 return false; // We want to select register offset instead 1145 } else if (N.getOpcode() == ARMISD::Wrapper && 1146 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1147 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1148 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1149 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1150 Base = N.getOperand(0); 1151 } else { 1152 Base = N; 1153 } 1154 1155 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1156 return true; 1157 } 1158 1159 // If the RHS is + imm5 * scale, fold into addr mode. 
1160 int RHSC; 1161 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1162 Base = N.getOperand(0); 1163 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1164 return true; 1165 } 1166 1167 // Offset is too large, so use register offset instead. 1168 return false; 1169 } 1170 1171 bool 1172 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1173 SDValue &OffImm) { 1174 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1175 } 1176 1177 bool 1178 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1179 SDValue &OffImm) { 1180 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1181 } 1182 1183 bool 1184 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1185 SDValue &OffImm) { 1186 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1187 } 1188 1189 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1190 SDValue &Base, SDValue &OffImm) { 1191 if (N.getOpcode() == ISD::FrameIndex) { 1192 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1193 // Only multiples of 4 are allowed for the offset, so the frame object 1194 // alignment must be at least 4. 1195 MachineFrameInfo &MFI = MF->getFrameInfo(); 1196 if (MFI.getObjectAlign(FI) < Align(4)) 1197 MFI.setObjectAlignment(FI, Align(4)); 1198 Base = CurDAG->getTargetFrameIndex( 1199 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1200 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1201 return true; 1202 } 1203 1204 if (!CurDAG->isBaseWithConstantOffset(N)) 1205 return false; 1206 1207 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1208 // If the RHS is + imm8 * scale, fold into addr mode. 1209 int RHSC; 1210 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1211 Base = N.getOperand(0); 1212 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1213 // Make sure the offset is inside the object, or we might fail to 1214 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1215 // it could show up anyway.) 1216 MachineFrameInfo &MFI = MF->getFrameInfo(); 1217 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1218 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1219 // indexed by the LHS must be 4-byte aligned. 1220 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1221 MFI.setObjectAlignment(FI, Align(4)); 1222 if (MFI.getObjectAlign(FI) >= Align(4)) { 1223 Base = CurDAG->getTargetFrameIndex( 1224 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1225 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1226 return true; 1227 } 1228 } 1229 } 1230 } 1231 1232 return false; 1233 } 1234 1235 template <unsigned Shift> 1236 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1237 SDValue &OffImm) { 1238 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1239 int RHSC; 1240 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1241 RHSC)) { 1242 Base = N.getOperand(0); 1243 if (N.getOpcode() == ISD::SUB) 1244 RHSC = -RHSC; 1245 OffImm = 1246 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1247 return true; 1248 } 1249 } 1250 1251 // Base only. 1252 Base = N; 1253 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1254 return true; 1255 } 1256 1257 1258 //===----------------------------------------------------------------------===// 1259 // Thumb 2 Addressing Modes 1260 //===----------------------------------------------------------------------===// 1261 1262 1263 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1264 SDValue &Base, SDValue &OffImm) { 1265 // Match simple R + imm12 operands. 1266 1267 // Base only. 1268 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1269 !CurDAG->isBaseWithConstantOffset(N)) { 1270 if (N.getOpcode() == ISD::FrameIndex) { 1271 // Match frame index. 
1272 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1273 Base = CurDAG->getTargetFrameIndex( 1274 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1275 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1276 return true; 1277 } 1278 1279 if (N.getOpcode() == ARMISD::Wrapper && 1280 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1281 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1282 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1283 Base = N.getOperand(0); 1284 if (Base.getOpcode() == ISD::TargetConstantPool) 1285 return false; // We want to select t2LDRpci instead. 1286 } else 1287 Base = N; 1288 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1289 return true; 1290 } 1291 1292 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1293 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1294 // Let t2LDRi8 handle (R - imm8). 1295 return false; 1296 1297 int RHSC = (int)RHS->getZExtValue(); 1298 if (N.getOpcode() == ISD::SUB) 1299 RHSC = -RHSC; 1300 1301 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1302 Base = N.getOperand(0); 1303 if (Base.getOpcode() == ISD::FrameIndex) { 1304 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1305 Base = CurDAG->getTargetFrameIndex( 1306 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1307 } 1308 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1309 return true; 1310 } 1311 } 1312 1313 // Base only. 
1314 Base = N; 1315 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1316 return true; 1317 } 1318 1319 template <unsigned Shift> 1320 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1321 SDValue &OffImm) { 1322 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1323 int RHSC; 1324 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1325 Base = N.getOperand(0); 1326 if (Base.getOpcode() == ISD::FrameIndex) { 1327 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1328 Base = CurDAG->getTargetFrameIndex( 1329 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1330 } 1331 1332 if (N.getOpcode() == ISD::SUB) 1333 RHSC = -RHSC; 1334 OffImm = 1335 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1336 return true; 1337 } 1338 } 1339 1340 // Base only. 1341 Base = N; 1342 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1343 return true; 1344 } 1345 1346 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1347 SDValue &Base, SDValue &OffImm) { 1348 // Match simple R - imm8 operands. 
1349 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1350 !CurDAG->isBaseWithConstantOffset(N)) 1351 return false; 1352 1353 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1354 int RHSC = (int)RHS->getSExtValue(); 1355 if (N.getOpcode() == ISD::SUB) 1356 RHSC = -RHSC; 1357 1358 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1359 Base = N.getOperand(0); 1360 if (Base.getOpcode() == ISD::FrameIndex) { 1361 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1362 Base = CurDAG->getTargetFrameIndex( 1363 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1364 } 1365 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1366 return true; 1367 } 1368 } 1369 1370 return false; 1371 } 1372 1373 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1374 SDValue &OffImm){ 1375 unsigned Opcode = Op->getOpcode(); 1376 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1377 ? cast<LoadSDNode>(Op)->getAddressingMode() 1378 : cast<StoreSDNode>(Op)->getAddressingMode(); 1379 int RHSC; 1380 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1381 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1382 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1383 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1384 return true; 1385 } 1386 1387 return false; 1388 } 1389 1390 template <unsigned Shift> 1391 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1392 SDValue &OffImm) { 1393 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1394 int RHSC; 1395 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1396 RHSC)) { 1397 Base = N.getOperand(0); 1398 if (Base.getOpcode() == ISD::FrameIndex) { 1399 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1400 Base = CurDAG->getTargetFrameIndex( 1401 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1402 } 1403 1404 if (N.getOpcode() == ISD::SUB) 1405 RHSC = -RHSC; 1406 OffImm = 1407 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1408 return true; 1409 } 1410 } 1411 1412 // Base only. 1413 Base = N; 1414 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1415 return true; 1416 } 1417 1418 template <unsigned Shift> 1419 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1420 SDValue &OffImm) { 1421 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1422 } 1423 1424 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1425 SDValue &OffImm, 1426 unsigned Shift) { 1427 unsigned Opcode = Op->getOpcode(); 1428 ISD::MemIndexedMode AM; 1429 switch (Opcode) { 1430 case ISD::LOAD: 1431 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1432 break; 1433 case ISD::STORE: 1434 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1435 break; 1436 case ISD::MLOAD: 1437 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1438 break; 1439 case ISD::MSTORE: 1440 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1441 break; 1442 default: 1443 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1444 } 1445 1446 int RHSC; 1447 // 7 bit constant, shifted by Shift. 
1448 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1449 OffImm = 1450 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1451 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1452 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1453 MVT::i32); 1454 return true; 1455 } 1456 return false; 1457 } 1458 1459 template <int Min, int Max> 1460 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1461 int Val; 1462 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1463 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); 1464 return true; 1465 } 1466 return false; 1467 } 1468 1469 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1470 SDValue &Base, 1471 SDValue &OffReg, SDValue &ShImm) { 1472 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1473 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1474 return false; 1475 1476 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1477 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1478 int RHSC = (int)RHS->getZExtValue(); 1479 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1480 return false; 1481 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1482 return false; 1483 } 1484 1485 // Look for (R + R) or (R + (R << [1,2,3])). 1486 unsigned ShAmt = 0; 1487 Base = N.getOperand(0); 1488 OffReg = N.getOperand(1); 1489 1490 // Swap if it is ((R << c) + R). 1491 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1492 if (ShOpcVal != ARM_AM::lsl) { 1493 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1494 if (ShOpcVal == ARM_AM::lsl) 1495 std::swap(Base, OffReg); 1496 } 1497 1498 if (ShOpcVal == ARM_AM::lsl) { 1499 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1500 // it. 
1501 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1502 ShAmt = Sh->getZExtValue(); 1503 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1504 OffReg = OffReg.getOperand(0); 1505 else { 1506 ShAmt = 0; 1507 } 1508 } 1509 } 1510 1511 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1512 // and use it in a shifted operand do so. 1513 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1514 unsigned PowerOfTwo = 0; 1515 SDValue NewMulConst; 1516 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1517 HandleSDNode Handle(OffReg); 1518 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1519 OffReg = Handle.getValue(); 1520 ShAmt = PowerOfTwo; 1521 } 1522 } 1523 1524 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1525 1526 return true; 1527 } 1528 1529 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1530 SDValue &OffImm) { 1531 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1532 // instructions. 1533 Base = N; 1534 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1535 1536 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1537 return true; 1538 1539 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1540 if (!RHS) 1541 return true; 1542 1543 uint32_t RHSC = (int)RHS->getZExtValue(); 1544 if (RHSC > 1020 || RHSC % 4 != 0) 1545 return true; 1546 1547 Base = N.getOperand(0); 1548 if (Base.getOpcode() == ISD::FrameIndex) { 1549 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1550 Base = CurDAG->getTargetFrameIndex( 1551 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1552 } 1553 1554 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1555 return true; 1556 } 1557 1558 //===--------------------------------------------------------------------===// 1559 1560 /// getAL - Returns a ARMCC::AL immediate node. 
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// Copy the memory operand of the original node onto the newly created
/// machine node so aliasing/scheduling info survives selection.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// Try to select an ARM-mode pre/post-indexed load. Returns true and replaces
/// N when a matching LDR/LDRB/LDRH/LDRSB/LDRSH variant is found.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads only exist in the AM3 form (LDRSB).
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The *_PRE_IMM forms take no separate offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

/// Try to select a Thumb1 post-incremented i32 load with a +4 offset.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

/// Try to select a Thumb2 pre/post-indexed load (t2LDR*_PRE/_POST).
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// Try to select an MVE pre/post-indexed vector load (plain or masked),
/// choosing a VLDR variant by memory type, extension kind, and alignment.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // NOTE(review): results 0/1 of the machine node appear to be (writeback,
  // value), hence the swapped replacement below — confirm against the
  // instruction definitions.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1847 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1848 SDLoc dl(V0.getNode()); 1849 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1850 MVT::i32); 1851 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1852 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1853 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1854 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1855 } 1856 1857 /// Form 4 consecutive D registers from a pair of Q registers. 1858 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1859 SDLoc dl(V0.getNode()); 1860 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1861 MVT::i32); 1862 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1863 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1864 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1865 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1866 } 1867 1868 /// Form 4 consecutive S registers. 1869 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1870 SDValue V2, SDValue V3) { 1871 SDLoc dl(V0.getNode()); 1872 SDValue RegClass = 1873 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1874 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1875 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1876 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1877 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1878 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1879 V2, SubReg2, V3, SubReg3 }; 1880 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1881 } 1882 1883 /// Form 4 consecutive D registers. 
1884 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1885 SDValue V2, SDValue V3) { 1886 SDLoc dl(V0.getNode()); 1887 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1888 MVT::i32); 1889 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1890 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1891 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1892 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1893 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1894 V2, SubReg2, V3, SubReg3 }; 1895 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1896 } 1897 1898 /// Form 4 consecutive Q registers. 1899 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1900 SDValue V2, SDValue V3) { 1901 SDLoc dl(V0.getNode()); 1902 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1903 MVT::i32); 1904 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1905 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1906 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1907 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1908 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1909 V2, SubReg2, V3, SubReg3 }; 1910 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1911 } 1912 1913 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1914 /// of a NEON VLD or VST instruction. The supported values depend on the 1915 /// number of registers being loaded. 
1916 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1917 unsigned NumVecs, bool is64BitVector) { 1918 unsigned NumRegs = NumVecs; 1919 if (!is64BitVector && NumVecs < 3) 1920 NumRegs *= 2; 1921 1922 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1923 if (Alignment >= 32 && NumRegs == 4) 1924 Alignment = 32; 1925 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1926 Alignment = 16; 1927 else if (Alignment >= 8) 1928 Alignment = 8; 1929 else 1930 Alignment = 0; 1931 1932 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1933 } 1934 1935 static bool isVLDfixed(unsigned Opc) 1936 { 1937 switch (Opc) { 1938 default: return false; 1939 case ARM::VLD1d8wb_fixed : return true; 1940 case ARM::VLD1d16wb_fixed : return true; 1941 case ARM::VLD1d64Qwb_fixed : return true; 1942 case ARM::VLD1d32wb_fixed : return true; 1943 case ARM::VLD1d64wb_fixed : return true; 1944 case ARM::VLD1d64TPseudoWB_fixed : return true; 1945 case ARM::VLD1d64QPseudoWB_fixed : return true; 1946 case ARM::VLD1q8wb_fixed : return true; 1947 case ARM::VLD1q16wb_fixed : return true; 1948 case ARM::VLD1q32wb_fixed : return true; 1949 case ARM::VLD1q64wb_fixed : return true; 1950 case ARM::VLD1DUPd8wb_fixed : return true; 1951 case ARM::VLD1DUPd16wb_fixed : return true; 1952 case ARM::VLD1DUPd32wb_fixed : return true; 1953 case ARM::VLD1DUPq8wb_fixed : return true; 1954 case ARM::VLD1DUPq16wb_fixed : return true; 1955 case ARM::VLD1DUPq32wb_fixed : return true; 1956 case ARM::VLD2d8wb_fixed : return true; 1957 case ARM::VLD2d16wb_fixed : return true; 1958 case ARM::VLD2d32wb_fixed : return true; 1959 case ARM::VLD2q8PseudoWB_fixed : return true; 1960 case ARM::VLD2q16PseudoWB_fixed : return true; 1961 case ARM::VLD2q32PseudoWB_fixed : return true; 1962 case ARM::VLD2DUPd8wb_fixed : return true; 1963 case ARM::VLD2DUPd16wb_fixed : return true; 1964 case ARM::VLD2DUPd32wb_fixed : return true; 1965 } 1966 } 1967 1968 static bool 
isVSTfixed(unsigned Opc) 1969 { 1970 switch (Opc) { 1971 default: return false; 1972 case ARM::VST1d8wb_fixed : return true; 1973 case ARM::VST1d16wb_fixed : return true; 1974 case ARM::VST1d32wb_fixed : return true; 1975 case ARM::VST1d64wb_fixed : return true; 1976 case ARM::VST1q8wb_fixed : return true; 1977 case ARM::VST1q16wb_fixed : return true; 1978 case ARM::VST1q32wb_fixed : return true; 1979 case ARM::VST1q64wb_fixed : return true; 1980 case ARM::VST1d64TPseudoWB_fixed : return true; 1981 case ARM::VST1d64QPseudoWB_fixed : return true; 1982 case ARM::VST2d8wb_fixed : return true; 1983 case ARM::VST2d16wb_fixed : return true; 1984 case ARM::VST2d32wb_fixed : return true; 1985 case ARM::VST2q8PseudoWB_fixed : return true; 1986 case ARM::VST2q16PseudoWB_fixed : return true; 1987 case ARM::VST2q32PseudoWB_fixed : return true; 1988 } 1989 } 1990 1991 // Get the register stride update opcode of a VLD/VST instruction that 1992 // is otherwise equivalent to the given fixed stride updating instruction. 
// Maps a fixed-stride writeback VLD/VST opcode to the equivalent
// register-stride form (used when the increment is not the access size and
// must live in a register).
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 (single and multi-register, pseudo and real) variants.
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  // NOTE(review): VLD1d64Twb_fixed is mapped here but is not accepted by
  // isVLDfixed(), so the assert above would fire for it — confirm this entry
  // is unreachable or add it to isVLDfixed.
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  // VLD1DUP (load one element to all lanes) variants.
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  // VST1 variants.
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 variants.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 variants.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP variants.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
2052 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2053 auto C = dyn_cast<ConstantSDNode>(Inc); 2054 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2055 } 2056 2057 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2058 const uint16_t *DOpcodes, 2059 const uint16_t *QOpcodes0, 2060 const uint16_t *QOpcodes1) { 2061 assert(Subtarget->hasNEON()); 2062 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2063 SDLoc dl(N); 2064 2065 SDValue MemAddr, Align; 2066 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2067 // nodes are not intrinsics. 2068 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2069 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2070 return; 2071 2072 SDValue Chain = N->getOperand(0); 2073 EVT VT = N->getValueType(0); 2074 bool is64BitVector = VT.is64BitVector(); 2075 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2076 2077 unsigned OpcodeIndex; 2078 switch (VT.getSimpleVT().SimpleTy) { 2079 default: llvm_unreachable("unhandled vld type"); 2080 // Double-register operations: 2081 case MVT::v8i8: OpcodeIndex = 0; break; 2082 case MVT::v4f16: 2083 case MVT::v4bf16: 2084 case MVT::v4i16: OpcodeIndex = 1; break; 2085 case MVT::v2f32: 2086 case MVT::v2i32: OpcodeIndex = 2; break; 2087 case MVT::v1i64: OpcodeIndex = 3; break; 2088 // Quad-register operations: 2089 case MVT::v16i8: OpcodeIndex = 0; break; 2090 case MVT::v8f16: 2091 case MVT::v8bf16: 2092 case MVT::v8i16: OpcodeIndex = 1; break; 2093 case MVT::v4f32: 2094 case MVT::v4i32: OpcodeIndex = 2; break; 2095 case MVT::v2f64: 2096 case MVT::v2i64: OpcodeIndex = 3; break; 2097 } 2098 2099 EVT ResTy; 2100 if (NumVecs == 1) 2101 ResTy = VT; 2102 else { 2103 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2104 if (!is64BitVector) 2105 ResTyElts *= 2; 2106 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2107 } 2108 std::vector<EVT> ResTys; 2109 ResTys.push_back(ResTy); 2110 if (isUpdating) 2111 ResTys.push_back(MVT::i32); 2112 ResTys.push_back(MVT::Other); 2113 2114 SDValue Pred = getAL(CurDAG, dl); 2115 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2116 SDNode *VLd; 2117 SmallVector<SDValue, 7> Ops; 2118 2119 // Double registers and VLD1/VLD2 quad registers are directly supported. 2120 if (is64BitVector || NumVecs <= 2) { 2121 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2122 QOpcodes0[OpcodeIndex]); 2123 Ops.push_back(MemAddr); 2124 Ops.push_back(Align); 2125 if (isUpdating) { 2126 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2127 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2128 if (!IsImmUpdate) { 2129 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2130 // check for the opcode rather than the number of vector elements. 2131 if (isVLDfixed(Opc)) 2132 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2133 Ops.push_back(Inc); 2134 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2135 // the operands if not such an opcode. 2136 } else if (!isVLDfixed(Opc)) 2137 Ops.push_back(Reg0); 2138 } 2139 Ops.push_back(Pred); 2140 Ops.push_back(Reg0); 2141 Ops.push_back(Chain); 2142 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2143 2144 } else { 2145 // Otherwise, quad registers are loaded with two separate instructions, 2146 // where one loads the even registers and the other loads the odd registers. 2147 EVT AddrTy = MemAddr.getValueType(); 2148 2149 // Load the even subregs. This is always an updating load, so that it 2150 // provides the address to the second load for the odd subregs. 
2151 SDValue ImplDef = 2152 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2153 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2154 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2155 ResTy, AddrTy, MVT::Other, OpsA); 2156 Chain = SDValue(VLdA, 2); 2157 2158 // Load the odd subregs. 2159 Ops.push_back(SDValue(VLdA, 1)); 2160 Ops.push_back(Align); 2161 if (isUpdating) { 2162 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2163 assert(isa<ConstantSDNode>(Inc.getNode()) && 2164 "only constant post-increment update allowed for VLD3/4"); 2165 (void)Inc; 2166 Ops.push_back(Reg0); 2167 } 2168 Ops.push_back(SDValue(VLdA, 0)); 2169 Ops.push_back(Pred); 2170 Ops.push_back(Reg0); 2171 Ops.push_back(Chain); 2172 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2173 } 2174 2175 // Transfer memoperands. 2176 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2177 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2178 2179 if (NumVecs == 1) { 2180 ReplaceNode(N, VLd); 2181 return; 2182 } 2183 2184 // Extract out the subregisters. 2185 SDValue SuperReg = SDValue(VLd, 0); 2186 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2187 ARM::qsub_3 == ARM::qsub_0 + 3, 2188 "Unexpected subreg numbering"); 2189 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 2190 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2191 ReplaceUses(SDValue(N, Vec), 2192 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2193 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2194 if (isUpdating) 2195 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2196 CurDAG->RemoveDeadNode(N); 2197 } 2198 2199 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2200 const uint16_t *DOpcodes, 2201 const uint16_t *QOpcodes0, 2202 const uint16_t *QOpcodes1) { 2203 assert(Subtarget->hasNEON()); 2204 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2205 SDLoc dl(N); 2206 2207 SDValue MemAddr, Align; 2208 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2209 // nodes are not intrinsics. 2210 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2211 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2212 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2213 return; 2214 2215 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2216 2217 SDValue Chain = N->getOperand(0); 2218 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2219 bool is64BitVector = VT.is64BitVector(); 2220 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2221 2222 unsigned OpcodeIndex; 2223 switch (VT.getSimpleVT().SimpleTy) { 2224 default: llvm_unreachable("unhandled vst type"); 2225 // Double-register operations: 2226 case MVT::v8i8: OpcodeIndex = 0; break; 2227 case MVT::v4f16: 2228 case MVT::v4bf16: 2229 case MVT::v4i16: OpcodeIndex = 1; break; 2230 case MVT::v2f32: 2231 case MVT::v2i32: OpcodeIndex = 2; break; 2232 case MVT::v1i64: OpcodeIndex = 3; break; 2233 // Quad-register operations: 2234 case MVT::v16i8: OpcodeIndex = 0; break; 2235 case MVT::v8f16: 2236 case MVT::v8bf16: 2237 case MVT::v8i16: OpcodeIndex = 1; break; 2238 case MVT::v4f32: 2239 case MVT::v4i32: OpcodeIndex = 2; break; 2240 case MVT::v2f64: 2241 case MVT::v2i64: 
OpcodeIndex = 3; break; 2242 } 2243 2244 std::vector<EVT> ResTys; 2245 if (isUpdating) 2246 ResTys.push_back(MVT::i32); 2247 ResTys.push_back(MVT::Other); 2248 2249 SDValue Pred = getAL(CurDAG, dl); 2250 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2251 SmallVector<SDValue, 7> Ops; 2252 2253 // Double registers and VST1/VST2 quad registers are directly supported. 2254 if (is64BitVector || NumVecs <= 2) { 2255 SDValue SrcReg; 2256 if (NumVecs == 1) { 2257 SrcReg = N->getOperand(Vec0Idx); 2258 } else if (is64BitVector) { 2259 // Form a REG_SEQUENCE to force register allocation. 2260 SDValue V0 = N->getOperand(Vec0Idx + 0); 2261 SDValue V1 = N->getOperand(Vec0Idx + 1); 2262 if (NumVecs == 2) 2263 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2264 else { 2265 SDValue V2 = N->getOperand(Vec0Idx + 2); 2266 // If it's a vst3, form a quad D-register and leave the last part as 2267 // an undef. 2268 SDValue V3 = (NumVecs == 3) 2269 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2270 : N->getOperand(Vec0Idx + 3); 2271 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2272 } 2273 } else { 2274 // Form a QQ register. 2275 SDValue Q0 = N->getOperand(Vec0Idx); 2276 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2277 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2278 } 2279 2280 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2281 QOpcodes0[OpcodeIndex]); 2282 Ops.push_back(MemAddr); 2283 Ops.push_back(Align); 2284 if (isUpdating) { 2285 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2286 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2287 if (!IsImmUpdate) { 2288 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2289 // check for the opcode rather than the number of vector elements. 
2290 if (isVSTfixed(Opc)) 2291 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2292 Ops.push_back(Inc); 2293 } 2294 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2295 // the operands if not such an opcode. 2296 else if (!isVSTfixed(Opc)) 2297 Ops.push_back(Reg0); 2298 } 2299 Ops.push_back(SrcReg); 2300 Ops.push_back(Pred); 2301 Ops.push_back(Reg0); 2302 Ops.push_back(Chain); 2303 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2304 2305 // Transfer memoperands. 2306 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2307 2308 ReplaceNode(N, VSt); 2309 return; 2310 } 2311 2312 // Otherwise, quad registers are stored with two separate instructions, 2313 // where one stores the even registers and the other stores the odd registers. 2314 2315 // Form the QQQQ REG_SEQUENCE. 2316 SDValue V0 = N->getOperand(Vec0Idx + 0); 2317 SDValue V1 = N->getOperand(Vec0Idx + 1); 2318 SDValue V2 = N->getOperand(Vec0Idx + 2); 2319 SDValue V3 = (NumVecs == 3) 2320 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2321 : N->getOperand(Vec0Idx + 3); 2322 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2323 2324 // Store the even D registers. This is always an updating store, so that it 2325 // provides the address to the second store for the odd subregs. 2326 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2327 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2328 MemAddr.getValueType(), 2329 MVT::Other, OpsA); 2330 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2331 Chain = SDValue(VStA, 1); 2332 2333 // Store the odd D registers. 
2334 Ops.push_back(SDValue(VStA, 0)); 2335 Ops.push_back(Align); 2336 if (isUpdating) { 2337 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2338 assert(isa<ConstantSDNode>(Inc.getNode()) && 2339 "only constant post-increment update allowed for VST3/4"); 2340 (void)Inc; 2341 Ops.push_back(Reg0); 2342 } 2343 Ops.push_back(RegSeq); 2344 Ops.push_back(Pred); 2345 Ops.push_back(Reg0); 2346 Ops.push_back(Chain); 2347 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2348 Ops); 2349 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2350 ReplaceNode(N, VStB); 2351 } 2352 2353 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2354 unsigned NumVecs, 2355 const uint16_t *DOpcodes, 2356 const uint16_t *QOpcodes) { 2357 assert(Subtarget->hasNEON()); 2358 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2359 SDLoc dl(N); 2360 2361 SDValue MemAddr, Align; 2362 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2363 // nodes are not intrinsics. 2364 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2365 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2366 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2367 return; 2368 2369 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2370 2371 SDValue Chain = N->getOperand(0); 2372 unsigned Lane = 2373 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2374 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2375 bool is64BitVector = VT.is64BitVector(); 2376 2377 unsigned Alignment = 0; 2378 if (NumVecs != 3) { 2379 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2380 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2381 if (Alignment > NumBytes) 2382 Alignment = NumBytes; 2383 if (Alignment < 8 && Alignment < NumBytes) 2384 Alignment = 0; 2385 // Alignment must be a power of two; make sure of that. 
2386 Alignment = (Alignment & -Alignment); 2387 if (Alignment == 1) 2388 Alignment = 0; 2389 } 2390 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2391 2392 unsigned OpcodeIndex; 2393 switch (VT.getSimpleVT().SimpleTy) { 2394 default: llvm_unreachable("unhandled vld/vst lane type"); 2395 // Double-register operations: 2396 case MVT::v8i8: OpcodeIndex = 0; break; 2397 case MVT::v4f16: 2398 case MVT::v4bf16: 2399 case MVT::v4i16: OpcodeIndex = 1; break; 2400 case MVT::v2f32: 2401 case MVT::v2i32: OpcodeIndex = 2; break; 2402 // Quad-register operations: 2403 case MVT::v8f16: 2404 case MVT::v8bf16: 2405 case MVT::v8i16: OpcodeIndex = 0; break; 2406 case MVT::v4f32: 2407 case MVT::v4i32: OpcodeIndex = 1; break; 2408 } 2409 2410 std::vector<EVT> ResTys; 2411 if (IsLoad) { 2412 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2413 if (!is64BitVector) 2414 ResTyElts *= 2; 2415 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2416 MVT::i64, ResTyElts)); 2417 } 2418 if (isUpdating) 2419 ResTys.push_back(MVT::i32); 2420 ResTys.push_back(MVT::Other); 2421 2422 SDValue Pred = getAL(CurDAG, dl); 2423 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2424 2425 SmallVector<SDValue, 8> Ops; 2426 Ops.push_back(MemAddr); 2427 Ops.push_back(Align); 2428 if (isUpdating) { 2429 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2430 bool IsImmUpdate = 2431 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2432 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2433 } 2434 2435 SDValue SuperReg; 2436 SDValue V0 = N->getOperand(Vec0Idx + 0); 2437 SDValue V1 = N->getOperand(Vec0Idx + 1); 2438 if (NumVecs == 2) { 2439 if (is64BitVector) 2440 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2441 else 2442 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2443 } else { 2444 SDValue V2 = N->getOperand(Vec0Idx + 2); 2445 SDValue V3 = (NumVecs == 3) 2446 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2447 : N->getOperand(Vec0Idx + 3); 2448 if (is64BitVector) 2449 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2450 else 2451 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2452 } 2453 Ops.push_back(SuperReg); 2454 Ops.push_back(getI32Imm(Lane, dl)); 2455 Ops.push_back(Pred); 2456 Ops.push_back(Reg0); 2457 Ops.push_back(Chain); 2458 2459 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2460 QOpcodes[OpcodeIndex]); 2461 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2462 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2463 if (!IsLoad) { 2464 ReplaceNode(N, VLdLn); 2465 return; 2466 } 2467 2468 // Extract the subregisters. 2469 SuperReg = SDValue(VLdLn, 0); 2470 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2471 ARM::qsub_3 == ARM::qsub_0 + 3, 2472 "Unexpected subreg numbering"); 2473 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2474 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2475 ReplaceUses(SDValue(N, Vec), 2476 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2477 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2478 if (isUpdating) 2479 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2480 CurDAG->RemoveDeadNode(N); 2481 } 2482 2483 template <typename SDValueVector> 2484 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2485 SDValue PredicateMask) { 2486 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2487 Ops.push_back(PredicateMask); 2488 } 2489 2490 template <typename SDValueVector> 2491 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2492 SDValue PredicateMask, 2493 SDValue Inactive) { 2494 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2495 Ops.push_back(PredicateMask); 2496 Ops.push_back(Inactive); 2497 } 2498 2499 template <typename SDValueVector> 2500 void 
ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2501 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2502 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2503 } 2504 2505 template <typename SDValueVector> 2506 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2507 EVT InactiveTy) { 2508 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2509 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2510 Ops.push_back(SDValue( 2511 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2512 } 2513 2514 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2515 bool Predicated) { 2516 SDLoc Loc(N); 2517 SmallVector<SDValue, 8> Ops; 2518 2519 uint16_t Opcode; 2520 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2521 case 32: 2522 Opcode = Opcodes[0]; 2523 break; 2524 case 64: 2525 Opcode = Opcodes[1]; 2526 break; 2527 default: 2528 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2529 } 2530 2531 Ops.push_back(N->getOperand(2)); // vector of base addresses 2532 2533 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2534 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2535 2536 if (Predicated) 2537 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2538 else 2539 AddEmptyMVEPredicateToOps(Ops, Loc); 2540 2541 Ops.push_back(N->getOperand(0)); // chain 2542 2543 SmallVector<EVT, 8> VTs; 2544 VTs.push_back(N->getValueType(1)); 2545 VTs.push_back(N->getValueType(0)); 2546 VTs.push_back(N->getValueType(2)); 2547 2548 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2549 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2550 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2551 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2552 CurDAG->RemoveDeadNode(N); 2553 } 2554 2555 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2556 bool Immediate, 2557 bool 
HasSaturationOperand) { 2558 SDLoc Loc(N); 2559 SmallVector<SDValue, 8> Ops; 2560 2561 // Two 32-bit halves of the value to be shifted 2562 Ops.push_back(N->getOperand(1)); 2563 Ops.push_back(N->getOperand(2)); 2564 2565 // The shift count 2566 if (Immediate) { 2567 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2568 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2569 } else { 2570 Ops.push_back(N->getOperand(3)); 2571 } 2572 2573 // The immediate saturation operand, if any 2574 if (HasSaturationOperand) { 2575 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); 2576 int SatBit = (SatOp == 64 ? 0 : 1); 2577 Ops.push_back(getI32Imm(SatBit, Loc)); 2578 } 2579 2580 // MVE scalar shifts are IT-predicable, so include the standard 2581 // predicate arguments. 2582 Ops.push_back(getAL(CurDAG, Loc)); 2583 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2584 2585 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2586 } 2587 2588 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2589 uint16_t OpcodeWithNoCarry, 2590 bool Add, bool Predicated) { 2591 SDLoc Loc(N); 2592 SmallVector<SDValue, 8> Ops; 2593 uint16_t Opcode; 2594 2595 unsigned FirstInputOp = Predicated ? 2 : 1; 2596 2597 // Two input vectors and the input carry flag 2598 Ops.push_back(N->getOperand(FirstInputOp)); 2599 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2600 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2601 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2602 uint32_t CarryMask = 1 << 29; 2603 uint32_t CarryExpected = Add ? 
0 : CarryMask; 2604 if (CarryInConstant && 2605 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2606 Opcode = OpcodeWithNoCarry; 2607 } else { 2608 Ops.push_back(CarryIn); 2609 Opcode = OpcodeWithCarry; 2610 } 2611 2612 if (Predicated) 2613 AddMVEPredicateToOps(Ops, Loc, 2614 N->getOperand(FirstInputOp + 3), // predicate 2615 N->getOperand(FirstInputOp - 1)); // inactive 2616 else 2617 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2618 2619 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2620 } 2621 2622 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2623 SDLoc Loc(N); 2624 SmallVector<SDValue, 8> Ops; 2625 2626 // One vector input, followed by a 32-bit word of bits to shift in 2627 // and then an immediate shift count 2628 Ops.push_back(N->getOperand(1)); 2629 Ops.push_back(N->getOperand(2)); 2630 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2631 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2632 2633 if (Predicated) 2634 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2635 else 2636 AddEmptyMVEPredicateToOps(Ops, Loc); 2637 2638 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops)); 2639 } 2640 2641 static bool SDValueToConstBool(SDValue SDVal) { 2642 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2643 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2644 uint64_t Value = SDValConstant->getZExtValue(); 2645 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2646 return Value; 2647 } 2648 2649 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2650 const uint16_t *OpcodesS, 2651 const uint16_t *OpcodesU, 2652 size_t Stride, size_t TySize) { 2653 assert(TySize < Stride && "Invalid TySize"); 2654 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2655 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2656 bool IsExchange = 
SDValueToConstBool(N->getOperand(3)); 2657 if (IsUnsigned) { 2658 assert(!IsSub && 2659 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2660 assert(!IsExchange && 2661 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2662 } 2663 2664 auto OpIsZero = [N](size_t OpNo) { 2665 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo))) 2666 if (OpConst->getZExtValue() == 0) 2667 return true; 2668 return false; 2669 }; 2670 2671 // If the input accumulator value is not zero, select an instruction with 2672 // accumulator, otherwise select an instruction without accumulator 2673 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2674 2675 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2676 if (IsSub) 2677 Opcodes += 4 * Stride; 2678 if (IsExchange) 2679 Opcodes += 2 * Stride; 2680 if (IsAccum) 2681 Opcodes += Stride; 2682 uint16_t Opcode = Opcodes[TySize]; 2683 2684 SDLoc Loc(N); 2685 SmallVector<SDValue, 8> Ops; 2686 // Push the accumulator operands, if they are used 2687 if (IsAccum) { 2688 Ops.push_back(N->getOperand(4)); 2689 Ops.push_back(N->getOperand(5)); 2690 } 2691 // Push the two vector operands 2692 Ops.push_back(N->getOperand(6)); 2693 Ops.push_back(N->getOperand(7)); 2694 2695 if (Predicated) 2696 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2697 else 2698 AddEmptyMVEPredicateToOps(Ops, Loc); 2699 2700 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2701 } 2702 2703 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2704 const uint16_t *OpcodesS, 2705 const uint16_t *OpcodesU) { 2706 EVT VecTy = N->getOperand(6).getValueType(); 2707 size_t SizeIndex; 2708 switch (VecTy.getVectorElementType().getSizeInBits()) { 2709 case 16: 2710 SizeIndex = 0; 2711 break; 2712 case 32: 2713 SizeIndex = 1; 2714 break; 2715 default: 2716 llvm_unreachable("bad vector element size"); 2717 } 2718 2719 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2720 } 
/// Select a vrmlaldavh/vrmlsldavh-style node: these only exist for 32-bit
/// vector elements, so the opcode stride is 1 and the size index is 0.
void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

/// Select an MVE interleaving load (VLD2/VLD4). Opcodes is indexed first by
/// element size (8/16/32 bits) and then by load stage; one machine node is
/// emitted per stage, each threading the previous stage's data and chain
/// through, with the final stage optionally producing a writeback value.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // All NumVecs result vectors are carried between stages as one wide
  // "super register" value of 2*NumVecs i64 elements.
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);

  // Re-distribute the final super-register's Q subregisters to the original
  // node's vector results, then the (optional) writeback and the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

/// Select an MVE incrementing/decrementing dup (VxDUP family), picking the
/// opcode by scalar size. Wrapping variants take an extra 'limit' operand;
/// predicated variants take an 'inactive' vector and a predicate.
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++));   // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++);  // step
  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

/// Select a dual-register CDE instruction (CXxD). The intrinsic's two i32
/// results are produced by one machine instruction returning a GPR pair;
/// the pair is split back into subregisters below. On big-endian targets
/// the low/high halves swap places in the pair.
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

/// Select a NEON load-and-duplicate (VLDnDUP), optionally with address
/// writeback. NumVecs is the number of vectors loaded (1-4).
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to what the instruction can encode.
  // (VLD3DUP takes no alignment operand, hence the NumVecs != 3 guard.)
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the vector type to an index into the per-element-size opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                   OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The loaded vectors are produced as one wide i64-element "super register";
  // a 3-vector load is rounded up to 4 elements.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    // Single-instruction form (D registers, or a one-vector Q load).
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // Q-register VLD2DUP: two chained instructions.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // Q-register VLD3/4DUP: two chained instructions threading the partially
    // filled super register through as a tied operand.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Try to lower a pair of adjacent INSERT_VECTOR_ELTs on v8f16/v8i16 into
/// VMOV/VMOVX/VINS-based f32 lane operations. Returns true and replaces the
/// node on success, false to fall back to the tablegen patterns.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The inner insert must hit an even lane and the outer one the lane just
  // above it, so together they fill one 32-bit (ssub) lane.
  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with an optional vmovx
    // for extracting odd lanes.
if (VT == MVT::v8i16) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      // Odd source lanes need a VMOVX to move the value down to the bottom
      // half of the f32 before the VINS.
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}

/// Try to match N as a signed/unsigned bitfield extract (SBFX/UBFX, or a
/// plain shift when the field reaches the top bit). Handles four shapes:
/// and(srl x, lsb), srl(shl x), srl(and x, shifted-mask), and
/// sign_extend_inreg(srl/sra x). Returns true if the node was selected.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
      ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
      : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0,  X, -X
///   select_cc setgt    X, -1,  X, -X
///   select_cc setl[te] X,  0, -X,  X
///   select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // The ABS pseudo-instructions used below are not available on Thumb1.
  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  // Match xor (add X, (sra X, size-1)), (sra X, size-1) -> ABS X.
  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Operands: pointer, expected, new value, then the chain.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  // Result 1 of the pseudo is a scratch value; the original node's second
  // result maps to the pseudo's chain (result 2).
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

/// If A's set bits form one contiguous run, return (index of highest set
/// bit, index of lowest set bit); otherwise return None.
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned,unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}

/// Try to replace the AND feeding a CMPZ with flag-setting shifts when the
/// AND mask is one contiguous run of bits. Sets SwitchEQNEToPLMI when the
/// caller must rewrite an EQ/NE use into PL/MI (single-bit case, where the
/// tested bit is shifted into the sign bit).
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting shift. Opc is a Thumb1 opcode; on Thumb2 it is
  // translated to the equivalent t2 shift-immediate.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}

void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++.
This is a little 3414 // unfortunate, but I don't think I can make the chain check work 3415 // otherwise. (The chain of the store has to be the same as the chain 3416 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3417 // a direct reference to "SP".) 3418 // 3419 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3420 // a different addressing mode from other four-byte stores. 3421 // 3422 // This pattern usually comes up with call arguments. 3423 StoreSDNode *ST = cast<StoreSDNode>(N); 3424 SDValue Ptr = ST->getBasePtr(); 3425 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3426 int RHSC = 0; 3427 if (Ptr.getOpcode() == ISD::ADD && 3428 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3429 Ptr = Ptr.getOperand(0); 3430 3431 if (Ptr.getOpcode() == ISD::CopyFromReg && 3432 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3433 Ptr.getOperand(0) == ST->getChain()) { 3434 SDValue Ops[] = {ST->getValue(), 3435 CurDAG->getRegister(ARM::SP, MVT::i32), 3436 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3437 getAL(CurDAG, dl), 3438 CurDAG->getRegister(0, MVT::i32), 3439 ST->getChain()}; 3440 MachineSDNode *ResNode = 3441 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3442 MachineMemOperand *MemOp = ST->getMemOperand(); 3443 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3444 ReplaceNode(N, ResNode); 3445 return; 3446 } 3447 } 3448 break; 3449 } 3450 case ISD::WRITE_REGISTER: 3451 if (tryWriteRegister(N)) 3452 return; 3453 break; 3454 case ISD::READ_REGISTER: 3455 if (tryReadRegister(N)) 3456 return; 3457 break; 3458 case ISD::INLINEASM: 3459 case ISD::INLINEASM_BR: 3460 if (tryInlineAsm(N)) 3461 return; 3462 break; 3463 case ISD::XOR: 3464 // Select special operations if XOR node forms integer ABS pattern 3465 if (tryABSOp(N)) 3466 return; 3467 // Other cases are autogenerated. 
3468 break; 3469 case ISD::Constant: { 3470 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3471 // If we can't materialize the constant we need to use a literal pool 3472 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3473 SDValue CPIdx = CurDAG->getTargetConstantPool( 3474 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3475 TLI->getPointerTy(CurDAG->getDataLayout())); 3476 3477 SDNode *ResNode; 3478 if (Subtarget->isThumb()) { 3479 SDValue Ops[] = { 3480 CPIdx, 3481 getAL(CurDAG, dl), 3482 CurDAG->getRegister(0, MVT::i32), 3483 CurDAG->getEntryNode() 3484 }; 3485 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3486 Ops); 3487 } else { 3488 SDValue Ops[] = { 3489 CPIdx, 3490 CurDAG->getTargetConstant(0, dl, MVT::i32), 3491 getAL(CurDAG, dl), 3492 CurDAG->getRegister(0, MVT::i32), 3493 CurDAG->getEntryNode() 3494 }; 3495 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3496 Ops); 3497 } 3498 // Annotate the Node with memory operand information so that MachineInstr 3499 // queries work properly. This e.g. gives the register allocation the 3500 // required information for rematerialization. 3501 MachineFunction& MF = CurDAG->getMachineFunction(); 3502 MachineMemOperand *MemOp = 3503 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3504 MachineMemOperand::MOLoad, 4, Align(4)); 3505 3506 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3507 3508 ReplaceNode(N, ResNode); 3509 return; 3510 } 3511 3512 // Other cases are autogenerated. 3513 break; 3514 } 3515 case ISD::FrameIndex: { 3516 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3517 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3518 SDValue TFI = CurDAG->getTargetFrameIndex( 3519 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3520 if (Subtarget->isThumb1Only()) { 3521 // Set the alignment of the frame object to 4, to avoid having to generate 3522 // more than one ADD 3523 MachineFrameInfo &MFI = MF->getFrameInfo(); 3524 if (MFI.getObjectAlign(FI) < Align(4)) 3525 MFI.setObjectAlignment(FI, Align(4)); 3526 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3527 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3528 return; 3529 } else { 3530 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3531 ARM::t2ADDri : ARM::ADDri); 3532 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3533 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3534 CurDAG->getRegister(0, MVT::i32) }; 3535 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3536 return; 3537 } 3538 } 3539 case ISD::INSERT_VECTOR_ELT: { 3540 if (tryInsertVectorElt(N)) 3541 return; 3542 break; 3543 } 3544 case ISD::SRL: 3545 if (tryV6T2BitfieldExtractOp(N, false)) 3546 return; 3547 break; 3548 case ISD::SIGN_EXTEND_INREG: 3549 case ISD::SRA: 3550 if (tryV6T2BitfieldExtractOp(N, true)) 3551 return; 3552 break; 3553 case ISD::MUL: 3554 if (Subtarget->isThumb1Only()) 3555 break; 3556 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3557 unsigned RHSV = C->getZExtValue(); 3558 if (!RHSV) break; 3559 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3560 unsigned ShImm = Log2_32(RHSV-1); 3561 if (ShImm >= 32) 3562 break; 3563 SDValue V = N->getOperand(0); 3564 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3565 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3566 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3567 if (Subtarget->isThumb()) { 3568 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3569 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3570 return; 3571 } else { 3572 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3573 Reg0 }; 3574 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3575 return; 3576 } 3577 } 3578 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3579 unsigned ShImm = Log2_32(RHSV+1); 3580 if (ShImm >= 32) 3581 break; 3582 SDValue V = N->getOperand(0); 3583 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3584 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3585 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3586 if (Subtarget->isThumb()) { 3587 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3588 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3589 return; 3590 } else { 3591 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3592 Reg0 }; 3593 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3594 return; 3595 } 3596 } 3597 } 3598 break; 3599 case ISD::AND: { 3600 // Check for unsigned bitfield extract 3601 if (tryV6T2BitfieldExtractOp(N, false)) 3602 return; 3603 3604 // If an immediate is used in an AND node, it is possible that the immediate 3605 // can be more optimally materialized when negated. If this is the case we 3606 // can negate the immediate and use a BIC instead. 3607 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3608 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3609 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3610 3611 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3612 // immediate can be negated and fit in the immediate operand of 3613 // a t2BIC, don't do any manual transform here as this can be 3614 // handled by the generic ISel machinery. 3615 bool PreferImmediateEncoding = 3616 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3617 if (!PreferImmediateEncoding && 3618 ConstantMaterializationCost(Imm, Subtarget) > 3619 ConstantMaterializationCost(~Imm, Subtarget)) { 3620 // The current immediate costs more to materialize than a negated 3621 // immediate, so negate the immediate and use a BIC. 3622 SDValue NewImm = 3623 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3624 // If the new constant didn't exist before, reposition it in the topological 3625 // ordering so it is just before N. Otherwise, don't touch its location. 3626 if (NewImm->getNodeId() == -1) 3627 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3628 3629 if (!Subtarget->hasThumb2()) { 3630 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3631 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3632 CurDAG->getRegister(0, MVT::i32)}; 3633 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3634 return; 3635 } else { 3636 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3637 CurDAG->getRegister(0, MVT::i32), 3638 CurDAG->getRegister(0, MVT::i32)}; 3639 ReplaceNode(N, 3640 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3641 return; 3642 } 3643 } 3644 } 3645 3646 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3647 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3648 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3649 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3650 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3651 EVT VT = N->getValueType(0); 3652 if (VT != MVT::i32) 3653 break; 3654 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3655 ? 
ARM::t2MOVTi16 3656 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3657 if (!Opc) 3658 break; 3659 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3660 N1C = dyn_cast<ConstantSDNode>(N1); 3661 if (!N1C) 3662 break; 3663 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3664 SDValue N2 = N0.getOperand(1); 3665 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3666 if (!N2C) 3667 break; 3668 unsigned N1CVal = N1C->getZExtValue(); 3669 unsigned N2CVal = N2C->getZExtValue(); 3670 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3671 (N1CVal & 0xffffU) == 0xffffU && 3672 (N2CVal & 0xffffU) == 0x0U) { 3673 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3674 dl, MVT::i32); 3675 SDValue Ops[] = { N0.getOperand(0), Imm16, 3676 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3677 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3678 return; 3679 } 3680 } 3681 3682 break; 3683 } 3684 case ARMISD::UMAAL: { 3685 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3686 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3687 N->getOperand(2), N->getOperand(3), 3688 getAL(CurDAG, dl), 3689 CurDAG->getRegister(0, MVT::i32) }; 3690 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3691 return; 3692 } 3693 case ARMISD::UMLAL:{ 3694 if (Subtarget->isThumb()) { 3695 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3696 N->getOperand(3), getAL(CurDAG, dl), 3697 CurDAG->getRegister(0, MVT::i32)}; 3698 ReplaceNode( 3699 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3700 return; 3701 }else{ 3702 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3703 N->getOperand(3), getAL(CurDAG, dl), 3704 CurDAG->getRegister(0, MVT::i32), 3705 CurDAG->getRegister(0, MVT::i32) }; 3706 ReplaceNode(N, CurDAG->getMachineNode( 3707 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3708 MVT::i32, MVT::i32, Ops)); 3709 return; 3710 } 3711 } 3712 case ARMISD::SMLAL:{ 3713 if (Subtarget->isThumb()) { 3714 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3715 N->getOperand(3), getAL(CurDAG, dl), 3716 CurDAG->getRegister(0, MVT::i32)}; 3717 ReplaceNode( 3718 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3719 return; 3720 }else{ 3721 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3722 N->getOperand(3), getAL(CurDAG, dl), 3723 CurDAG->getRegister(0, MVT::i32), 3724 CurDAG->getRegister(0, MVT::i32) }; 3725 ReplaceNode(N, CurDAG->getMachineNode( 3726 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3727 MVT::i32, MVT::i32, Ops)); 3728 return; 3729 } 3730 } 3731 case ARMISD::SUBE: { 3732 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3733 break; 3734 // Look for a pattern to match SMMLS 3735 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3736 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3737 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3738 !SDValue(N, 1).use_empty()) 3739 break; 3740 3741 if (Subtarget->isThumb()) 3742 assert(Subtarget->hasThumb2() && 3743 "This pattern should not be generated for Thumb"); 3744 3745 SDValue SmulLoHi = N->getOperand(1); 3746 SDValue Subc = N->getOperand(2); 3747 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3748 3749 if (!Zero || Zero->getZExtValue() != 0 || 3750 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3751 N->getOperand(1) != SmulLoHi.getValue(1) || 3752 N->getOperand(2) != Subc.getValue(1)) 3753 break; 3754 3755 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3756 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3757 N->getOperand(0), getAL(CurDAG, dl), 3758 CurDAG->getRegister(0, MVT::i32) }; 3759 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3760 return; 3761 } 3762 case ISD::LOAD: { 3763 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3764 return; 3765 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3766 if (tryT2IndexedLoad(N)) 3767 return; 3768 } else if (Subtarget->isThumb()) { 3769 if (tryT1IndexedLoad(N)) 3770 return; 3771 } else if (tryARMIndexedLoad(N)) 3772 return; 3773 // Other cases are autogenerated. 3774 break; 3775 } 3776 case ISD::MLOAD: 3777 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3778 return; 3779 // Other cases are autogenerated. 3780 break; 3781 case ARMISD::WLS: 3782 case ARMISD::LE: { 3783 SDValue Ops[] = { N->getOperand(1), 3784 N->getOperand(2), 3785 N->getOperand(0) }; 3786 unsigned Opc = N->getOpcode() == ARMISD::WLS ? 3787 ARM::t2WhileLoopStart : ARM::t2LoopEnd; 3788 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3789 ReplaceUses(N, New); 3790 CurDAG->RemoveDeadNode(N); 3791 return; 3792 } 3793 case ARMISD::LDRD: { 3794 if (Subtarget->isThumb2()) 3795 break; // TableGen handles isel in this case. 3796 SDValue Base, RegOffset, ImmOffset; 3797 const SDValue &Chain = N->getOperand(0); 3798 const SDValue &Addr = N->getOperand(1); 3799 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3800 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3801 // The register-offset variant of LDRD mandates that the register 3802 // allocated to RegOffset is not reused in any of the remaining operands. 3803 // This restriction is currently not enforced. Therefore emitting this 3804 // variant is explicitly avoided. 
3805 Base = Addr; 3806 RegOffset = CurDAG->getRegister(0, MVT::i32); 3807 } 3808 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 3809 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 3810 {MVT::Untyped, MVT::Other}, Ops); 3811 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 3812 SDValue(New, 0)); 3813 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 3814 SDValue(New, 0)); 3815 transferMemOperands(N, New); 3816 ReplaceUses(SDValue(N, 0), Lo); 3817 ReplaceUses(SDValue(N, 1), Hi); 3818 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 3819 CurDAG->RemoveDeadNode(N); 3820 return; 3821 } 3822 case ARMISD::STRD: { 3823 if (Subtarget->isThumb2()) 3824 break; // TableGen handles isel in this case. 3825 SDValue Base, RegOffset, ImmOffset; 3826 const SDValue &Chain = N->getOperand(0); 3827 const SDValue &Addr = N->getOperand(3); 3828 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3829 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3830 // The register-offset variant of STRD mandates that the register 3831 // allocated to RegOffset is not reused in any of the remaining operands. 3832 // This restriction is currently not enforced. Therefore emitting this 3833 // variant is explicitly avoided. 
3834 Base = Addr; 3835 RegOffset = CurDAG->getRegister(0, MVT::i32); 3836 } 3837 SDNode *RegPair = 3838 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 3839 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 3840 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 3841 transferMemOperands(N, New); 3842 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 3843 CurDAG->RemoveDeadNode(N); 3844 return; 3845 } 3846 case ARMISD::LOOP_DEC: { 3847 SDValue Ops[] = { N->getOperand(1), 3848 N->getOperand(2), 3849 N->getOperand(0) }; 3850 SDNode *Dec = 3851 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3852 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3853 ReplaceUses(N, Dec); 3854 CurDAG->RemoveDeadNode(N); 3855 return; 3856 } 3857 case ARMISD::BRCOND: { 3858 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3859 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3860 // Pattern complexity = 6 cost = 1 size = 0 3861 3862 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3863 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3864 // Pattern complexity = 6 cost = 1 size = 0 3865 3866 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3867 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3868 // Pattern complexity = 6 cost = 1 size = 0 3869 3870 unsigned Opc = Subtarget->isThumb() ? 3871 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3872 SDValue Chain = N->getOperand(0); 3873 SDValue N1 = N->getOperand(1); 3874 SDValue N2 = N->getOperand(2); 3875 SDValue N3 = N->getOperand(3); 3876 SDValue InFlag = N->getOperand(4); 3877 assert(N1.getOpcode() == ISD::BasicBlock); 3878 assert(N2.getOpcode() == ISD::Constant); 3879 assert(N3.getOpcode() == ISD::Register); 3880 3881 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3882 3883 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3884 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3885 SDValue Int = InFlag.getOperand(0); 3886 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3887 3888 // Handle low-overhead loops. 3889 if (ID == Intrinsic::loop_decrement_reg) { 3890 SDValue Elements = Int.getOperand(2); 3891 SDValue Size = CurDAG->getTargetConstant( 3892 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3893 MVT::i32); 3894 3895 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3896 SDNode *LoopDec = 3897 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3898 CurDAG->getVTList(MVT::i32, MVT::Other), 3899 Args); 3900 ReplaceUses(Int.getNode(), LoopDec); 3901 3902 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3903 SDNode *LoopEnd = 3904 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3905 3906 ReplaceUses(N, LoopEnd); 3907 CurDAG->RemoveDeadNode(N); 3908 CurDAG->RemoveDeadNode(InFlag.getNode()); 3909 CurDAG->RemoveDeadNode(Int.getNode()); 3910 return; 3911 } 3912 } 3913 3914 bool SwitchEQNEToPLMI; 3915 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3916 InFlag = N->getOperand(4); 3917 3918 if (SwitchEQNEToPLMI) { 3919 switch ((ARMCC::CondCodes)CC) { 3920 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3921 case ARMCC::NE: 3922 CC = (unsigned)ARMCC::MI; 3923 break; 3924 case ARMCC::EQ: 3925 CC = (unsigned)ARMCC::PL; 3926 break; 3927 } 3928 } 3929 } 3930 3931 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3932 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3933 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3934 MVT::Glue, Ops); 3935 Chain = SDValue(ResNode, 0); 3936 if (N->getNumValues() == 2) { 3937 InFlag = SDValue(ResNode, 1); 3938 ReplaceUses(SDValue(N, 1), InFlag); 3939 } 3940 ReplaceUses(SDValue(N, 0), 3941 SDValue(Chain.getNode(), Chain.getResNo())); 3942 CurDAG->RemoveDeadNode(N); 3943 return; 3944 } 3945 3946 case ARMISD::CMPZ: { 3947 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3948 // This allows us to avoid materializing the expensive negative constant. 3949 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3950 // for its glue output. 3951 SDValue X = N->getOperand(0); 3952 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3953 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3954 int64_t Addend = -C->getSExtValue(); 3955 3956 SDNode *Add = nullptr; 3957 // ADDS can be better than CMN if the immediate fits in a 3958 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3959 // Outside that range we can just use a CMN which is 32-bit but has a 3960 // 12-bit immediate range. 3961 if (Addend < 1<<8) { 3962 if (Subtarget->isThumb2()) { 3963 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3964 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3965 CurDAG->getRegister(0, MVT::i32) }; 3966 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3967 } else { 3968 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 3969 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3970 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3971 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3972 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3973 } 3974 } 3975 if (Add) { 3976 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3977 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3978 } 3979 } 3980 // Other cases are autogenerated. 3981 break; 3982 } 3983 3984 case ARMISD::CMOV: { 3985 SDValue InFlag = N->getOperand(4); 3986 3987 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3988 bool SwitchEQNEToPLMI; 3989 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3990 3991 if (SwitchEQNEToPLMI) { 3992 SDValue ARMcc = N->getOperand(2); 3993 ARMCC::CondCodes CC = 3994 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3995 3996 switch (CC) { 3997 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3998 case ARMCC::NE: 3999 CC = ARMCC::MI; 4000 break; 4001 case ARMCC::EQ: 4002 CC = ARMCC::PL; 4003 break; 4004 } 4005 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4006 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4007 N->getOperand(3), N->getOperand(4)}; 4008 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4009 } 4010 4011 } 4012 // Other cases are autogenerated. 4013 break; 4014 } 4015 4016 case ARMISD::VZIP: { 4017 unsigned Opc = 0; 4018 EVT VT = N->getValueType(0); 4019 switch (VT.getSimpleVT().SimpleTy) { 4020 default: return; 4021 case MVT::v8i8: Opc = ARM::VZIPd8; break; 4022 case MVT::v4f16: 4023 case MVT::v4i16: Opc = ARM::VZIPd16; break; 4024 case MVT::v2f32: 4025 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
4026 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4027 case MVT::v16i8: Opc = ARM::VZIPq8; break; 4028 case MVT::v8f16: 4029 case MVT::v8i16: Opc = ARM::VZIPq16; break; 4030 case MVT::v4f32: 4031 case MVT::v4i32: Opc = ARM::VZIPq32; break; 4032 } 4033 SDValue Pred = getAL(CurDAG, dl); 4034 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4035 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4036 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4037 return; 4038 } 4039 case ARMISD::VUZP: { 4040 unsigned Opc = 0; 4041 EVT VT = N->getValueType(0); 4042 switch (VT.getSimpleVT().SimpleTy) { 4043 default: return; 4044 case MVT::v8i8: Opc = ARM::VUZPd8; break; 4045 case MVT::v4f16: 4046 case MVT::v4i16: Opc = ARM::VUZPd16; break; 4047 case MVT::v2f32: 4048 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4049 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4050 case MVT::v16i8: Opc = ARM::VUZPq8; break; 4051 case MVT::v8f16: 4052 case MVT::v8i16: Opc = ARM::VUZPq16; break; 4053 case MVT::v4f32: 4054 case MVT::v4i32: Opc = ARM::VUZPq32; break; 4055 } 4056 SDValue Pred = getAL(CurDAG, dl); 4057 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4058 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4059 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4060 return; 4061 } 4062 case ARMISD::VTRN: { 4063 unsigned Opc = 0; 4064 EVT VT = N->getValueType(0); 4065 switch (VT.getSimpleVT().SimpleTy) { 4066 default: return; 4067 case MVT::v8i8: Opc = ARM::VTRNd8; break; 4068 case MVT::v4f16: 4069 case MVT::v4i16: Opc = ARM::VTRNd16; break; 4070 case MVT::v2f32: 4071 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4072 case MVT::v16i8: Opc = ARM::VTRNq8; break; 4073 case MVT::v8f16: 4074 case MVT::v8i16: Opc = ARM::VTRNq16; break; 4075 case MVT::v4f32: 4076 case MVT::v4i32: Opc = ARM::VTRNq32; break; 4077 } 4078 SDValue Pred = getAL(CurDAG, dl); 4079 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 4080 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4081 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4082 return; 4083 } 4084 case ARMISD::BUILD_VECTOR: { 4085 EVT VecVT = N->getValueType(0); 4086 EVT EltVT = VecVT.getVectorElementType(); 4087 unsigned NumElts = VecVT.getVectorNumElements(); 4088 if (EltVT == MVT::f64) { 4089 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4090 ReplaceNode( 4091 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4092 return; 4093 } 4094 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4095 if (NumElts == 2) { 4096 ReplaceNode( 4097 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4098 return; 4099 } 4100 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4101 ReplaceNode(N, 4102 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4103 N->getOperand(2), N->getOperand(3))); 4104 return; 4105 } 4106 4107 case ARMISD::VLD1DUP: { 4108 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4109 ARM::VLD1DUPd32 }; 4110 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4111 ARM::VLD1DUPq32 }; 4112 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4113 return; 4114 } 4115 4116 case ARMISD::VLD2DUP: { 4117 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4118 ARM::VLD2DUPd32 }; 4119 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4120 return; 4121 } 4122 4123 case ARMISD::VLD3DUP: { 4124 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4125 ARM::VLD3DUPd16Pseudo, 4126 ARM::VLD3DUPd32Pseudo }; 4127 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4128 return; 4129 } 4130 4131 case ARMISD::VLD4DUP: { 4132 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4133 ARM::VLD4DUPd16Pseudo, 4134 ARM::VLD4DUPd32Pseudo }; 4135 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4136 return; 
4137 } 4138 4139 case ARMISD::VLD1DUP_UPD: { 4140 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4141 ARM::VLD1DUPd16wb_fixed, 4142 ARM::VLD1DUPd32wb_fixed }; 4143 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4144 ARM::VLD1DUPq16wb_fixed, 4145 ARM::VLD1DUPq32wb_fixed }; 4146 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4147 return; 4148 } 4149 4150 case ARMISD::VLD2DUP_UPD: { 4151 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 4152 ARM::VLD2DUPd16wb_fixed, 4153 ARM::VLD2DUPd32wb_fixed }; 4154 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 4155 return; 4156 } 4157 4158 case ARMISD::VLD3DUP_UPD: { 4159 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4160 ARM::VLD3DUPd16Pseudo_UPD, 4161 ARM::VLD3DUPd32Pseudo_UPD }; 4162 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 4163 return; 4164 } 4165 4166 case ARMISD::VLD4DUP_UPD: { 4167 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4168 ARM::VLD4DUPd16Pseudo_UPD, 4169 ARM::VLD4DUPd32Pseudo_UPD }; 4170 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 4171 return; 4172 } 4173 4174 case ARMISD::VLD1_UPD: { 4175 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4176 ARM::VLD1d16wb_fixed, 4177 ARM::VLD1d32wb_fixed, 4178 ARM::VLD1d64wb_fixed }; 4179 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4180 ARM::VLD1q16wb_fixed, 4181 ARM::VLD1q32wb_fixed, 4182 ARM::VLD1q64wb_fixed }; 4183 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4184 return; 4185 } 4186 4187 case ARMISD::VLD2_UPD: { 4188 if (Subtarget->hasNEON()) { 4189 static const uint16_t DOpcodes[] = { 4190 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4191 ARM::VLD1q64wb_fixed}; 4192 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4193 ARM::VLD2q16PseudoWB_fixed, 4194 ARM::VLD2q32PseudoWB_fixed}; 4195 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4196 } else { 4197 static const 
uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4198 ARM::MVE_VLD21_8_wb}; 4199 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4200 ARM::MVE_VLD21_16_wb}; 4201 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4202 ARM::MVE_VLD21_32_wb}; 4203 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4204 SelectMVE_VLD(N, 2, Opcodes, true); 4205 } 4206 return; 4207 } 4208 4209 case ARMISD::VLD3_UPD: { 4210 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4211 ARM::VLD3d16Pseudo_UPD, 4212 ARM::VLD3d32Pseudo_UPD, 4213 ARM::VLD1d64TPseudoWB_fixed}; 4214 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4215 ARM::VLD3q16Pseudo_UPD, 4216 ARM::VLD3q32Pseudo_UPD }; 4217 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4218 ARM::VLD3q16oddPseudo_UPD, 4219 ARM::VLD3q32oddPseudo_UPD }; 4220 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4221 return; 4222 } 4223 4224 case ARMISD::VLD4_UPD: { 4225 if (Subtarget->hasNEON()) { 4226 static const uint16_t DOpcodes[] = { 4227 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4228 ARM::VLD1d64QPseudoWB_fixed}; 4229 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4230 ARM::VLD4q16Pseudo_UPD, 4231 ARM::VLD4q32Pseudo_UPD}; 4232 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4233 ARM::VLD4q16oddPseudo_UPD, 4234 ARM::VLD4q32oddPseudo_UPD}; 4235 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4236 } else { 4237 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4238 ARM::MVE_VLD42_8, 4239 ARM::MVE_VLD43_8_wb}; 4240 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4241 ARM::MVE_VLD42_16, 4242 ARM::MVE_VLD43_16_wb}; 4243 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4244 ARM::MVE_VLD42_32, 4245 ARM::MVE_VLD43_32_wb}; 4246 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4247 SelectMVE_VLD(N, 4, Opcodes, true); 4248 } 
4249 return; 4250 } 4251 4252 case ARMISD::VLD2LN_UPD: { 4253 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4254 ARM::VLD2LNd16Pseudo_UPD, 4255 ARM::VLD2LNd32Pseudo_UPD }; 4256 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4257 ARM::VLD2LNq32Pseudo_UPD }; 4258 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4259 return; 4260 } 4261 4262 case ARMISD::VLD3LN_UPD: { 4263 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4264 ARM::VLD3LNd16Pseudo_UPD, 4265 ARM::VLD3LNd32Pseudo_UPD }; 4266 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4267 ARM::VLD3LNq32Pseudo_UPD }; 4268 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4269 return; 4270 } 4271 4272 case ARMISD::VLD4LN_UPD: { 4273 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4274 ARM::VLD4LNd16Pseudo_UPD, 4275 ARM::VLD4LNd32Pseudo_UPD }; 4276 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4277 ARM::VLD4LNq32Pseudo_UPD }; 4278 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4279 return; 4280 } 4281 4282 case ARMISD::VST1_UPD: { 4283 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4284 ARM::VST1d16wb_fixed, 4285 ARM::VST1d32wb_fixed, 4286 ARM::VST1d64wb_fixed }; 4287 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4288 ARM::VST1q16wb_fixed, 4289 ARM::VST1q32wb_fixed, 4290 ARM::VST1q64wb_fixed }; 4291 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4292 return; 4293 } 4294 4295 case ARMISD::VST2_UPD: { 4296 if (Subtarget->hasNEON()) { 4297 static const uint16_t DOpcodes[] = { 4298 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4299 ARM::VST1q64wb_fixed}; 4300 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4301 ARM::VST2q16PseudoWB_fixed, 4302 ARM::VST2q32PseudoWB_fixed}; 4303 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4304 return; 4305 } 4306 break; 4307 } 4308 4309 case ARMISD::VST3_UPD: { 4310 static const uint16_t DOpcodes[] = { 
ARM::VST3d8Pseudo_UPD, 4311 ARM::VST3d16Pseudo_UPD, 4312 ARM::VST3d32Pseudo_UPD, 4313 ARM::VST1d64TPseudoWB_fixed}; 4314 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4315 ARM::VST3q16Pseudo_UPD, 4316 ARM::VST3q32Pseudo_UPD }; 4317 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4318 ARM::VST3q16oddPseudo_UPD, 4319 ARM::VST3q32oddPseudo_UPD }; 4320 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4321 return; 4322 } 4323 4324 case ARMISD::VST4_UPD: { 4325 if (Subtarget->hasNEON()) { 4326 static const uint16_t DOpcodes[] = { 4327 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4328 ARM::VST1d64QPseudoWB_fixed}; 4329 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4330 ARM::VST4q16Pseudo_UPD, 4331 ARM::VST4q32Pseudo_UPD}; 4332 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4333 ARM::VST4q16oddPseudo_UPD, 4334 ARM::VST4q32oddPseudo_UPD}; 4335 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4336 return; 4337 } 4338 break; 4339 } 4340 4341 case ARMISD::VST2LN_UPD: { 4342 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4343 ARM::VST2LNd16Pseudo_UPD, 4344 ARM::VST2LNd32Pseudo_UPD }; 4345 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4346 ARM::VST2LNq32Pseudo_UPD }; 4347 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4348 return; 4349 } 4350 4351 case ARMISD::VST3LN_UPD: { 4352 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4353 ARM::VST3LNd16Pseudo_UPD, 4354 ARM::VST3LNd32Pseudo_UPD }; 4355 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4356 ARM::VST3LNq32Pseudo_UPD }; 4357 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4358 return; 4359 } 4360 4361 case ARMISD::VST4LN_UPD: { 4362 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4363 ARM::VST4LNd16Pseudo_UPD, 4364 ARM::VST4LNd32Pseudo_UPD }; 4365 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4366 ARM::VST4LNq32Pseudo_UPD 
}; 4367 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4368 return; 4369 } 4370 4371 case ISD::INTRINSIC_VOID: 4372 case ISD::INTRINSIC_W_CHAIN: { 4373 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4374 switch (IntNo) { 4375 default: 4376 break; 4377 4378 case Intrinsic::arm_mrrc: 4379 case Intrinsic::arm_mrrc2: { 4380 SDLoc dl(N); 4381 SDValue Chain = N->getOperand(0); 4382 unsigned Opc; 4383 4384 if (Subtarget->isThumb()) 4385 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4386 else 4387 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4388 4389 SmallVector<SDValue, 5> Ops; 4390 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4391 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4392 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4393 4394 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4395 // instruction will always be '1111' but it is possible in assembly language to specify 4396 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4397 if (Opc != ARM::MRRC2) { 4398 Ops.push_back(getAL(CurDAG, dl)); 4399 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4400 } 4401 4402 Ops.push_back(Chain); 4403 4404 // Writes to two registers. 4405 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4406 4407 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4408 return; 4409 } 4410 case Intrinsic::arm_ldaexd: 4411 case Intrinsic::arm_ldrexd: { 4412 SDLoc dl(N); 4413 SDValue Chain = N->getOperand(0); 4414 SDValue MemAddr = N->getOperand(2); 4415 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4416 4417 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4418 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4419 : (IsAcquire ? 
ARM::LDAEXD : ARM::LDREXD); 4420 4421 // arm_ldrexd returns a i64 value in {i32, i32} 4422 std::vector<EVT> ResTys; 4423 if (isThumb) { 4424 ResTys.push_back(MVT::i32); 4425 ResTys.push_back(MVT::i32); 4426 } else 4427 ResTys.push_back(MVT::Untyped); 4428 ResTys.push_back(MVT::Other); 4429 4430 // Place arguments in the right order. 4431 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4432 CurDAG->getRegister(0, MVT::i32), Chain}; 4433 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4434 // Transfer memoperands. 4435 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4436 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4437 4438 // Remap uses. 4439 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4440 if (!SDValue(N, 0).use_empty()) { 4441 SDValue Result; 4442 if (isThumb) 4443 Result = SDValue(Ld, 0); 4444 else { 4445 SDValue SubRegIdx = 4446 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4447 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4448 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4449 Result = SDValue(ResNode,0); 4450 } 4451 ReplaceUses(SDValue(N, 0), Result); 4452 } 4453 if (!SDValue(N, 1).use_empty()) { 4454 SDValue Result; 4455 if (isThumb) 4456 Result = SDValue(Ld, 1); 4457 else { 4458 SDValue SubRegIdx = 4459 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4460 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4461 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4462 Result = SDValue(ResNode,0); 4463 } 4464 ReplaceUses(SDValue(N, 1), Result); 4465 } 4466 ReplaceUses(SDValue(N, 2), OutChain); 4467 CurDAG->RemoveDeadNode(N); 4468 return; 4469 } 4470 case Intrinsic::arm_stlexd: 4471 case Intrinsic::arm_strexd: { 4472 SDLoc dl(N); 4473 SDValue Chain = N->getOperand(0); 4474 SDValue Val0 = N->getOperand(2); 4475 SDValue Val1 = N->getOperand(3); 4476 SDValue MemAddr = N->getOperand(4); 4477 4478 // Store exclusive double return a i32 value which is the 
return status 4479 // of the issued store. 4480 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4481 4482 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4483 // Place arguments in the right order. 4484 SmallVector<SDValue, 7> Ops; 4485 if (isThumb) { 4486 Ops.push_back(Val0); 4487 Ops.push_back(Val1); 4488 } else 4489 // arm_strexd uses GPRPair. 4490 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4491 Ops.push_back(MemAddr); 4492 Ops.push_back(getAL(CurDAG, dl)); 4493 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4494 Ops.push_back(Chain); 4495 4496 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4497 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 4498 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4499 4500 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4501 // Transfer memoperands. 4502 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4503 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4504 4505 ReplaceNode(N, St); 4506 return; 4507 } 4508 4509 case Intrinsic::arm_neon_vld1: { 4510 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4511 ARM::VLD1d32, ARM::VLD1d64 }; 4512 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4513 ARM::VLD1q32, ARM::VLD1q64}; 4514 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4515 return; 4516 } 4517 4518 case Intrinsic::arm_neon_vld1x2: { 4519 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4520 ARM::VLD1q32, ARM::VLD1q64 }; 4521 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4522 ARM::VLD1d16QPseudo, 4523 ARM::VLD1d32QPseudo, 4524 ARM::VLD1d64QPseudo }; 4525 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4526 return; 4527 } 4528 4529 case Intrinsic::arm_neon_vld1x3: { 4530 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4531 ARM::VLD1d16TPseudo, 4532 ARM::VLD1d32TPseudo, 4533 ARM::VLD1d64TPseudo }; 4534 static const uint16_t QOpcodes0[] = { 
ARM::VLD1q8LowTPseudo_UPD, 4535 ARM::VLD1q16LowTPseudo_UPD, 4536 ARM::VLD1q32LowTPseudo_UPD, 4537 ARM::VLD1q64LowTPseudo_UPD }; 4538 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4539 ARM::VLD1q16HighTPseudo, 4540 ARM::VLD1q32HighTPseudo, 4541 ARM::VLD1q64HighTPseudo }; 4542 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4543 return; 4544 } 4545 4546 case Intrinsic::arm_neon_vld1x4: { 4547 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4548 ARM::VLD1d16QPseudo, 4549 ARM::VLD1d32QPseudo, 4550 ARM::VLD1d64QPseudo }; 4551 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4552 ARM::VLD1q16LowQPseudo_UPD, 4553 ARM::VLD1q32LowQPseudo_UPD, 4554 ARM::VLD1q64LowQPseudo_UPD }; 4555 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4556 ARM::VLD1q16HighQPseudo, 4557 ARM::VLD1q32HighQPseudo, 4558 ARM::VLD1q64HighQPseudo }; 4559 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4560 return; 4561 } 4562 4563 case Intrinsic::arm_neon_vld2: { 4564 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4565 ARM::VLD2d32, ARM::VLD1q64 }; 4566 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4567 ARM::VLD2q32Pseudo }; 4568 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4569 return; 4570 } 4571 4572 case Intrinsic::arm_neon_vld3: { 4573 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4574 ARM::VLD3d16Pseudo, 4575 ARM::VLD3d32Pseudo, 4576 ARM::VLD1d64TPseudo }; 4577 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4578 ARM::VLD3q16Pseudo_UPD, 4579 ARM::VLD3q32Pseudo_UPD }; 4580 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4581 ARM::VLD3q16oddPseudo, 4582 ARM::VLD3q32oddPseudo }; 4583 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4584 return; 4585 } 4586 4587 case Intrinsic::arm_neon_vld4: { 4588 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4589 ARM::VLD4d16Pseudo, 4590 ARM::VLD4d32Pseudo, 4591 ARM::VLD1d64QPseudo }; 4592 
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4593 ARM::VLD4q16Pseudo_UPD, 4594 ARM::VLD4q32Pseudo_UPD }; 4595 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4596 ARM::VLD4q16oddPseudo, 4597 ARM::VLD4q32oddPseudo }; 4598 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4599 return; 4600 } 4601 4602 case Intrinsic::arm_neon_vld2dup: { 4603 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4604 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4605 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4606 ARM::VLD2DUPq16EvenPseudo, 4607 ARM::VLD2DUPq32EvenPseudo }; 4608 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4609 ARM::VLD2DUPq16OddPseudo, 4610 ARM::VLD2DUPq32OddPseudo }; 4611 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4612 DOpcodes, QOpcodes0, QOpcodes1); 4613 return; 4614 } 4615 4616 case Intrinsic::arm_neon_vld3dup: { 4617 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4618 ARM::VLD3DUPd16Pseudo, 4619 ARM::VLD3DUPd32Pseudo, 4620 ARM::VLD1d64TPseudo }; 4621 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4622 ARM::VLD3DUPq16EvenPseudo, 4623 ARM::VLD3DUPq32EvenPseudo }; 4624 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4625 ARM::VLD3DUPq16OddPseudo, 4626 ARM::VLD3DUPq32OddPseudo }; 4627 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4628 DOpcodes, QOpcodes0, QOpcodes1); 4629 return; 4630 } 4631 4632 case Intrinsic::arm_neon_vld4dup: { 4633 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4634 ARM::VLD4DUPd16Pseudo, 4635 ARM::VLD4DUPd32Pseudo, 4636 ARM::VLD1d64QPseudo }; 4637 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4638 ARM::VLD4DUPq16EvenPseudo, 4639 ARM::VLD4DUPq32EvenPseudo }; 4640 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4641 ARM::VLD4DUPq16OddPseudo, 4642 ARM::VLD4DUPq32OddPseudo }; 4643 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4644 DOpcodes, QOpcodes0, QOpcodes1); 4645 
return; 4646 } 4647 4648 case Intrinsic::arm_neon_vld2lane: { 4649 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4650 ARM::VLD2LNd16Pseudo, 4651 ARM::VLD2LNd32Pseudo }; 4652 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4653 ARM::VLD2LNq32Pseudo }; 4654 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4655 return; 4656 } 4657 4658 case Intrinsic::arm_neon_vld3lane: { 4659 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 4660 ARM::VLD3LNd16Pseudo, 4661 ARM::VLD3LNd32Pseudo }; 4662 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 4663 ARM::VLD3LNq32Pseudo }; 4664 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 4665 return; 4666 } 4667 4668 case Intrinsic::arm_neon_vld4lane: { 4669 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 4670 ARM::VLD4LNd16Pseudo, 4671 ARM::VLD4LNd32Pseudo }; 4672 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 4673 ARM::VLD4LNq32Pseudo }; 4674 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 4675 return; 4676 } 4677 4678 case Intrinsic::arm_neon_vst1: { 4679 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 4680 ARM::VST1d32, ARM::VST1d64 }; 4681 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4682 ARM::VST1q32, ARM::VST1q64 }; 4683 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 4684 return; 4685 } 4686 4687 case Intrinsic::arm_neon_vst1x2: { 4688 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4689 ARM::VST1q32, ARM::VST1q64 }; 4690 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 4691 ARM::VST1d16QPseudo, 4692 ARM::VST1d32QPseudo, 4693 ARM::VST1d64QPseudo }; 4694 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4695 return; 4696 } 4697 4698 case Intrinsic::arm_neon_vst1x3: { 4699 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 4700 ARM::VST1d16TPseudo, 4701 ARM::VST1d32TPseudo, 4702 ARM::VST1d64TPseudo }; 4703 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4704 
ARM::VST1q16LowTPseudo_UPD, 4705 ARM::VST1q32LowTPseudo_UPD, 4706 ARM::VST1q64LowTPseudo_UPD }; 4707 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 4708 ARM::VST1q16HighTPseudo, 4709 ARM::VST1q32HighTPseudo, 4710 ARM::VST1q64HighTPseudo }; 4711 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4712 return; 4713 } 4714 4715 case Intrinsic::arm_neon_vst1x4: { 4716 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 4717 ARM::VST1d16QPseudo, 4718 ARM::VST1d32QPseudo, 4719 ARM::VST1d64QPseudo }; 4720 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4721 ARM::VST1q16LowQPseudo_UPD, 4722 ARM::VST1q32LowQPseudo_UPD, 4723 ARM::VST1q64LowQPseudo_UPD }; 4724 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 4725 ARM::VST1q16HighQPseudo, 4726 ARM::VST1q32HighQPseudo, 4727 ARM::VST1q64HighQPseudo }; 4728 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4729 return; 4730 } 4731 4732 case Intrinsic::arm_neon_vst2: { 4733 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 4734 ARM::VST2d32, ARM::VST1q64 }; 4735 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 4736 ARM::VST2q32Pseudo }; 4737 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4738 return; 4739 } 4740 4741 case Intrinsic::arm_neon_vst3: { 4742 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 4743 ARM::VST3d16Pseudo, 4744 ARM::VST3d32Pseudo, 4745 ARM::VST1d64TPseudo }; 4746 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4747 ARM::VST3q16Pseudo_UPD, 4748 ARM::VST3q32Pseudo_UPD }; 4749 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 4750 ARM::VST3q16oddPseudo, 4751 ARM::VST3q32oddPseudo }; 4752 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4753 return; 4754 } 4755 4756 case Intrinsic::arm_neon_vst4: { 4757 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 4758 ARM::VST4d16Pseudo, 4759 ARM::VST4d32Pseudo, 4760 ARM::VST1d64QPseudo }; 4761 static const uint16_t QOpcodes0[] 
= { ARM::VST4q8Pseudo_UPD, 4762 ARM::VST4q16Pseudo_UPD, 4763 ARM::VST4q32Pseudo_UPD }; 4764 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 4765 ARM::VST4q16oddPseudo, 4766 ARM::VST4q32oddPseudo }; 4767 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4768 return; 4769 } 4770 4771 case Intrinsic::arm_neon_vst2lane: { 4772 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4773 ARM::VST2LNd16Pseudo, 4774 ARM::VST2LNd32Pseudo }; 4775 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4776 ARM::VST2LNq32Pseudo }; 4777 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4778 return; 4779 } 4780 4781 case Intrinsic::arm_neon_vst3lane: { 4782 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4783 ARM::VST3LNd16Pseudo, 4784 ARM::VST3LNd32Pseudo }; 4785 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4786 ARM::VST3LNq32Pseudo }; 4787 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4788 return; 4789 } 4790 4791 case Intrinsic::arm_neon_vst4lane: { 4792 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4793 ARM::VST4LNd16Pseudo, 4794 ARM::VST4LNd32Pseudo }; 4795 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4796 ARM::VST4LNq32Pseudo }; 4797 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4798 return; 4799 } 4800 4801 case Intrinsic::arm_mve_vldr_gather_base_wb: 4802 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 4803 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 4804 ARM::MVE_VLDRDU64_qi_pre}; 4805 SelectMVE_WB(N, Opcodes, 4806 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 4807 return; 4808 } 4809 4810 case Intrinsic::arm_mve_vld2q: { 4811 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 4812 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4813 ARM::MVE_VLD21_16}; 4814 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4815 ARM::MVE_VLD21_32}; 4816 static const uint16_t *const Opcodes[] = {Opcodes8, 
Opcodes16, Opcodes32}; 4817 SelectMVE_VLD(N, 2, Opcodes, false); 4818 return; 4819 } 4820 4821 case Intrinsic::arm_mve_vld4q: { 4822 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4823 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 4824 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4825 ARM::MVE_VLD42_16, 4826 ARM::MVE_VLD43_16}; 4827 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4828 ARM::MVE_VLD42_32, 4829 ARM::MVE_VLD43_32}; 4830 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4831 SelectMVE_VLD(N, 4, Opcodes, false); 4832 return; 4833 } 4834 } 4835 break; 4836 } 4837 4838 case ISD::INTRINSIC_WO_CHAIN: { 4839 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4840 switch (IntNo) { 4841 default: 4842 break; 4843 4844 // Scalar f32 -> bf16 4845 case Intrinsic::arm_neon_vcvtbfp2bf: { 4846 SDLoc dl(N); 4847 const SDValue &Src = N->getOperand(1); 4848 llvm::EVT DestTy = N->getValueType(0); 4849 SDValue Pred = getAL(CurDAG, dl); 4850 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 4851 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 4852 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 4853 return; 4854 } 4855 4856 // Vector v4f32 -> v4bf16 4857 case Intrinsic::arm_neon_vcvtfp2bf: { 4858 SDLoc dl(N); 4859 const SDValue &Src = N->getOperand(1); 4860 SDValue Pred = getAL(CurDAG, dl); 4861 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 4862 SDValue Ops[] = { Src, Pred, Reg0 }; 4863 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 4864 return; 4865 } 4866 4867 case Intrinsic::arm_mve_urshrl: 4868 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 4869 return; 4870 case Intrinsic::arm_mve_uqshll: 4871 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 4872 return; 4873 case Intrinsic::arm_mve_srshrl: 4874 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 4875 return; 4876 case Intrinsic::arm_mve_sqshll: 4877 SelectMVE_LongShift(N, 
ARM::MVE_SQSHLL, true, false); 4878 return; 4879 case Intrinsic::arm_mve_uqrshll: 4880 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 4881 return; 4882 case Intrinsic::arm_mve_sqrshrl: 4883 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 4884 return; 4885 4886 case Intrinsic::arm_mve_vadc: 4887 case Intrinsic::arm_mve_vadc_predicated: 4888 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 4889 IntNo == Intrinsic::arm_mve_vadc_predicated); 4890 return; 4891 case Intrinsic::arm_mve_vsbc: 4892 case Intrinsic::arm_mve_vsbc_predicated: 4893 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 4894 IntNo == Intrinsic::arm_mve_vsbc_predicated); 4895 return; 4896 case Intrinsic::arm_mve_vshlc: 4897 case Intrinsic::arm_mve_vshlc_predicated: 4898 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 4899 return; 4900 4901 case Intrinsic::arm_mve_vmlldava: 4902 case Intrinsic::arm_mve_vmlldava_predicated: { 4903 static const uint16_t OpcodesU[] = { 4904 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 4905 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 4906 }; 4907 static const uint16_t OpcodesS[] = { 4908 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 4909 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 4910 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 4911 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 4912 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 4913 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 4914 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 4915 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 4916 }; 4917 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 4918 OpcodesS, OpcodesU); 4919 return; 4920 } 4921 4922 case Intrinsic::arm_mve_vrmlldavha: 4923 case Intrinsic::arm_mve_vrmlldavha_predicated: { 4924 static const uint16_t OpcodesU[] = { 4925 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 4926 }; 4927 static const uint16_t OpcodesS[] = { 4928 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 4929 
ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 4930 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 4931 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 4932 }; 4933 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 4934 OpcodesS, OpcodesU); 4935 return; 4936 } 4937 4938 case Intrinsic::arm_mve_vidup: 4939 case Intrinsic::arm_mve_vidup_predicated: { 4940 static const uint16_t Opcodes[] = { 4941 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 4942 }; 4943 SelectMVE_VxDUP(N, Opcodes, false, 4944 IntNo == Intrinsic::arm_mve_vidup_predicated); 4945 return; 4946 } 4947 4948 case Intrinsic::arm_mve_vddup: 4949 case Intrinsic::arm_mve_vddup_predicated: { 4950 static const uint16_t Opcodes[] = { 4951 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 4952 }; 4953 SelectMVE_VxDUP(N, Opcodes, false, 4954 IntNo == Intrinsic::arm_mve_vddup_predicated); 4955 return; 4956 } 4957 4958 case Intrinsic::arm_mve_viwdup: 4959 case Intrinsic::arm_mve_viwdup_predicated: { 4960 static const uint16_t Opcodes[] = { 4961 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 4962 }; 4963 SelectMVE_VxDUP(N, Opcodes, true, 4964 IntNo == Intrinsic::arm_mve_viwdup_predicated); 4965 return; 4966 } 4967 4968 case Intrinsic::arm_mve_vdwdup: 4969 case Intrinsic::arm_mve_vdwdup_predicated: { 4970 static const uint16_t Opcodes[] = { 4971 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 4972 }; 4973 SelectMVE_VxDUP(N, Opcodes, true, 4974 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 4975 return; 4976 } 4977 4978 case Intrinsic::arm_cde_cx1d: 4979 case Intrinsic::arm_cde_cx1da: 4980 case Intrinsic::arm_cde_cx2d: 4981 case Intrinsic::arm_cde_cx2da: 4982 case Intrinsic::arm_cde_cx3d: 4983 case Intrinsic::arm_cde_cx3da: { 4984 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 4985 IntNo == Intrinsic::arm_cde_cx2da || 4986 IntNo == Intrinsic::arm_cde_cx3da; 4987 size_t NumExtraOps; 4988 uint16_t Opcode; 4989 switch (IntNo) { 4990 case 
Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
      NumExtraOps = 0;
      Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
      break;
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
      NumExtraOps = 1;
      Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
      break;
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da:
      NumExtraOps = 2;
      Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
      break;
    default:
      llvm_unreachable("Unexpected opcode");
    }
    SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
    return;
  } // case Intrinsic::arm_cde_cx1d ... arm_cde_cx3da
  } // switch (IntNo)
  break;
  } // case ISD::INTRINSIC_WO_CHAIN

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  // None of the special cases above matched: fall back to the
  // TableGen-generated instruction selector.
  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
// and obtain the integer operands from them, adding these operands to the
// provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  // A single field means the string was not in coprocessor form; leave Ops
  // empty so the caller can try the other register-string formats.
  if (Fields.size() > 1) {
    // AllIntFields is only consumed by the assert below; in release builds
    // the value is computed but never checked.
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      // Note that StringRef::trim takes a *set* of characters, so this strips
      // any leading/trailing run of 'C', 'P', 'c' or 'p'.
      // NOTE(review): if getAsInteger fails, IntField is left uninitialized
      // and still pushed below; the assert catches this only in debug builds
      // — presumably callers always pass validated strings. TODO confirm.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g.
r8, and in 5053 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 5054 // was invalid. 5055 static inline int getBankedRegisterMask(StringRef RegString) { 5056 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 5057 if (!TheReg) 5058 return -1; 5059 return TheReg->Encoding; 5060 } 5061 5062 // The flags here are common to those allowed for apsr in the A class cores and 5063 // those allowed for the special registers in the M class cores. Returns a 5064 // value representing which flags were present, -1 if invalid. 5065 static inline int getMClassFlagsMask(StringRef Flags) { 5066 return StringSwitch<int>(Flags) 5067 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 5068 // correct when flags are not permitted 5069 .Case("g", 0x1) 5070 .Case("nzcvq", 0x2) 5071 .Case("nzcvqg", 0x3) 5072 .Default(-1); 5073 } 5074 5075 // Maps MClass special registers string to its value for use in the 5076 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 5077 // Returns -1 to signify that the string was invalid. 5078 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 5079 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 5080 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 5081 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 5082 return -1; 5083 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 5084 } 5085 5086 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 5087 // The mask operand contains the special register (R Bit) in bit 4, whether 5088 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 5089 // bits 3-0 contains the fields to be accessed in the special register, set by 5090 // the flags provided with the register. 5091 int Mask = 0; 5092 if (Reg == "apsr") { 5093 // The flags permitted for apsr are the same flags that are allowed in 5094 // M class registers. 
We get the flag value and then shift the flags into 5095 // the correct place to combine with the mask. 5096 Mask = getMClassFlagsMask(Flags); 5097 if (Mask == -1) 5098 return -1; 5099 return Mask << 2; 5100 } 5101 5102 if (Reg != "cpsr" && Reg != "spsr") { 5103 return -1; 5104 } 5105 5106 // This is the same as if the flags were "fc" 5107 if (Flags.empty() || Flags == "all") 5108 return Mask | 0x9; 5109 5110 // Inspect the supplied flags string and set the bits in the mask for 5111 // the relevant and valid flags allowed for cpsr and spsr. 5112 for (char Flag : Flags) { 5113 int FlagVal; 5114 switch (Flag) { 5115 case 'c': 5116 FlagVal = 0x1; 5117 break; 5118 case 'x': 5119 FlagVal = 0x2; 5120 break; 5121 case 's': 5122 FlagVal = 0x4; 5123 break; 5124 case 'f': 5125 FlagVal = 0x8; 5126 break; 5127 default: 5128 FlagVal = 0; 5129 } 5130 5131 // This avoids allowing strings where the same flag bit appears twice. 5132 if (!FlagVal || (Mask & FlagVal)) 5133 return -1; 5134 Mask |= FlagVal; 5135 } 5136 5137 // If the register is spsr then we need to set the R bit. 5138 if (Reg == "spsr") 5139 Mask |= 0x10; 5140 5141 return Mask; 5142 } 5143 5144 // Lower the read_register intrinsic to ARM specific DAG nodes 5145 // using the supplied metadata string to select the instruction node to use 5146 // and the registers/masks to construct as operands for the node. 
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 0 is the chain; operand 1 is the metadata node carrying the
  // register-name string.
  // NOTE(review): the dyn_cast results are dereferenced without null checks —
  // presumably the IR verifier guarantees the metadata shape here; cast<>
  // would make that assumption explicit. TODO confirm.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      // MRC produces a single i32 result (plus the chain).
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      // MRRC produces the 64-bit value as two i32 results (plus the chain).
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the standard predicate operands (AL, no CC register) and the
    // incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Not in coprocessor-field form; match the remaining formats
  // case-insensitively.
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // All of these registers require a VFP base; mvfr2 additionally requires
    // FP-ARMv8.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: leave the node for generic handling.
  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 0 is the chain, operand 1 the metadata register-name string, and
  // operand 2 (plus operand 3 for 64-bit writes) the value(s) to store.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // The value to write goes after the first two coprocessor fields.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ?
ARM::t2MCRR : ARM::MCRR;
      // The 64-bit value to write is split across two i32 operands, inserted
      // after the first two coprocessor fields.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the standard predicate operands (AL, no CC register) and the
    // incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Not in coprocessor-field form; match the remaining formats
  // case-insensitively.
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split a trailing flags suffix (e.g. "spsr_fc") off the register name.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string: leave the node for generic handling.
  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // If the node is glued, the glue operand is always last; SDValue(nullptr,0)
  // is the empty SDValue.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
5383 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5384 SDValue op = N->getOperand(i); 5385 AsmNodeOperands.push_back(op); 5386 5387 if (i < InlineAsm::Op_FirstOperand) 5388 continue; 5389 5390 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 5391 Flag = C->getZExtValue(); 5392 Kind = InlineAsm::getKind(Flag); 5393 } 5394 else 5395 continue; 5396 5397 // Immediate operands to inline asm in the SelectionDAG are modeled with 5398 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 5399 // the second is a constant with the value of the immediate. If we get here 5400 // and we have a Kind_Imm, skip the next operand, and continue. 5401 if (Kind == InlineAsm::Kind_Imm) { 5402 SDValue op = N->getOperand(++i); 5403 AsmNodeOperands.push_back(op); 5404 continue; 5405 } 5406 5407 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 5408 if (NumRegs) 5409 OpChanged.push_back(false); 5410 5411 unsigned DefIdx = 0; 5412 bool IsTiedToChangedOp = false; 5413 // If it's a use that is tied with a previous def, it has no 5414 // reg class constraint. 5415 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 5416 IsTiedToChangedOp = OpChanged[DefIdx]; 5417 5418 // Memory operands to inline asm in the SelectionDAG are modeled with two 5419 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 5420 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 5421 // it doesn't get misinterpreted), and continue. We do this here because 5422 // it's important to update the OpChanged array correctly before moving on. 
5423 if (Kind == InlineAsm::Kind_Mem) { 5424 SDValue op = N->getOperand(++i); 5425 AsmNodeOperands.push_back(op); 5426 continue; 5427 } 5428 5429 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 5430 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 5431 continue; 5432 5433 unsigned RC; 5434 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 5435 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5436 || NumRegs != 2) 5437 continue; 5438 5439 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5440 SDValue V0 = N->getOperand(i+1); 5441 SDValue V1 = N->getOperand(i+2); 5442 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5443 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5444 SDValue PairedReg; 5445 MachineRegisterInfo &MRI = MF->getRegInfo(); 5446 5447 if (Kind == InlineAsm::Kind_RegDef || 5448 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 5449 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5450 // the original GPRs. 5451 5452 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5453 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5454 SDValue Chain = SDValue(N,0); 5455 5456 SDNode *GU = N->getGluedUser(); 5457 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5458 Chain.getValue(1)); 5459 5460 // Extract values from a GPRPair reg and copy to the original GPR reg. 5461 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5462 RegCopy); 5463 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5464 RegCopy); 5465 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5466 RegCopy.getValue(1)); 5467 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5468 5469 // Update the original glue user. 
5470 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5471 Ops.push_back(T1.getValue(1)); 5472 CurDAG->UpdateNodeOperands(GU, Ops); 5473 } 5474 else { 5475 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 5476 // GPRPair and then pass the GPRPair to the inline asm. 5477 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5478 5479 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5480 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5481 Chain.getValue(1)); 5482 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5483 T0.getValue(1)); 5484 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5485 5486 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5487 // i32 VRs of inline asm with it. 5488 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5489 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5490 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5491 5492 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5493 Glue = Chain.getValue(1); 5494 } 5495 5496 Changed = true; 5497 5498 if(PairedReg.getNode()) { 5499 OpChanged[OpChanged.size() -1 ] = true; 5500 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 5501 if (IsTiedToChangedOp) 5502 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 5503 else 5504 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 5505 // Replace the current flag. 5506 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5507 Flag, dl, MVT::i32); 5508 // Add the new register node and skip the original two GPRs. 5509 AsmNodeOperands.push_back(PairedReg); 5510 // Skip the next two GPRs. 
5511 i += 2; 5512 } 5513 } 5514 5515 if (Glue.getNode()) 5516 AsmNodeOperands.push_back(Glue); 5517 if (!Changed) 5518 return false; 5519 5520 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5521 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5522 New->setNodeId(-1); 5523 ReplaceNode(N, New.getNode()); 5524 return true; 5525 } 5526 5527 5528 bool ARMDAGToDAGISel:: 5529 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 5530 std::vector<SDValue> &OutOps) { 5531 switch(ConstraintID) { 5532 default: 5533 llvm_unreachable("Unexpected asm memory constraint"); 5534 case InlineAsm::Constraint_m: 5535 case InlineAsm::Constraint_o: 5536 case InlineAsm::Constraint_Q: 5537 case InlineAsm::Constraint_Um: 5538 case InlineAsm::Constraint_Un: 5539 case InlineAsm::Constraint_Uq: 5540 case InlineAsm::Constraint_Us: 5541 case InlineAsm::Constraint_Ut: 5542 case InlineAsm::Constraint_Uv: 5543 case InlineAsm::Constraint_Uy: 5544 // Require the address to be in a register. That is safe for all ARM 5545 // variants and it is hard to do anything much smarter without knowing 5546 // how the operand is used. 5547 OutOps.push_back(Op); 5548 return false; 5549 } 5550 return true; 5551 } 5552 5553 /// createARMISelDag - This pass converts a legalized DAG into a 5554 /// ARM-specific DAG, ready for instruction scheduling. 5555 /// 5556 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5557 CodeGenOpt::Level OptLevel) { 5558 return new ARMDAGToDAGISel(TM, OptLevel); 5559 } 5560