//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

// Hidden flag (default off). When set, SelectImmShifterOperand and
// SelectRegShifterOperand return false immediately, so shifter operands are
// never matched by those selectors.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

// Hidden flag (default on). When cleared, hasNoVMLxHazardUse always reports
// that selecting a VMLA / VMLS is fine, i.e. the hazard check is skipped.
static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  cl::desc("Check fp vmla / vmls hazard at isel time"),
  cl::init(true));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

/// Result of SelectAddrMode2Worker: which flavour of addressing mode 2 was
/// matched.
enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  /// Not initialized by the constructor; it is assigned at the start of every
  /// runOnMachineFunction call.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    // The base-class result is ignored; this pass always reports a change.
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  const char *getPassName() const override {
    return "ARM Instruction Selection";
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  SDNode *Select(SDNode *N) override;


  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc);

  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
                                      SDValue &Offset, SDValue &Opc);
  // The three wrappers below all run SelectAddrMode2Worker and differ only in
  // which of its results they accept.
  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
  }

  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
  }

  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc) {
    SelectAddrMode2Worker(N, Base, Offset, Opc);
//    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    // This always matches one way or another.
    return true;
  }

  /// Turn the constant node N into a target-constant predicate operand and
  /// pair it with the CPSR register. N must be a ConstantSDNode (cast<>
  /// asserts otherwise).
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode5(SDValue N, SDValue &Base,
                       SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
                             unsigned Scale);
  bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2ShifterOperandReg(SDValue N,
                                 SDValue &BaseReg, SDValue &Opc);
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Immediate-encodability predicates. Each returns true when the ARM_AM
  // encoder does not return -1 for Imm (or for ~Imm in the *_not variants).
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
  /// ARM.
  SDNode *SelectARMIndexedLoad(SDNode *N);
  SDNode *SelectT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
                          bool isUpdating, unsigned NumVecs,
                          const uint16_t *DOpcodes, const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.  (Q registers are not supported.)
  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                       const uint16_t *Opcodes);

  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
  /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
  /// generated to force the table registers to be consecutive.
  SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);

  /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
  SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  SDNode *SelectABSOp(SDNode *N);

  SDNode *SelectInlineAsm(SDNode *N);

  SDNode *SelectConcatVector(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                        bool is64BitVector);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value. Imm is left untouched
/// when the test fails.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant.
// If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
// Operand 1 is only inspected once the opcode has matched.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// \brief Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the constant divided by \p Scale
/// (i.e. N). It is also overwritten on failure whenever \p Node is a constant.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  // The zero-extended value is truncated to int before the range test.
  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

/// PreprocessISelDAG - Rewrite certain (add X, (and (srl Y, c1), c2)) nodes
/// before selection so the mask can become a bit-field extract plus a shifted
/// operand of the add. Does nothing unless the subtarget has V6T2 ops.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): named isThumb2 but initialized from isThumb(); presumably
  // equivalent here because of the hasV6T2Ops() guard above - confirm.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Advance the iterator before using N to avoid
                      // invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Normalize so that the AND-with-immediate, if any, ends up in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    // And_imm stays 0 when neither operand matched (or the mask itself is 0).
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the mask must be a contiguous run of
    // low set bits, i.e. of the form 2^k - 1.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!CheckVMLxHazard)
    return true;

  // The check only applies to these cores.
  if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
      !Subtarget->isCortexA9() && !Subtarget->isSwift())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

/// isShifterOpProfitable - Decide whether folding the shift \p Shift into its
/// user is worthwhile. Always true except on A9-like / Swift subtargets, where
/// a shift with more than one use is only folded for the "free" LSL amounts.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

/// SelectImmShifterOperand - Match a shift by a constant amount as a
/// shifter operand: \p BaseReg receives the shifted value and \p Opc the
/// encoded shift kind and amount (amount masked to 5 bits).
/// NOTE(review): \p CheckProfitability is accepted but never consulted in this
/// function - confirm whether that is intentional.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  // BaseReg is written even if the match is rejected below.
  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// SelectRegShifterOperand - Match a shift by a non-constant amount as a
/// shifter operand: \p BaseReg receives the shifted value, \p ShReg the shift
/// amount and \p Opc the encoded shift kind (with a zero immediate).
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Constant shift amounts are left to SelectImmShifterOperand.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}


/// SelectAddrModeImm12 - Match base + signed 12-bit immediate. Always
/// succeeds: anything that is not base +/- small constant is matched as a
/// bare base with a zero offset.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through an ARMISD::Wrapper unless it wraps a TargetGlobalAddress.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}



/// SelectLdStSOReg - Match base +/- (optionally shifted) register. Returns
/// false for plain bases and for base + small immediate, which are left to
/// the imm12 form.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}


//-----

/// SelectAddrMode2Worker - Match addressing mode 2. Always produces a match
/// and reports which flavour was chosen: AM2_BASE for base-only or
/// base +/- imm12, AM2_SHOP for base +/- (optionally shifted) register.
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    }
    // Register 0 stands for "no offset register".
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      // Encode the sign separately and keep the magnitude in the opcode.
      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      SDLoc(N), MVT::i32);
      return AM2_BASE;
    }
  }

  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return AM2_SHOP;
}

/// SelectAddrMode2OffsetReg - Match the register (optionally shifted) offset
/// of an indexed load/store \p Op. Fails for constants in [0, 0x1000), which
/// the immediate variants handle. \p Op must be a load or a store node.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Increment modes add the offset; every other mode subtracts it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// SelectAddrMode2OffsetImmPre - Match a constant offset in [0, 0x1000) for an
/// indexed load/store \p Op. Unlike SelectAddrMode2OffsetImm, \p Opc receives
/// the plain signed value (negated for decrementing modes) rather than an
/// AM2-encoded opcode.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


/// SelectAddrMode2OffsetImm - Match a constant offset in [0, 0x1000) for an
/// indexed load/store \p Op and encode it, together with the add/sub
/// direction, as an AM2 opcode in \p Opc.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

/// SelectAddrOffsetNone - Match any address as a bare base; always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

/// SelectAddrMode3 - Match addressing mode 3: base, base +/- imm8, or
/// base +/- register. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant offset out of imm8 range: use it as a register offset.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

/// SelectAddrMode3Offset - Match the offset of an indexed load/store \p Op in
/// addressing mode 3: a constant in [0, 256) is encoded into \p Opc, anything
/// else is used as a register offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

/// SelectAddrMode5 - Match addressing mode 5: base plus a constant that is a
/// multiple of 4 whose quotient lies in (-256, 256). Always succeeds; anything
/// else is matched as a bare base with a zero offset.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // Offset does not fit: use the whole expression as the base.
  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     SDLoc(N), MVT::i32);
  return true;
}

/// SelectAddrMode6 - Match addressing mode 6: \p Addr is the address itself
/// and \p Align a target constant carrying an alignment derived from the
/// memory node \p Parent (which must be a MemSDNode). Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    // Otherwise Alignment stays 0.
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

/// SelectAddrMode6Offset - Match the offset of a post-incremented load/store
/// \p Op (which must be an LSBaseSDNode). A constant equal to the access size
/// in bytes is replaced by register 0; any other offset is used as is.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

/// SelectAddrModePC - Match a single-use ARMISD::PIC_ADD: \p Offset is its
/// first operand and \p Label its second operand (which must be a constant)
/// re-emitted as a target constant.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                            SDValue &Base, SDValue &Offset){
  if (N.getOpcode() != ISD::ADD &&
!CurDAG->isBaseWithConstantOffset(N)) { 1063 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1064 if (!NC || !NC->isNullValue()) 1065 return false; 1066 1067 Base = Offset = N; 1068 return true; 1069 } 1070 1071 Base = N.getOperand(0); 1072 Offset = N.getOperand(1); 1073 return true; 1074 } 1075 1076 bool 1077 ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base, 1078 SDValue &Offset, unsigned Scale) { 1079 if (Scale == 4) { 1080 SDValue TmpBase, TmpOffImm; 1081 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) 1082 return false; // We want to select tLDRspi / tSTRspi instead. 1083 1084 if (N.getOpcode() == ARMISD::Wrapper && 1085 N.getOperand(0).getOpcode() == ISD::TargetConstantPool) 1086 return false; // We want to select tLDRpci instead. 1087 } 1088 1089 if (!CurDAG->isBaseWithConstantOffset(N)) 1090 return false; 1091 1092 // Thumb does not have [sp, r] address mode. 1093 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); 1094 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); 1095 if ((LHSR && LHSR->getReg() == ARM::SP) || 1096 (RHSR && RHSR->getReg() == ARM::SP)) 1097 return false; 1098 1099 // FIXME: Why do we explicitly check for a match here and then return false? 1100 // Presumably to allow something else to match, but shouldn't this be 1101 // documented? 
1102 int RHSC; 1103 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) 1104 return false; 1105 1106 Base = N.getOperand(0); 1107 Offset = N.getOperand(1); 1108 return true; 1109 } 1110 1111 bool 1112 ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N, 1113 SDValue &Base, 1114 SDValue &Offset) { 1115 return SelectThumbAddrModeRI(N, Base, Offset, 1); 1116 } 1117 1118 bool 1119 ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N, 1120 SDValue &Base, 1121 SDValue &Offset) { 1122 return SelectThumbAddrModeRI(N, Base, Offset, 2); 1123 } 1124 1125 bool 1126 ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N, 1127 SDValue &Base, 1128 SDValue &Offset) { 1129 return SelectThumbAddrModeRI(N, Base, Offset, 4); 1130 } 1131 1132 bool 1133 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1134 SDValue &Base, SDValue &OffImm) { 1135 if (Scale == 4) { 1136 SDValue TmpBase, TmpOffImm; 1137 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) 1138 return false; // We want to select tLDRspi / tSTRspi instead. 1139 1140 if (N.getOpcode() == ARMISD::Wrapper && 1141 N.getOperand(0).getOpcode() == ISD::TargetConstantPool) 1142 return false; // We want to select tLDRpci instead. 1143 } 1144 1145 if (!CurDAG->isBaseWithConstantOffset(N)) { 1146 if (N.getOpcode() == ARMISD::Wrapper && 1147 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { 1148 Base = N.getOperand(0); 1149 } else { 1150 Base = N; 1151 } 1152 1153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1154 return true; 1155 } 1156 1157 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); 1158 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); 1159 if ((LHSR && LHSR->getReg() == ARM::SP) || 1160 (RHSR && RHSR->getReg() == ARM::SP)) { 1161 ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0)); 1162 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1163 unsigned LHSC = LHS ? LHS->getZExtValue() : 0; 1164 unsigned RHSC = RHS ? 
RHS->getZExtValue() : 0; 1165 1166 // Thumb does not have [sp, #imm5] address mode for non-zero imm5. 1167 if (LHSC != 0 || RHSC != 0) return false; 1168 1169 Base = N; 1170 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1171 return true; 1172 } 1173 1174 // If the RHS is + imm5 * scale, fold into addr mode. 1175 int RHSC; 1176 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1177 Base = N.getOperand(0); 1178 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1179 return true; 1180 } 1181 1182 Base = N.getOperand(0); 1183 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1184 return true; 1185 } 1186 1187 bool 1188 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1189 SDValue &OffImm) { 1190 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1191 } 1192 1193 bool 1194 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1195 SDValue &OffImm) { 1196 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1197 } 1198 1199 bool 1200 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1201 SDValue &OffImm) { 1202 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1203 } 1204 1205 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1206 SDValue &Base, SDValue &OffImm) { 1207 if (N.getOpcode() == ISD::FrameIndex) { 1208 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1209 // Only multiples of 4 are allowed for the offset, so the frame object 1210 // alignment must be at least 4. 
1211 MachineFrameInfo *MFI = MF->getFrameInfo(); 1212 if (MFI->getObjectAlignment(FI) < 4) 1213 MFI->setObjectAlignment(FI, 4); 1214 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 1215 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1216 return true; 1217 } 1218 1219 if (!CurDAG->isBaseWithConstantOffset(N)) 1220 return false; 1221 1222 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); 1223 if (N.getOperand(0).getOpcode() == ISD::FrameIndex || 1224 (LHSR && LHSR->getReg() == ARM::SP)) { 1225 // If the RHS is + imm8 * scale, fold into addr mode. 1226 int RHSC; 1227 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1228 Base = N.getOperand(0); 1229 if (Base.getOpcode() == ISD::FrameIndex) { 1230 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1231 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1232 // indexed by the LHS must be 4-byte aligned. 1233 MachineFrameInfo *MFI = MF->getFrameInfo(); 1234 if (MFI->getObjectAlignment(FI) < 4) 1235 MFI->setObjectAlignment(FI, 4); 1236 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 1237 } 1238 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1239 return true; 1240 } 1241 } 1242 1243 return false; 1244 } 1245 1246 1247 //===----------------------------------------------------------------------===// 1248 // Thumb 2 Addressing Modes 1249 //===----------------------------------------------------------------------===// 1250 1251 1252 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg, 1253 SDValue &Opc) { 1254 if (DisableShifterOp) 1255 return false; 1256 1257 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 1258 1259 // Don't match base register only case. That is matched to a separate 1260 // lower complexity pattern with explicit register operand. 
1261 if (ShOpcVal == ARM_AM::no_shift) return false; 1262 1263 BaseReg = N.getOperand(0); 1264 unsigned ShImmVal = 0; 1265 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1266 ShImmVal = RHS->getZExtValue() & 31; 1267 Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N)); 1268 return true; 1269 } 1270 1271 return false; 1272 } 1273 1274 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1275 SDValue &Base, SDValue &OffImm) { 1276 // Match simple R + imm12 operands. 1277 1278 // Base only. 1279 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1280 !CurDAG->isBaseWithConstantOffset(N)) { 1281 if (N.getOpcode() == ISD::FrameIndex) { 1282 // Match frame index. 1283 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1284 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 1285 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1286 return true; 1287 } 1288 1289 if (N.getOpcode() == ARMISD::Wrapper && 1290 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { 1291 Base = N.getOperand(0); 1292 if (Base.getOpcode() == ISD::TargetConstantPool) 1293 return false; // We want to select t2LDRpci instead. 1294 } else 1295 Base = N; 1296 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1297 return true; 1298 } 1299 1300 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1301 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1302 // Let t2LDRi8 handle (R - imm8). 1303 return false; 1304 1305 int RHSC = (int)RHS->getZExtValue(); 1306 if (N.getOpcode() == ISD::SUB) 1307 RHSC = -RHSC; 1308 1309 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1310 Base = N.getOperand(0); 1311 if (Base.getOpcode() == ISD::FrameIndex) { 1312 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1313 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 1314 } 1315 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1316 return true; 1317 } 1318 } 1319 1320 // Base only. 
1321 Base = N; 1322 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1323 return true; 1324 } 1325 1326 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1327 SDValue &Base, SDValue &OffImm) { 1328 // Match simple R - imm8 operands. 1329 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1330 !CurDAG->isBaseWithConstantOffset(N)) 1331 return false; 1332 1333 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1334 int RHSC = (int)RHS->getSExtValue(); 1335 if (N.getOpcode() == ISD::SUB) 1336 RHSC = -RHSC; 1337 1338 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1339 Base = N.getOperand(0); 1340 if (Base.getOpcode() == ISD::FrameIndex) { 1341 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1342 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 1343 } 1344 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1345 return true; 1346 } 1347 } 1348 1349 return false; 1350 } 1351 1352 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1353 SDValue &OffImm){ 1354 unsigned Opcode = Op->getOpcode(); 1355 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1356 ? cast<LoadSDNode>(Op)->getAddressingMode() 1357 : cast<StoreSDNode>(Op)->getAddressingMode(); 1358 int RHSC; 1359 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1360 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1361 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1362 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1363 return true; 1364 } 1365 1366 return false; 1367 } 1368 1369 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1370 SDValue &Base, 1371 SDValue &OffReg, SDValue &ShImm) { 1372 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1373 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1374 return false; 1375 1376 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 
1377 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1378 int RHSC = (int)RHS->getZExtValue(); 1379 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1380 return false; 1381 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1382 return false; 1383 } 1384 1385 // Look for (R + R) or (R + (R << [1,2,3])). 1386 unsigned ShAmt = 0; 1387 Base = N.getOperand(0); 1388 OffReg = N.getOperand(1); 1389 1390 // Swap if it is ((R << c) + R). 1391 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1392 if (ShOpcVal != ARM_AM::lsl) { 1393 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1394 if (ShOpcVal == ARM_AM::lsl) 1395 std::swap(Base, OffReg); 1396 } 1397 1398 if (ShOpcVal == ARM_AM::lsl) { 1399 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1400 // it. 1401 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1402 ShAmt = Sh->getZExtValue(); 1403 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1404 OffReg = OffReg.getOperand(0); 1405 else { 1406 ShAmt = 0; 1407 } 1408 } 1409 } 1410 1411 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1412 1413 return true; 1414 } 1415 1416 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1417 SDValue &OffImm) { 1418 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1419 // instructions. 
1420 Base = N; 1421 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1422 1423 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1424 return true; 1425 1426 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1427 if (!RHS) 1428 return true; 1429 1430 uint32_t RHSC = (int)RHS->getZExtValue(); 1431 if (RHSC > 1020 || RHSC % 4 != 0) 1432 return true; 1433 1434 Base = N.getOperand(0); 1435 if (Base.getOpcode() == ISD::FrameIndex) { 1436 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1437 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 1438 } 1439 1440 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1441 return true; 1442 } 1443 1444 //===--------------------------------------------------------------------===// 1445 1446 /// getAL - Returns a ARMCC::AL immediate node. 1447 static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) { 1448 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1449 } 1450 1451 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { 1452 LoadSDNode *LD = cast<LoadSDNode>(N); 1453 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1454 if (AM == ISD::UNINDEXED) 1455 return nullptr; 1456 1457 EVT LoadedVT = LD->getMemoryVT(); 1458 SDValue Offset, AMOpc; 1459 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1460 unsigned Opcode = 0; 1461 bool Match = false; 1462 if (LoadedVT == MVT::i32 && isPre && 1463 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1464 Opcode = ARM::LDR_PRE_IMM; 1465 Match = true; 1466 } else if (LoadedVT == MVT::i32 && !isPre && 1467 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1468 Opcode = ARM::LDR_POST_IMM; 1469 Match = true; 1470 } else if (LoadedVT == MVT::i32 && 1471 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1472 Opcode = isPre ? 
ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1473 Match = true; 1474 1475 } else if (LoadedVT == MVT::i16 && 1476 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1477 Match = true; 1478 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1479 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1480 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1481 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1482 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1483 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1484 Match = true; 1485 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; 1486 } 1487 } else { 1488 if (isPre && 1489 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1490 Match = true; 1491 Opcode = ARM::LDRB_PRE_IMM; 1492 } else if (!isPre && 1493 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1494 Match = true; 1495 Opcode = ARM::LDRB_POST_IMM; 1496 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1497 Match = true; 1498 Opcode = isPre ? 
ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1499 } 1500 } 1501 } 1502 1503 if (Match) { 1504 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1505 SDValue Chain = LD->getChain(); 1506 SDValue Base = LD->getBasePtr(); 1507 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1508 CurDAG->getRegister(0, MVT::i32), Chain }; 1509 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, 1510 MVT::i32, MVT::Other, Ops); 1511 } else { 1512 SDValue Chain = LD->getChain(); 1513 SDValue Base = LD->getBasePtr(); 1514 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1515 CurDAG->getRegister(0, MVT::i32), Chain }; 1516 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, 1517 MVT::i32, MVT::Other, Ops); 1518 } 1519 } 1520 1521 return nullptr; 1522 } 1523 1524 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { 1525 LoadSDNode *LD = cast<LoadSDNode>(N); 1526 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1527 if (AM == ISD::UNINDEXED) 1528 return nullptr; 1529 1530 EVT LoadedVT = LD->getMemoryVT(); 1531 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1532 SDValue Offset; 1533 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1534 unsigned Opcode = 0; 1535 bool Match = false; 1536 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1537 switch (LoadedVT.getSimpleVT().SimpleTy) { 1538 case MVT::i32: 1539 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1540 break; 1541 case MVT::i16: 1542 if (isSExtLd) 1543 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1544 else 1545 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1546 break; 1547 case MVT::i8: 1548 case MVT::i1: 1549 if (isSExtLd) 1550 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1551 else 1552 Opcode = isPre ? 
ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1553 break; 1554 default: 1555 return nullptr; 1556 } 1557 Match = true; 1558 } 1559 1560 if (Match) { 1561 SDValue Chain = LD->getChain(); 1562 SDValue Base = LD->getBasePtr(); 1563 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), 1564 CurDAG->getRegister(0, MVT::i32), Chain }; 1565 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1566 MVT::Other, Ops); 1567 } 1568 1569 return nullptr; 1570 } 1571 1572 /// \brief Form a GPRPair pseudo register from a pair of GPR regs. 1573 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1574 SDLoc dl(V0.getNode()); 1575 SDValue RegClass = 1576 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1577 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1578 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1579 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1580 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1581 } 1582 1583 /// \brief Form a D register from a pair of S registers. 1584 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1585 SDLoc dl(V0.getNode()); 1586 SDValue RegClass = 1587 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1588 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1589 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1590 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1591 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1592 } 1593 1594 /// \brief Form a quad register from a pair of D registers. 
1595 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1596 SDLoc dl(V0.getNode()); 1597 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1598 MVT::i32); 1599 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1600 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1601 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1602 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1603 } 1604 1605 /// \brief Form 4 consecutive D registers from a pair of Q registers. 1606 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1607 SDLoc dl(V0.getNode()); 1608 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1609 MVT::i32); 1610 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1611 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1612 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1613 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1614 } 1615 1616 /// \brief Form 4 consecutive S registers. 1617 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1618 SDValue V2, SDValue V3) { 1619 SDLoc dl(V0.getNode()); 1620 SDValue RegClass = 1621 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1622 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1623 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1624 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1625 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1626 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1627 V2, SubReg2, V3, SubReg3 }; 1628 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1629 } 1630 1631 /// \brief Form 4 consecutive D registers. 
1632 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1633 SDValue V2, SDValue V3) { 1634 SDLoc dl(V0.getNode()); 1635 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1636 MVT::i32); 1637 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1638 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1639 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1640 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1641 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1642 V2, SubReg2, V3, SubReg3 }; 1643 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1644 } 1645 1646 /// \brief Form 4 consecutive Q registers. 1647 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1648 SDValue V2, SDValue V3) { 1649 SDLoc dl(V0.getNode()); 1650 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1651 MVT::i32); 1652 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1653 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1654 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1655 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1656 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1657 V2, SubReg2, V3, SubReg3 }; 1658 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1659 } 1660 1661 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1662 /// of a NEON VLD or VST instruction. The supported values depend on the 1663 /// number of registers being loaded. 
1664 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl, 1665 unsigned NumVecs, bool is64BitVector) { 1666 unsigned NumRegs = NumVecs; 1667 if (!is64BitVector && NumVecs < 3) 1668 NumRegs *= 2; 1669 1670 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1671 if (Alignment >= 32 && NumRegs == 4) 1672 Alignment = 32; 1673 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1674 Alignment = 16; 1675 else if (Alignment >= 8) 1676 Alignment = 8; 1677 else 1678 Alignment = 0; 1679 1680 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1681 } 1682 1683 static bool isVLDfixed(unsigned Opc) 1684 { 1685 switch (Opc) { 1686 default: return false; 1687 case ARM::VLD1d8wb_fixed : return true; 1688 case ARM::VLD1d16wb_fixed : return true; 1689 case ARM::VLD1d64Qwb_fixed : return true; 1690 case ARM::VLD1d32wb_fixed : return true; 1691 case ARM::VLD1d64wb_fixed : return true; 1692 case ARM::VLD1d64TPseudoWB_fixed : return true; 1693 case ARM::VLD1d64QPseudoWB_fixed : return true; 1694 case ARM::VLD1q8wb_fixed : return true; 1695 case ARM::VLD1q16wb_fixed : return true; 1696 case ARM::VLD1q32wb_fixed : return true; 1697 case ARM::VLD1q64wb_fixed : return true; 1698 case ARM::VLD2d8wb_fixed : return true; 1699 case ARM::VLD2d16wb_fixed : return true; 1700 case ARM::VLD2d32wb_fixed : return true; 1701 case ARM::VLD2q8PseudoWB_fixed : return true; 1702 case ARM::VLD2q16PseudoWB_fixed : return true; 1703 case ARM::VLD2q32PseudoWB_fixed : return true; 1704 case ARM::VLD2DUPd8wb_fixed : return true; 1705 case ARM::VLD2DUPd16wb_fixed : return true; 1706 case ARM::VLD2DUPd32wb_fixed : return true; 1707 } 1708 } 1709 1710 static bool isVSTfixed(unsigned Opc) 1711 { 1712 switch (Opc) { 1713 default: return false; 1714 case ARM::VST1d8wb_fixed : return true; 1715 case ARM::VST1d16wb_fixed : return true; 1716 case ARM::VST1d32wb_fixed : return true; 1717 case ARM::VST1d64wb_fixed : return true; 1718 case ARM::VST1q8wb_fixed : return true; 1719 case 
ARM::VST1q16wb_fixed : return true; 1720 case ARM::VST1q32wb_fixed : return true; 1721 case ARM::VST1q64wb_fixed : return true; 1722 case ARM::VST1d64TPseudoWB_fixed : return true; 1723 case ARM::VST1d64QPseudoWB_fixed : return true; 1724 case ARM::VST2d8wb_fixed : return true; 1725 case ARM::VST2d16wb_fixed : return true; 1726 case ARM::VST2d32wb_fixed : return true; 1727 case ARM::VST2q8PseudoWB_fixed : return true; 1728 case ARM::VST2q16PseudoWB_fixed : return true; 1729 case ARM::VST2q32PseudoWB_fixed : return true; 1730 } 1731 } 1732 1733 // Get the register stride update opcode of a VLD/VST instruction that 1734 // is otherwise equivalent to the given fixed stride updating instruction. 1735 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1736 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1737 && "Incorrect fixed stride updating instruction."); 1738 switch (Opc) { 1739 default: break; 1740 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1741 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1742 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1743 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 1744 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 1745 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 1746 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 1747 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 1748 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 1749 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 1750 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 1751 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 1752 1753 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 1754 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 1755 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 1756 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 1757 case ARM::VST1q8wb_fixed: 
return ARM::VST1q8wb_register; 1758 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 1759 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 1760 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 1761 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 1762 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 1763 1764 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 1765 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 1766 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 1767 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 1768 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 1769 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 1770 1771 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 1772 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 1773 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 1774 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 1775 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 1776 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 1777 1778 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 1779 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 1780 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 1781 } 1782 return Opc; // If not one we handle, return it unchanged. 1783 } 1784 1785 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 1786 const uint16_t *DOpcodes, 1787 const uint16_t *QOpcodes0, 1788 const uint16_t *QOpcodes1) { 1789 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 1790 SDLoc dl(N); 1791 1792 SDValue MemAddr, Align; 1793 unsigned AddrOpIdx = isUpdating ? 
1 : 2; 1794 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1795 return nullptr; 1796 1797 SDValue Chain = N->getOperand(0); 1798 EVT VT = N->getValueType(0); 1799 bool is64BitVector = VT.is64BitVector(); 1800 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1801 1802 unsigned OpcodeIndex; 1803 switch (VT.getSimpleVT().SimpleTy) { 1804 default: llvm_unreachable("unhandled vld type"); 1805 // Double-register operations: 1806 case MVT::v8i8: OpcodeIndex = 0; break; 1807 case MVT::v4i16: OpcodeIndex = 1; break; 1808 case MVT::v2f32: 1809 case MVT::v2i32: OpcodeIndex = 2; break; 1810 case MVT::v1i64: OpcodeIndex = 3; break; 1811 // Quad-register operations: 1812 case MVT::v16i8: OpcodeIndex = 0; break; 1813 case MVT::v8i16: OpcodeIndex = 1; break; 1814 case MVT::v4f32: 1815 case MVT::v4i32: OpcodeIndex = 2; break; 1816 case MVT::v2f64: 1817 case MVT::v2i64: OpcodeIndex = 3; 1818 assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); 1819 break; 1820 } 1821 1822 EVT ResTy; 1823 if (NumVecs == 1) 1824 ResTy = VT; 1825 else { 1826 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 1827 if (!is64BitVector) 1828 ResTyElts *= 2; 1829 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 1830 } 1831 std::vector<EVT> ResTys; 1832 ResTys.push_back(ResTy); 1833 if (isUpdating) 1834 ResTys.push_back(MVT::i32); 1835 ResTys.push_back(MVT::Other); 1836 1837 SDValue Pred = getAL(CurDAG, dl); 1838 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1839 SDNode *VLd; 1840 SmallVector<SDValue, 7> Ops; 1841 1842 // Double registers and VLD1/VLD2 quad registers are directly supported. 1843 if (is64BitVector || NumVecs <= 2) { 1844 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1845 QOpcodes0[OpcodeIndex]); 1846 Ops.push_back(MemAddr); 1847 Ops.push_back(Align); 1848 if (isUpdating) { 1849 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1850 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. 
Remove the reg0 1851 // case entirely when the rest are updated to that form, too. 1852 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) 1853 Opc = getVLDSTRegisterUpdateOpcode(Opc); 1854 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 1855 // check for that explicitly too. Horribly hacky, but temporary. 1856 if ((NumVecs > 2 && !isVLDfixed(Opc)) || 1857 !isa<ConstantSDNode>(Inc.getNode())) 1858 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); 1859 } 1860 Ops.push_back(Pred); 1861 Ops.push_back(Reg0); 1862 Ops.push_back(Chain); 1863 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1864 1865 } else { 1866 // Otherwise, quad registers are loaded with two separate instructions, 1867 // where one loads the even registers and the other loads the odd registers. 1868 EVT AddrTy = MemAddr.getValueType(); 1869 1870 // Load the even subregs. This is always an updating load, so that it 1871 // provides the address to the second load for the odd subregs. 1872 SDValue ImplDef = 1873 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 1874 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 1875 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 1876 ResTy, AddrTy, MVT::Other, OpsA); 1877 Chain = SDValue(VLdA, 2); 1878 1879 // Load the odd subregs. 1880 Ops.push_back(SDValue(VLdA, 1)); 1881 Ops.push_back(Align); 1882 if (isUpdating) { 1883 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1884 assert(isa<ConstantSDNode>(Inc.getNode()) && 1885 "only constant post-increment update allowed for VLD3/4"); 1886 (void)Inc; 1887 Ops.push_back(Reg0); 1888 } 1889 Ops.push_back(SDValue(VLdA, 0)); 1890 Ops.push_back(Pred); 1891 Ops.push_back(Reg0); 1892 Ops.push_back(Chain); 1893 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 1894 } 1895 1896 // Transfer memoperands. 
1897 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1898 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1899 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); 1900 1901 if (NumVecs == 1) 1902 return VLd; 1903 1904 // Extract out the subregisters. 1905 SDValue SuperReg = SDValue(VLd, 0); 1906 assert(ARM::dsub_7 == ARM::dsub_0+7 && 1907 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); 1908 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); 1909 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 1910 ReplaceUses(SDValue(N, Vec), 1911 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 1912 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 1913 if (isUpdating) 1914 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 1915 return nullptr; 1916 } 1917 1918 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 1919 const uint16_t *DOpcodes, 1920 const uint16_t *QOpcodes0, 1921 const uint16_t *QOpcodes1) { 1922 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 1923 SDLoc dl(N); 1924 1925 SDValue MemAddr, Align; 1926 unsigned AddrOpIdx = isUpdating ? 1 : 2; 1927 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) 1928 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1929 return nullptr; 1930 1931 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1932 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1933 1934 SDValue Chain = N->getOperand(0); 1935 EVT VT = N->getOperand(Vec0Idx).getValueType(); 1936 bool is64BitVector = VT.is64BitVector(); 1937 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1938 1939 unsigned OpcodeIndex; 1940 switch (VT.getSimpleVT().SimpleTy) { 1941 default: llvm_unreachable("unhandled vst type"); 1942 // Double-register operations: 1943 case MVT::v8i8: OpcodeIndex = 0; break; 1944 case MVT::v4i16: OpcodeIndex = 1; break; 1945 case MVT::v2f32: 1946 case MVT::v2i32: OpcodeIndex = 2; break; 1947 case MVT::v1i64: OpcodeIndex = 3; break; 1948 // Quad-register operations: 1949 case MVT::v16i8: OpcodeIndex = 0; break; 1950 case MVT::v8i16: OpcodeIndex = 1; break; 1951 case MVT::v4f32: 1952 case MVT::v4i32: OpcodeIndex = 2; break; 1953 case MVT::v2f64: 1954 case MVT::v2i64: OpcodeIndex = 3; 1955 assert(NumVecs == 1 && "v2i64 type only supported for VST1"); 1956 break; 1957 } 1958 1959 std::vector<EVT> ResTys; 1960 if (isUpdating) 1961 ResTys.push_back(MVT::i32); 1962 ResTys.push_back(MVT::Other); 1963 1964 SDValue Pred = getAL(CurDAG, dl); 1965 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1966 SmallVector<SDValue, 7> Ops; 1967 1968 // Double registers and VST1/VST2 quad registers are directly supported. 1969 if (is64BitVector || NumVecs <= 2) { 1970 SDValue SrcReg; 1971 if (NumVecs == 1) { 1972 SrcReg = N->getOperand(Vec0Idx); 1973 } else if (is64BitVector) { 1974 // Form a REG_SEQUENCE to force register allocation. 
1975 SDValue V0 = N->getOperand(Vec0Idx + 0); 1976 SDValue V1 = N->getOperand(Vec0Idx + 1); 1977 if (NumVecs == 2) 1978 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 1979 else { 1980 SDValue V2 = N->getOperand(Vec0Idx + 2); 1981 // If it's a vst3, form a quad D-register and leave the last part as 1982 // an undef. 1983 SDValue V3 = (NumVecs == 3) 1984 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 1985 : N->getOperand(Vec0Idx + 3); 1986 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 1987 } 1988 } else { 1989 // Form a QQ register. 1990 SDValue Q0 = N->getOperand(Vec0Idx); 1991 SDValue Q1 = N->getOperand(Vec0Idx + 1); 1992 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 1993 } 1994 1995 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1996 QOpcodes0[OpcodeIndex]); 1997 Ops.push_back(MemAddr); 1998 Ops.push_back(Align); 1999 if (isUpdating) { 2000 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2001 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 2002 // case entirely when the rest are updated to that form, too. 2003 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) 2004 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2005 // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so 2006 // check for that explicitly too. Horribly hacky, but temporary. 2007 if (!isa<ConstantSDNode>(Inc.getNode())) 2008 Ops.push_back(Inc); 2009 else if (NumVecs > 2 && !isVSTfixed(Opc)) 2010 Ops.push_back(Reg0); 2011 } 2012 Ops.push_back(SrcReg); 2013 Ops.push_back(Pred); 2014 Ops.push_back(Reg0); 2015 Ops.push_back(Chain); 2016 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2017 2018 // Transfer memoperands. 2019 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); 2020 2021 return VSt; 2022 } 2023 2024 // Otherwise, quad registers are stored with two separate instructions, 2025 // where one stores the even registers and the other stores the odd registers. 
2026 2027 // Form the QQQQ REG_SEQUENCE. 2028 SDValue V0 = N->getOperand(Vec0Idx + 0); 2029 SDValue V1 = N->getOperand(Vec0Idx + 1); 2030 SDValue V2 = N->getOperand(Vec0Idx + 2); 2031 SDValue V3 = (NumVecs == 3) 2032 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2033 : N->getOperand(Vec0Idx + 3); 2034 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2035 2036 // Store the even D registers. This is always an updating store, so that it 2037 // provides the address to the second store for the odd subregs. 2038 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2039 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2040 MemAddr.getValueType(), 2041 MVT::Other, OpsA); 2042 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); 2043 Chain = SDValue(VStA, 1); 2044 2045 // Store the odd D registers. 2046 Ops.push_back(SDValue(VStA, 0)); 2047 Ops.push_back(Align); 2048 if (isUpdating) { 2049 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2050 assert(isa<ConstantSDNode>(Inc.getNode()) && 2051 "only constant post-increment update allowed for VST3/4"); 2052 (void)Inc; 2053 Ops.push_back(Reg0); 2054 } 2055 Ops.push_back(RegSeq); 2056 Ops.push_back(Pred); 2057 Ops.push_back(Reg0); 2058 Ops.push_back(Chain); 2059 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2060 Ops); 2061 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); 2062 return VStB; 2063 } 2064 2065 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, 2066 bool isUpdating, unsigned NumVecs, 2067 const uint16_t *DOpcodes, 2068 const uint16_t *QOpcodes) { 2069 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2070 SDLoc dl(N); 2071 2072 SDValue MemAddr, Align; 2073 unsigned AddrOpIdx = isUpdating ? 1 : 2; 2074 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) 2075 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2076 return nullptr; 2077 2078 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2079 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2080 2081 SDValue Chain = N->getOperand(0); 2082 unsigned Lane = 2083 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2084 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2085 bool is64BitVector = VT.is64BitVector(); 2086 2087 unsigned Alignment = 0; 2088 if (NumVecs != 3) { 2089 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2090 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; 2091 if (Alignment > NumBytes) 2092 Alignment = NumBytes; 2093 if (Alignment < 8 && Alignment < NumBytes) 2094 Alignment = 0; 2095 // Alignment must be a power of two; make sure of that. 2096 Alignment = (Alignment & -Alignment); 2097 if (Alignment == 1) 2098 Alignment = 0; 2099 } 2100 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2101 2102 unsigned OpcodeIndex; 2103 switch (VT.getSimpleVT().SimpleTy) { 2104 default: llvm_unreachable("unhandled vld/vst lane type"); 2105 // Double-register operations: 2106 case MVT::v8i8: OpcodeIndex = 0; break; 2107 case MVT::v4i16: OpcodeIndex = 1; break; 2108 case MVT::v2f32: 2109 case MVT::v2i32: OpcodeIndex = 2; break; 2110 // Quad-register operations: 2111 case MVT::v8i16: OpcodeIndex = 0; break; 2112 case MVT::v4f32: 2113 case MVT::v4i32: OpcodeIndex = 1; break; 2114 } 2115 2116 std::vector<EVT> ResTys; 2117 if (IsLoad) { 2118 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2119 if (!is64BitVector) 2120 ResTyElts *= 2; 2121 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2122 MVT::i64, ResTyElts)); 2123 } 2124 if (isUpdating) 2125 ResTys.push_back(MVT::i32); 2126 ResTys.push_back(MVT::Other); 2127 2128 SDValue Pred = getAL(CurDAG, dl); 2129 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2130 2131 SmallVector<SDValue, 8> Ops; 2132 Ops.push_back(MemAddr); 2133 Ops.push_back(Align); 2134 if (isUpdating) { 2135 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2136 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); 2137 } 2138 2139 SDValue SuperReg; 2140 SDValue V0 = N->getOperand(Vec0Idx + 0); 2141 SDValue V1 = N->getOperand(Vec0Idx + 1); 2142 if (NumVecs == 2) { 2143 if (is64BitVector) 2144 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2145 else 2146 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2147 } else { 2148 SDValue V2 = N->getOperand(Vec0Idx + 2); 2149 SDValue V3 = (NumVecs == 3) 2150 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2151 : N->getOperand(Vec0Idx + 3); 2152 if (is64BitVector) 2153 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2154 else 2155 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2156 } 2157 Ops.push_back(SuperReg); 2158 Ops.push_back(getI32Imm(Lane, dl)); 2159 Ops.push_back(Pred); 2160 Ops.push_back(Reg0); 2161 Ops.push_back(Chain); 2162 2163 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2164 QOpcodes[OpcodeIndex]); 2165 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2166 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); 2167 if (!IsLoad) 2168 return VLdLn; 2169 2170 // Extract the subregisters. 2171 SuperReg = SDValue(VLdLn, 0); 2172 assert(ARM::dsub_7 == ARM::dsub_0+7 && 2173 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); 2174 unsigned Sub0 = is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0; 2175 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2176 ReplaceUses(SDValue(N, Vec), 2177 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2178 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2179 if (isUpdating) 2180 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2181 return nullptr; 2182 } 2183 2184 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, 2185 unsigned NumVecs, 2186 const uint16_t *Opcodes) { 2187 assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2188 SDLoc dl(N); 2189 2190 SDValue MemAddr, Align; 2191 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) 2192 return nullptr; 2193 2194 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2195 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2196 2197 SDValue Chain = N->getOperand(0); 2198 EVT VT = N->getValueType(0); 2199 2200 unsigned Alignment = 0; 2201 if (NumVecs != 3) { 2202 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2203 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; 2204 if (Alignment > NumBytes) 2205 Alignment = NumBytes; 2206 if (Alignment < 8 && Alignment < NumBytes) 2207 Alignment = 0; 2208 // Alignment must be a power of two; make sure of that. 
2209 Alignment = (Alignment & -Alignment); 2210 if (Alignment == 1) 2211 Alignment = 0; 2212 } 2213 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2214 2215 unsigned OpcodeIndex; 2216 switch (VT.getSimpleVT().SimpleTy) { 2217 default: llvm_unreachable("unhandled vld-dup type"); 2218 case MVT::v8i8: OpcodeIndex = 0; break; 2219 case MVT::v4i16: OpcodeIndex = 1; break; 2220 case MVT::v2f32: 2221 case MVT::v2i32: OpcodeIndex = 2; break; 2222 } 2223 2224 SDValue Pred = getAL(CurDAG, dl); 2225 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2226 SDValue SuperReg; 2227 unsigned Opc = Opcodes[OpcodeIndex]; 2228 SmallVector<SDValue, 6> Ops; 2229 Ops.push_back(MemAddr); 2230 Ops.push_back(Align); 2231 if (isUpdating) { 2232 // fixed-stride update instructions don't have an explicit writeback 2233 // operand. It's implicit in the opcode itself. 2234 SDValue Inc = N->getOperand(2); 2235 if (!isa<ConstantSDNode>(Inc.getNode())) 2236 Ops.push_back(Inc); 2237 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 2238 else if (NumVecs > 2) 2239 Ops.push_back(Reg0); 2240 } 2241 Ops.push_back(Pred); 2242 Ops.push_back(Reg0); 2243 Ops.push_back(Chain); 2244 2245 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2246 std::vector<EVT> ResTys; 2247 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); 2248 if (isUpdating) 2249 ResTys.push_back(MVT::i32); 2250 ResTys.push_back(MVT::Other); 2251 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2252 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); 2253 SuperReg = SDValue(VLdDup, 0); 2254 2255 // Extract the subregisters. 
2256 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2257 unsigned SubIdx = ARM::dsub_0; 2258 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2259 ReplaceUses(SDValue(N, Vec), 2260 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 2261 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 2262 if (isUpdating) 2263 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 2264 return nullptr; 2265 } 2266 2267 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, 2268 unsigned Opc) { 2269 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); 2270 SDLoc dl(N); 2271 EVT VT = N->getValueType(0); 2272 unsigned FirstTblReg = IsExt ? 2 : 1; 2273 2274 // Form a REG_SEQUENCE to force register allocation. 2275 SDValue RegSeq; 2276 SDValue V0 = N->getOperand(FirstTblReg + 0); 2277 SDValue V1 = N->getOperand(FirstTblReg + 1); 2278 if (NumVecs == 2) 2279 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 2280 else { 2281 SDValue V2 = N->getOperand(FirstTblReg + 2); 2282 // If it's a vtbl3, form a quad D-register and leave the last part as 2283 // an undef. 2284 SDValue V3 = (NumVecs == 3) 2285 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2286 : N->getOperand(FirstTblReg + 3); 2287 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2288 } 2289 2290 SmallVector<SDValue, 6> Ops; 2291 if (IsExt) 2292 Ops.push_back(N->getOperand(1)); 2293 Ops.push_back(RegSeq); 2294 Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); 2295 Ops.push_back(getAL(CurDAG, dl)); // predicate 2296 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register 2297 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 2298 } 2299 2300 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, 2301 bool isSigned) { 2302 if (!Subtarget->hasV6T2Ops()) 2303 return nullptr; 2304 2305 unsigned Opc = isSigned 2306 ? (Subtarget->isThumb() ? 
ARM::t2SBFX : ARM::SBFX) 2307 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 2308 SDLoc dl(N); 2309 2310 // For unsigned extracts, check for a shift right and mask 2311 unsigned And_imm = 0; 2312 if (N->getOpcode() == ISD::AND) { 2313 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 2314 2315 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2316 if (And_imm & (And_imm + 1)) 2317 return nullptr; 2318 2319 unsigned Srl_imm = 0; 2320 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 2321 Srl_imm)) { 2322 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2323 2324 // Note: The width operand is encoded as width-1. 2325 unsigned Width = countTrailingOnes(And_imm) - 1; 2326 unsigned LSB = Srl_imm; 2327 2328 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2329 2330 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 2331 // It's cheaper to use a right shift to extract the top bits. 2332 if (Subtarget->isThumb()) { 2333 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 2334 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2335 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2336 getAL(CurDAG, dl), Reg0, Reg0 }; 2337 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2338 } 2339 2340 // ARM models shift instructions as MOVsi with shifter operand. 
2341 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 2342 SDValue ShOpc = 2343 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 2344 MVT::i32); 2345 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 2346 getAL(CurDAG, dl), Reg0, Reg0 }; 2347 return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 2348 } 2349 2350 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2351 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2352 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2353 getAL(CurDAG, dl), Reg0 }; 2354 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2355 } 2356 } 2357 return nullptr; 2358 } 2359 2360 // Otherwise, we're looking for a shift of a shift 2361 unsigned Shl_imm = 0; 2362 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 2363 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 2364 unsigned Srl_imm = 0; 2365 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 2366 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2367 // Note: The width operand is encoded as width-1. 
2368 unsigned Width = 32 - Srl_imm - 1; 2369 int LSB = Srl_imm - Shl_imm; 2370 if (LSB < 0) 2371 return nullptr; 2372 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2373 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2374 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2375 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2376 getAL(CurDAG, dl), Reg0 }; 2377 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2378 } 2379 } 2380 2381 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 2382 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2383 unsigned LSB = 0; 2384 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 2385 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 2386 return nullptr; 2387 2388 if (LSB + Width > 32) 2389 return nullptr; 2390 2391 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2392 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2393 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2394 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 2395 getAL(CurDAG, dl), Reg0 }; 2396 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2397 } 2398 2399 return nullptr; 2400 } 2401 2402 /// Target-specific DAG combining for ISD::XOR. 2403 /// Target-independent combining lowers SELECT_CC nodes of the form 2404 /// select_cc setg[ge] X, 0, X, -X 2405 /// select_cc setgt X, -1, X, -X 2406 /// select_cc setl[te] X, 0, -X, X 2407 /// select_cc setlt X, 1, -X, X 2408 /// which represent Integer ABS into: 2409 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 2410 /// ARM instruction selection detects the latter and matches it to 2411 /// ARM::ABS or ARM::t2ABS machine node. 
2412 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ 2413 SDValue XORSrc0 = N->getOperand(0); 2414 SDValue XORSrc1 = N->getOperand(1); 2415 EVT VT = N->getValueType(0); 2416 2417 if (Subtarget->isThumb1Only()) 2418 return nullptr; 2419 2420 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 2421 return nullptr; 2422 2423 SDValue ADDSrc0 = XORSrc0.getOperand(0); 2424 SDValue ADDSrc1 = XORSrc0.getOperand(1); 2425 SDValue SRASrc0 = XORSrc1.getOperand(0); 2426 SDValue SRASrc1 = XORSrc1.getOperand(1); 2427 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 2428 EVT XType = SRASrc0.getValueType(); 2429 unsigned Size = XType.getSizeInBits() - 1; 2430 2431 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 2432 XType.isInteger() && SRAConstant != nullptr && 2433 Size == SRAConstant->getZExtValue()) { 2434 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 2435 return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 2436 } 2437 2438 return nullptr; 2439 } 2440 2441 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { 2442 // The only time a CONCAT_VECTORS operation can have legal types is when 2443 // two 64-bit vectors are concatenated to a 128-bit vector. 2444 EVT VT = N->getValueType(0); 2445 if (!VT.is128BitVector() || N->getNumOperands() != 2) 2446 llvm_unreachable("unexpected CONCAT_VECTORS"); 2447 return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); 2448 } 2449 2450 SDNode *ARMDAGToDAGISel::Select(SDNode *N) { 2451 SDLoc dl(N); 2452 2453 if (N->isMachineOpcode()) { 2454 N->setNodeId(-1); 2455 return nullptr; // Already selected. 
2456 } 2457 2458 switch (N->getOpcode()) { 2459 default: break; 2460 case ISD::INLINEASM: { 2461 SDNode *ResNode = SelectInlineAsm(N); 2462 if (ResNode) 2463 return ResNode; 2464 break; 2465 } 2466 case ISD::XOR: { 2467 // Select special operations if XOR node forms integer ABS pattern 2468 SDNode *ResNode = SelectABSOp(N); 2469 if (ResNode) 2470 return ResNode; 2471 // Other cases are autogenerated. 2472 break; 2473 } 2474 case ISD::Constant: { 2475 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2476 bool UseCP = true; 2477 if (Subtarget->useMovt(*MF)) 2478 // Thumb2-aware targets have the MOVT instruction, so all immediates can 2479 // be done with MOV + MOVT, at worst. 2480 UseCP = false; 2481 else { 2482 if (Subtarget->isThumb()) { 2483 UseCP = (Val > 255 && // MOV 2484 ~Val > 255 && // MOV + MVN 2485 !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL 2486 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW 2487 } else 2488 UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV 2489 ARM_AM::getSOImmVal(~Val) == -1 && // MVN 2490 !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs. 
2491 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW 2492 } 2493 2494 if (UseCP) { 2495 SDValue CPIdx = CurDAG->getTargetConstantPool( 2496 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2497 TLI->getPointerTy()); 2498 2499 SDNode *ResNode; 2500 if (Subtarget->isThumb()) { 2501 SDValue Pred = getAL(CurDAG, dl); 2502 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2503 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; 2504 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2505 Ops); 2506 } else { 2507 SDValue Ops[] = { 2508 CPIdx, 2509 CurDAG->getTargetConstant(0, dl, MVT::i32), 2510 getAL(CurDAG, dl), 2511 CurDAG->getRegister(0, MVT::i32), 2512 CurDAG->getEntryNode() 2513 }; 2514 ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2515 Ops); 2516 } 2517 ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); 2518 return nullptr; 2519 } 2520 2521 // Other cases are autogenerated. 2522 break; 2523 } 2524 case ISD::FrameIndex: { 2525 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 2526 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2527 SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 2528 if (Subtarget->isThumb1Only()) { 2529 // Set the alignment of the frame object to 4, to avoid having to generate 2530 // more than one ADD 2531 MachineFrameInfo *MFI = MF->getFrameInfo(); 2532 if (MFI->getObjectAlignment(FI) < 4) 2533 MFI->setObjectAlignment(FI, 4); 2534 return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2535 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2536 } else { 2537 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 
2538 ARM::t2ADDri : ARM::ADDri); 2539 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2540 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2541 CurDAG->getRegister(0, MVT::i32) }; 2542 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2543 } 2544 } 2545 case ISD::SRL: 2546 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) 2547 return I; 2548 break; 2549 case ISD::SIGN_EXTEND_INREG: 2550 case ISD::SRA: 2551 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) 2552 return I; 2553 break; 2554 case ISD::MUL: 2555 if (Subtarget->isThumb1Only()) 2556 break; 2557 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2558 unsigned RHSV = C->getZExtValue(); 2559 if (!RHSV) break; 2560 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 2561 unsigned ShImm = Log2_32(RHSV-1); 2562 if (ShImm >= 32) 2563 break; 2564 SDValue V = N->getOperand(0); 2565 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2566 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2567 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2568 if (Subtarget->isThumb()) { 2569 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2570 return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2571 } else { 2572 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2573 Reg0 }; 2574 return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2575 } 2576 } 2577 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
2578 unsigned ShImm = Log2_32(RHSV+1); 2579 if (ShImm >= 32) 2580 break; 2581 SDValue V = N->getOperand(0); 2582 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2583 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2584 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2585 if (Subtarget->isThumb()) { 2586 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2587 return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2588 } else { 2589 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2590 Reg0 }; 2591 return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2592 } 2593 } 2594 } 2595 break; 2596 case ISD::AND: { 2597 // Check for unsigned bitfield extract 2598 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) 2599 return I; 2600 2601 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 2602 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 2603 // are entirely contributed by c2 and lower 16-bits are entirely contributed 2604 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 2605 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 2606 EVT VT = N->getValueType(0); 2607 if (VT != MVT::i32) 2608 break; 2609 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2610 ? ARM::t2MOVTi16 2611 : (Subtarget->hasV6T2Ops() ? 
ARM::MOVTi16 : 0); 2612 if (!Opc) 2613 break; 2614 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2615 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2616 if (!N1C) 2617 break; 2618 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2619 SDValue N2 = N0.getOperand(1); 2620 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2621 if (!N2C) 2622 break; 2623 unsigned N1CVal = N1C->getZExtValue(); 2624 unsigned N2CVal = N2C->getZExtValue(); 2625 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2626 (N1CVal & 0xffffU) == 0xffffU && 2627 (N2CVal & 0xffffU) == 0x0U) { 2628 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2629 dl, MVT::i32); 2630 SDValue Ops[] = { N0.getOperand(0), Imm16, 2631 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2632 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 2633 } 2634 } 2635 break; 2636 } 2637 case ARMISD::VMOVRRD: 2638 return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, 2639 N->getOperand(0), getAL(CurDAG, dl), 2640 CurDAG->getRegister(0, MVT::i32)); 2641 case ISD::UMUL_LOHI: { 2642 if (Subtarget->isThumb1Only()) 2643 break; 2644 if (Subtarget->isThumb()) { 2645 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2646 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2647 return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops); 2648 } else { 2649 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2650 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2651 CurDAG->getRegister(0, MVT::i32) }; 2652 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
2653 ARM::UMULL : ARM::UMULLv5, 2654 dl, MVT::i32, MVT::i32, Ops); 2655 } 2656 } 2657 case ISD::SMUL_LOHI: { 2658 if (Subtarget->isThumb1Only()) 2659 break; 2660 if (Subtarget->isThumb()) { 2661 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2662 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2663 return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops); 2664 } else { 2665 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2666 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2667 CurDAG->getRegister(0, MVT::i32) }; 2668 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 2669 ARM::SMULL : ARM::SMULLv5, 2670 dl, MVT::i32, MVT::i32, Ops); 2671 } 2672 } 2673 case ARMISD::UMLAL:{ 2674 if (Subtarget->isThumb()) { 2675 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2676 N->getOperand(3), getAL(CurDAG, dl), 2677 CurDAG->getRegister(0, MVT::i32)}; 2678 return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops); 2679 }else{ 2680 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2681 N->getOperand(3), getAL(CurDAG, dl), 2682 CurDAG->getRegister(0, MVT::i32), 2683 CurDAG->getRegister(0, MVT::i32) }; 2684 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 2685 ARM::UMLAL : ARM::UMLALv5, 2686 dl, MVT::i32, MVT::i32, Ops); 2687 } 2688 } 2689 case ARMISD::SMLAL:{ 2690 if (Subtarget->isThumb()) { 2691 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2692 N->getOperand(3), getAL(CurDAG, dl), 2693 CurDAG->getRegister(0, MVT::i32)}; 2694 return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops); 2695 }else{ 2696 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2697 N->getOperand(3), getAL(CurDAG, dl), 2698 CurDAG->getRegister(0, MVT::i32), 2699 CurDAG->getRegister(0, MVT::i32) }; 2700 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
2701 ARM::SMLAL : ARM::SMLALv5, 2702 dl, MVT::i32, MVT::i32, Ops); 2703 } 2704 } 2705 case ISD::LOAD: { 2706 SDNode *ResNode = nullptr; 2707 if (Subtarget->isThumb() && Subtarget->hasThumb2()) 2708 ResNode = SelectT2IndexedLoad(N); 2709 else 2710 ResNode = SelectARMIndexedLoad(N); 2711 if (ResNode) 2712 return ResNode; 2713 // Other cases are autogenerated. 2714 break; 2715 } 2716 case ARMISD::BRCOND: { 2717 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2718 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2719 // Pattern complexity = 6 cost = 1 size = 0 2720 2721 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2722 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 2723 // Pattern complexity = 6 cost = 1 size = 0 2724 2725 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2726 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2727 // Pattern complexity = 6 cost = 1 size = 0 2728 2729 unsigned Opc = Subtarget->isThumb() ? 2730 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 2731 SDValue Chain = N->getOperand(0); 2732 SDValue N1 = N->getOperand(1); 2733 SDValue N2 = N->getOperand(2); 2734 SDValue N3 = N->getOperand(3); 2735 SDValue InFlag = N->getOperand(4); 2736 assert(N1.getOpcode() == ISD::BasicBlock); 2737 assert(N2.getOpcode() == ISD::Constant); 2738 assert(N3.getOpcode() == ISD::Register); 2739 2740 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) 2741 cast<ConstantSDNode>(N2)->getZExtValue()), dl, 2742 MVT::i32); 2743 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 2744 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 2745 MVT::Glue, Ops); 2746 Chain = SDValue(ResNode, 0); 2747 if (N->getNumValues() == 2) { 2748 InFlag = SDValue(ResNode, 1); 2749 ReplaceUses(SDValue(N, 1), InFlag); 2750 } 2751 ReplaceUses(SDValue(N, 0), 2752 SDValue(Chain.getNode(), Chain.getResNo())); 2753 return nullptr; 2754 } 2755 case ARMISD::VZIP: { 2756 unsigned Opc = 0; 2757 EVT VT = N->getValueType(0); 2758 switch (VT.getSimpleVT().SimpleTy) { 2759 default: return nullptr; 2760 case MVT::v8i8: Opc = ARM::VZIPd8; break; 2761 case MVT::v4i16: Opc = ARM::VZIPd16; break; 2762 case MVT::v2f32: 2763 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
2764 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2765 case MVT::v16i8: Opc = ARM::VZIPq8; break; 2766 case MVT::v8i16: Opc = ARM::VZIPq16; break; 2767 case MVT::v4f32: 2768 case MVT::v4i32: Opc = ARM::VZIPq32; break; 2769 } 2770 SDValue Pred = getAL(CurDAG, dl); 2771 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2772 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2773 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); 2774 } 2775 case ARMISD::VUZP: { 2776 unsigned Opc = 0; 2777 EVT VT = N->getValueType(0); 2778 switch (VT.getSimpleVT().SimpleTy) { 2779 default: return nullptr; 2780 case MVT::v8i8: Opc = ARM::VUZPd8; break; 2781 case MVT::v4i16: Opc = ARM::VUZPd16; break; 2782 case MVT::v2f32: 2783 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 2784 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2785 case MVT::v16i8: Opc = ARM::VUZPq8; break; 2786 case MVT::v8i16: Opc = ARM::VUZPq16; break; 2787 case MVT::v4f32: 2788 case MVT::v4i32: Opc = ARM::VUZPq32; break; 2789 } 2790 SDValue Pred = getAL(CurDAG, dl); 2791 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2792 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2793 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); 2794 } 2795 case ARMISD::VTRN: { 2796 unsigned Opc = 0; 2797 EVT VT = N->getValueType(0); 2798 switch (VT.getSimpleVT().SimpleTy) { 2799 default: return nullptr; 2800 case MVT::v8i8: Opc = ARM::VTRNd8; break; 2801 case MVT::v4i16: Opc = ARM::VTRNd16; break; 2802 case MVT::v2f32: 2803 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2804 case MVT::v16i8: Opc = ARM::VTRNq8; break; 2805 case MVT::v8i16: Opc = ARM::VTRNq16; break; 2806 case MVT::v4f32: 2807 case MVT::v4i32: Opc = ARM::VTRNq32; break; 2808 } 2809 SDValue Pred = getAL(CurDAG, dl); 2810 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2811 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2812 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); 
2813 } 2814 case ARMISD::BUILD_VECTOR: { 2815 EVT VecVT = N->getValueType(0); 2816 EVT EltVT = VecVT.getVectorElementType(); 2817 unsigned NumElts = VecVT.getVectorNumElements(); 2818 if (EltVT == MVT::f64) { 2819 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 2820 return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); 2821 } 2822 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 2823 if (NumElts == 2) 2824 return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); 2825 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 2826 return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 2827 N->getOperand(2), N->getOperand(3)); 2828 } 2829 2830 case ARMISD::VLD2DUP: { 2831 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 2832 ARM::VLD2DUPd32 }; 2833 return SelectVLDDup(N, false, 2, Opcodes); 2834 } 2835 2836 case ARMISD::VLD3DUP: { 2837 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 2838 ARM::VLD3DUPd16Pseudo, 2839 ARM::VLD3DUPd32Pseudo }; 2840 return SelectVLDDup(N, false, 3, Opcodes); 2841 } 2842 2843 case ARMISD::VLD4DUP: { 2844 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 2845 ARM::VLD4DUPd16Pseudo, 2846 ARM::VLD4DUPd32Pseudo }; 2847 return SelectVLDDup(N, false, 4, Opcodes); 2848 } 2849 2850 case ARMISD::VLD2DUP_UPD: { 2851 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 2852 ARM::VLD2DUPd16wb_fixed, 2853 ARM::VLD2DUPd32wb_fixed }; 2854 return SelectVLDDup(N, true, 2, Opcodes); 2855 } 2856 2857 case ARMISD::VLD3DUP_UPD: { 2858 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 2859 ARM::VLD3DUPd16Pseudo_UPD, 2860 ARM::VLD3DUPd32Pseudo_UPD }; 2861 return SelectVLDDup(N, true, 3, Opcodes); 2862 } 2863 2864 case ARMISD::VLD4DUP_UPD: { 2865 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 2866 ARM::VLD4DUPd16Pseudo_UPD, 2867 ARM::VLD4DUPd32Pseudo_UPD }; 2868 return SelectVLDDup(N, true, 4, Opcodes); 2869 } 2870 2871 
case ARMISD::VLD1_UPD: { 2872 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 2873 ARM::VLD1d16wb_fixed, 2874 ARM::VLD1d32wb_fixed, 2875 ARM::VLD1d64wb_fixed }; 2876 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 2877 ARM::VLD1q16wb_fixed, 2878 ARM::VLD1q32wb_fixed, 2879 ARM::VLD1q64wb_fixed }; 2880 return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 2881 } 2882 2883 case ARMISD::VLD2_UPD: { 2884 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 2885 ARM::VLD2d16wb_fixed, 2886 ARM::VLD2d32wb_fixed, 2887 ARM::VLD1q64wb_fixed}; 2888 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 2889 ARM::VLD2q16PseudoWB_fixed, 2890 ARM::VLD2q32PseudoWB_fixed }; 2891 return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 2892 } 2893 2894 case ARMISD::VLD3_UPD: { 2895 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 2896 ARM::VLD3d16Pseudo_UPD, 2897 ARM::VLD3d32Pseudo_UPD, 2898 ARM::VLD1d64TPseudoWB_fixed}; 2899 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 2900 ARM::VLD3q16Pseudo_UPD, 2901 ARM::VLD3q32Pseudo_UPD }; 2902 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 2903 ARM::VLD3q16oddPseudo_UPD, 2904 ARM::VLD3q32oddPseudo_UPD }; 2905 return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 2906 } 2907 2908 case ARMISD::VLD4_UPD: { 2909 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 2910 ARM::VLD4d16Pseudo_UPD, 2911 ARM::VLD4d32Pseudo_UPD, 2912 ARM::VLD1d64QPseudoWB_fixed}; 2913 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 2914 ARM::VLD4q16Pseudo_UPD, 2915 ARM::VLD4q32Pseudo_UPD }; 2916 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 2917 ARM::VLD4q16oddPseudo_UPD, 2918 ARM::VLD4q32oddPseudo_UPD }; 2919 return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 2920 } 2921 2922 case ARMISD::VLD2LN_UPD: { 2923 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 2924 ARM::VLD2LNd16Pseudo_UPD, 2925 ARM::VLD2LNd32Pseudo_UPD }; 
2926 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 2927 ARM::VLD2LNq32Pseudo_UPD }; 2928 return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 2929 } 2930 2931 case ARMISD::VLD3LN_UPD: { 2932 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 2933 ARM::VLD3LNd16Pseudo_UPD, 2934 ARM::VLD3LNd32Pseudo_UPD }; 2935 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 2936 ARM::VLD3LNq32Pseudo_UPD }; 2937 return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 2938 } 2939 2940 case ARMISD::VLD4LN_UPD: { 2941 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 2942 ARM::VLD4LNd16Pseudo_UPD, 2943 ARM::VLD4LNd32Pseudo_UPD }; 2944 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 2945 ARM::VLD4LNq32Pseudo_UPD }; 2946 return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 2947 } 2948 2949 case ARMISD::VST1_UPD: { 2950 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 2951 ARM::VST1d16wb_fixed, 2952 ARM::VST1d32wb_fixed, 2953 ARM::VST1d64wb_fixed }; 2954 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 2955 ARM::VST1q16wb_fixed, 2956 ARM::VST1q32wb_fixed, 2957 ARM::VST1q64wb_fixed }; 2958 return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 2959 } 2960 2961 case ARMISD::VST2_UPD: { 2962 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 2963 ARM::VST2d16wb_fixed, 2964 ARM::VST2d32wb_fixed, 2965 ARM::VST1q64wb_fixed}; 2966 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 2967 ARM::VST2q16PseudoWB_fixed, 2968 ARM::VST2q32PseudoWB_fixed }; 2969 return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 2970 } 2971 2972 case ARMISD::VST3_UPD: { 2973 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 2974 ARM::VST3d16Pseudo_UPD, 2975 ARM::VST3d32Pseudo_UPD, 2976 ARM::VST1d64TPseudoWB_fixed}; 2977 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 2978 ARM::VST3q16Pseudo_UPD, 2979 ARM::VST3q32Pseudo_UPD }; 2980 static const uint16_t 
QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 2981 ARM::VST3q16oddPseudo_UPD, 2982 ARM::VST3q32oddPseudo_UPD }; 2983 return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 2984 } 2985 2986 case ARMISD::VST4_UPD: { 2987 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 2988 ARM::VST4d16Pseudo_UPD, 2989 ARM::VST4d32Pseudo_UPD, 2990 ARM::VST1d64QPseudoWB_fixed}; 2991 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 2992 ARM::VST4q16Pseudo_UPD, 2993 ARM::VST4q32Pseudo_UPD }; 2994 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 2995 ARM::VST4q16oddPseudo_UPD, 2996 ARM::VST4q32oddPseudo_UPD }; 2997 return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 2998 } 2999 3000 case ARMISD::VST2LN_UPD: { 3001 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3002 ARM::VST2LNd16Pseudo_UPD, 3003 ARM::VST2LNd32Pseudo_UPD }; 3004 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3005 ARM::VST2LNq32Pseudo_UPD }; 3006 return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3007 } 3008 3009 case ARMISD::VST3LN_UPD: { 3010 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3011 ARM::VST3LNd16Pseudo_UPD, 3012 ARM::VST3LNd32Pseudo_UPD }; 3013 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3014 ARM::VST3LNq32Pseudo_UPD }; 3015 return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3016 } 3017 3018 case ARMISD::VST4LN_UPD: { 3019 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3020 ARM::VST4LNd16Pseudo_UPD, 3021 ARM::VST4LNd32Pseudo_UPD }; 3022 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3023 ARM::VST4LNq32Pseudo_UPD }; 3024 return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3025 } 3026 3027 case ISD::INTRINSIC_VOID: 3028 case ISD::INTRINSIC_W_CHAIN: { 3029 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3030 switch (IntNo) { 3031 default: 3032 break; 3033 3034 case Intrinsic::arm_ldaexd: 3035 case 
Intrinsic::arm_ldrexd: { 3036 SDLoc dl(N); 3037 SDValue Chain = N->getOperand(0); 3038 SDValue MemAddr = N->getOperand(2); 3039 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3040 3041 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3042 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3043 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 3044 3045 // arm_ldrexd returns a i64 value in {i32, i32} 3046 std::vector<EVT> ResTys; 3047 if (isThumb) { 3048 ResTys.push_back(MVT::i32); 3049 ResTys.push_back(MVT::i32); 3050 } else 3051 ResTys.push_back(MVT::Untyped); 3052 ResTys.push_back(MVT::Other); 3053 3054 // Place arguments in the right order. 3055 SmallVector<SDValue, 7> Ops; 3056 Ops.push_back(MemAddr); 3057 Ops.push_back(getAL(CurDAG, dl)); 3058 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3059 Ops.push_back(Chain); 3060 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3061 // Transfer memoperands. 3062 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3063 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3064 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 3065 3066 // Remap uses. 3067 SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); 3068 if (!SDValue(N, 0).use_empty()) { 3069 SDValue Result; 3070 if (isThumb) 3071 Result = SDValue(Ld, 0); 3072 else { 3073 SDValue SubRegIdx = 3074 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3075 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3076 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3077 Result = SDValue(ResNode,0); 3078 } 3079 ReplaceUses(SDValue(N, 0), Result); 3080 } 3081 if (!SDValue(N, 1).use_empty()) { 3082 SDValue Result; 3083 if (isThumb) 3084 Result = SDValue(Ld, 1); 3085 else { 3086 SDValue SubRegIdx = 3087 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3088 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3089 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3090 Result = SDValue(ResNode,0); 3091 } 3092 ReplaceUses(SDValue(N, 1), Result); 3093 } 3094 ReplaceUses(SDValue(N, 2), OutChain); 3095 return nullptr; 3096 } 3097 case Intrinsic::arm_stlexd: 3098 case Intrinsic::arm_strexd: { 3099 SDLoc dl(N); 3100 SDValue Chain = N->getOperand(0); 3101 SDValue Val0 = N->getOperand(2); 3102 SDValue Val1 = N->getOperand(3); 3103 SDValue MemAddr = N->getOperand(4); 3104 3105 // Store exclusive double return a i32 value which is the return status 3106 // of the issued store. 3107 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3108 3109 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3110 // Place arguments in the right order. 3111 SmallVector<SDValue, 7> Ops; 3112 if (isThumb) { 3113 Ops.push_back(Val0); 3114 Ops.push_back(Val1); 3115 } else 3116 // arm_strexd uses GPRPair. 3117 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3118 Ops.push_back(MemAddr); 3119 Ops.push_back(getAL(CurDAG, dl)); 3120 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3121 Ops.push_back(Chain); 3122 3123 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3124 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 3125 : (IsRelease ? 
ARM::STLEXD : ARM::STREXD); 3126 3127 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3128 // Transfer memoperands. 3129 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3130 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3131 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 3132 3133 return St; 3134 } 3135 3136 case Intrinsic::arm_neon_vld1: { 3137 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3138 ARM::VLD1d32, ARM::VLD1d64 }; 3139 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3140 ARM::VLD1q32, ARM::VLD1q64}; 3141 return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 3142 } 3143 3144 case Intrinsic::arm_neon_vld2: { 3145 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3146 ARM::VLD2d32, ARM::VLD1q64 }; 3147 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3148 ARM::VLD2q32Pseudo }; 3149 return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3150 } 3151 3152 case Intrinsic::arm_neon_vld3: { 3153 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3154 ARM::VLD3d16Pseudo, 3155 ARM::VLD3d32Pseudo, 3156 ARM::VLD1d64TPseudo }; 3157 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3158 ARM::VLD3q16Pseudo_UPD, 3159 ARM::VLD3q32Pseudo_UPD }; 3160 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3161 ARM::VLD3q16oddPseudo, 3162 ARM::VLD3q32oddPseudo }; 3163 return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3164 } 3165 3166 case Intrinsic::arm_neon_vld4: { 3167 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3168 ARM::VLD4d16Pseudo, 3169 ARM::VLD4d32Pseudo, 3170 ARM::VLD1d64QPseudo }; 3171 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3172 ARM::VLD4q16Pseudo_UPD, 3173 ARM::VLD4q32Pseudo_UPD }; 3174 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3175 ARM::VLD4q16oddPseudo, 3176 ARM::VLD4q32oddPseudo }; 3177 return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, 
QOpcodes1); 3178 } 3179 3180 case Intrinsic::arm_neon_vld2lane: { 3181 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3182 ARM::VLD2LNd16Pseudo, 3183 ARM::VLD2LNd32Pseudo }; 3184 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3185 ARM::VLD2LNq32Pseudo }; 3186 return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3187 } 3188 3189 case Intrinsic::arm_neon_vld3lane: { 3190 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3191 ARM::VLD3LNd16Pseudo, 3192 ARM::VLD3LNd32Pseudo }; 3193 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3194 ARM::VLD3LNq32Pseudo }; 3195 return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3196 } 3197 3198 case Intrinsic::arm_neon_vld4lane: { 3199 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3200 ARM::VLD4LNd16Pseudo, 3201 ARM::VLD4LNd32Pseudo }; 3202 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3203 ARM::VLD4LNq32Pseudo }; 3204 return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3205 } 3206 3207 case Intrinsic::arm_neon_vst1: { 3208 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 3209 ARM::VST1d32, ARM::VST1d64 }; 3210 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3211 ARM::VST1q32, ARM::VST1q64 }; 3212 return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 3213 } 3214 3215 case Intrinsic::arm_neon_vst2: { 3216 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3217 ARM::VST2d32, ARM::VST1q64 }; 3218 static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3219 ARM::VST2q32Pseudo }; 3220 return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3221 } 3222 3223 case Intrinsic::arm_neon_vst3: { 3224 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3225 ARM::VST3d16Pseudo, 3226 ARM::VST3d32Pseudo, 3227 ARM::VST1d64TPseudo }; 3228 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3229 ARM::VST3q16Pseudo_UPD, 3230 ARM::VST3q32Pseudo_UPD }; 3231 static const uint16_t 
QOpcodes1[] = { ARM::VST3q8oddPseudo, 3232 ARM::VST3q16oddPseudo, 3233 ARM::VST3q32oddPseudo }; 3234 return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3235 } 3236 3237 case Intrinsic::arm_neon_vst4: { 3238 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3239 ARM::VST4d16Pseudo, 3240 ARM::VST4d32Pseudo, 3241 ARM::VST1d64QPseudo }; 3242 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3243 ARM::VST4q16Pseudo_UPD, 3244 ARM::VST4q32Pseudo_UPD }; 3245 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3246 ARM::VST4q16oddPseudo, 3247 ARM::VST4q32oddPseudo }; 3248 return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3249 } 3250 3251 case Intrinsic::arm_neon_vst2lane: { 3252 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 3253 ARM::VST2LNd16Pseudo, 3254 ARM::VST2LNd32Pseudo }; 3255 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 3256 ARM::VST2LNq32Pseudo }; 3257 return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 3258 } 3259 3260 case Intrinsic::arm_neon_vst3lane: { 3261 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 3262 ARM::VST3LNd16Pseudo, 3263 ARM::VST3LNd32Pseudo }; 3264 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 3265 ARM::VST3LNq32Pseudo }; 3266 return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 3267 } 3268 3269 case Intrinsic::arm_neon_vst4lane: { 3270 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 3271 ARM::VST4LNd16Pseudo, 3272 ARM::VST4LNd32Pseudo }; 3273 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 3274 ARM::VST4LNq32Pseudo }; 3275 return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 3276 } 3277 } 3278 break; 3279 } 3280 3281 case ISD::INTRINSIC_WO_CHAIN: { 3282 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3283 switch (IntNo) { 3284 default: 3285 break; 3286 3287 case Intrinsic::arm_neon_vtbl2: 3288 return SelectVTBL(N, false, 2, ARM::VTBL2); 3289 case 
Intrinsic::arm_neon_vtbl3: 3290 return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); 3291 case Intrinsic::arm_neon_vtbl4: 3292 return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); 3293 3294 case Intrinsic::arm_neon_vtbx2: 3295 return SelectVTBL(N, true, 2, ARM::VTBX2); 3296 case Intrinsic::arm_neon_vtbx3: 3297 return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); 3298 case Intrinsic::arm_neon_vtbx4: 3299 return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); 3300 } 3301 break; 3302 } 3303 3304 case ARMISD::VTBL1: { 3305 SDLoc dl(N); 3306 EVT VT = N->getValueType(0); 3307 SmallVector<SDValue, 6> Ops; 3308 3309 Ops.push_back(N->getOperand(0)); 3310 Ops.push_back(N->getOperand(1)); 3311 Ops.push_back(getAL(CurDAG, dl)); // Predicate 3312 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register 3313 return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops); 3314 } 3315 case ARMISD::VTBL2: { 3316 SDLoc dl(N); 3317 EVT VT = N->getValueType(0); 3318 3319 // Form a REG_SEQUENCE to force register allocation. 3320 SDValue V0 = N->getOperand(0); 3321 SDValue V1 = N->getOperand(1); 3322 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 3323 3324 SmallVector<SDValue, 6> Ops; 3325 Ops.push_back(RegSeq); 3326 Ops.push_back(N->getOperand(2)); 3327 Ops.push_back(getAL(CurDAG, dl)); // Predicate 3328 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register 3329 return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops); 3330 } 3331 3332 case ISD::CONCAT_VECTORS: 3333 return SelectConcatVector(N); 3334 } 3335 3336 return SelectCode(N); 3337 } 3338 3339 SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ 3340 std::vector<SDValue> AsmNodeOperands; 3341 unsigned Flag, Kind; 3342 bool Changed = false; 3343 unsigned NumOps = N->getNumOperands(); 3344 3345 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 3346 // However, some instrstions (e.g. 
ldrexd/strexd in ARM mode) require 3347 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 3348 // respectively. Since there is no constraint to explicitly specify a 3349 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 3350 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 3351 // them into a GPRPair. 3352 3353 SDLoc dl(N); 3354 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 3355 : SDValue(nullptr,0); 3356 3357 SmallVector<bool, 8> OpChanged; 3358 // Glue node will be appended late. 3359 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 3360 SDValue op = N->getOperand(i); 3361 AsmNodeOperands.push_back(op); 3362 3363 if (i < InlineAsm::Op_FirstOperand) 3364 continue; 3365 3366 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 3367 Flag = C->getZExtValue(); 3368 Kind = InlineAsm::getKind(Flag); 3369 } 3370 else 3371 continue; 3372 3373 // Immediate operands to inline asm in the SelectionDAG are modeled with 3374 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 3375 // the second is a constant with the value of the immediate. If we get here 3376 // and we have a Kind_Imm, skip the next operand, and continue. 3377 if (Kind == InlineAsm::Kind_Imm) { 3378 SDValue op = N->getOperand(++i); 3379 AsmNodeOperands.push_back(op); 3380 continue; 3381 } 3382 3383 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 3384 if (NumRegs) 3385 OpChanged.push_back(false); 3386 3387 unsigned DefIdx = 0; 3388 bool IsTiedToChangedOp = false; 3389 // If it's a use that is tied with a previous def, it has no 3390 // reg class constraint. 
3391 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 3392 IsTiedToChangedOp = OpChanged[DefIdx]; 3393 3394 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 3395 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 3396 continue; 3397 3398 unsigned RC; 3399 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 3400 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 3401 || NumRegs != 2) 3402 continue; 3403 3404 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 3405 SDValue V0 = N->getOperand(i+1); 3406 SDValue V1 = N->getOperand(i+2); 3407 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 3408 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 3409 SDValue PairedReg; 3410 MachineRegisterInfo &MRI = MF->getRegInfo(); 3411 3412 if (Kind == InlineAsm::Kind_RegDef || 3413 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 3414 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 3415 // the original GPRs. 3416 3417 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 3418 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 3419 SDValue Chain = SDValue(N,0); 3420 3421 SDNode *GU = N->getGluedUser(); 3422 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 3423 Chain.getValue(1)); 3424 3425 // Extract values from a GPRPair reg and copy to the original GPR reg. 3426 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 3427 RegCopy); 3428 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 3429 RegCopy); 3430 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 3431 RegCopy.getValue(1)); 3432 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 3433 3434 // Update the original glue user. 
3435 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 3436 Ops.push_back(T1.getValue(1)); 3437 CurDAG->UpdateNodeOperands(GU, Ops); 3438 } 3439 else { 3440 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 3441 // GPRPair and then pass the GPRPair to the inline asm. 3442 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 3443 3444 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 3445 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 3446 Chain.getValue(1)); 3447 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 3448 T0.getValue(1)); 3449 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 3450 3451 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 3452 // i32 VRs of inline asm with it. 3453 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 3454 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 3455 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 3456 3457 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 3458 Glue = Chain.getValue(1); 3459 } 3460 3461 Changed = true; 3462 3463 if(PairedReg.getNode()) { 3464 OpChanged[OpChanged.size() -1 ] = true; 3465 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 3466 if (IsTiedToChangedOp) 3467 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 3468 else 3469 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 3470 // Replace the current flag. 3471 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 3472 Flag, dl, MVT::i32); 3473 // Add the new register node and skip the original two GPRs. 3474 AsmNodeOperands.push_back(PairedReg); 3475 // Skip the next two GPRs. 
3476 i += 2; 3477 } 3478 } 3479 3480 if (Glue.getNode()) 3481 AsmNodeOperands.push_back(Glue); 3482 if (!Changed) 3483 return nullptr; 3484 3485 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), 3486 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 3487 New->setNodeId(-1); 3488 return New.getNode(); 3489 } 3490 3491 3492 bool ARMDAGToDAGISel:: 3493 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 3494 std::vector<SDValue> &OutOps) { 3495 assert(ConstraintID == InlineAsm::Constraint_m && 3496 "unexpected asm memory constraint"); 3497 // Require the address to be in a register. That is safe for all ARM 3498 // variants and it is hard to do anything much smarter without knowing 3499 // how the operand is used. 3500 OutOps.push_back(Op); 3501 return false; 3502 } 3503 3504 /// createARMISelDag - This pass converts a legalized DAG into a 3505 /// ARM-specific DAG, ready for instruction scheduling. 3506 /// 3507 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 3508 CodeGenOpt::Level OptLevel) { 3509 return new ARMDAGToDAGISel(TM, OptLevel); 3510 } 3511