1 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the Base ARM implementation of the TargetInstrInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARMBaseInstrInfo.h" 14 #include "ARMBaseRegisterInfo.h" 15 #include "ARMConstantPoolValue.h" 16 #include "ARMFeatures.h" 17 #include "ARMHazardRecognizer.h" 18 #include "ARMMachineFunctionInfo.h" 19 #include "ARMSubtarget.h" 20 #include "MCTargetDesc/ARMAddressingModes.h" 21 #include "MCTargetDesc/ARMBaseInfo.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallSet.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Triple.h" 27 #include "llvm/CodeGen/LiveVariables.h" 28 #include "llvm/CodeGen/MachineBasicBlock.h" 29 #include "llvm/CodeGen/MachineConstantPool.h" 30 #include "llvm/CodeGen/MachineFrameInfo.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineInstr.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineMemOperand.h" 35 #include "llvm/CodeGen/MachineOperand.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" 38 #include "llvm/CodeGen/SelectionDAGNodes.h" 39 #include "llvm/CodeGen/TargetInstrInfo.h" 40 #include "llvm/CodeGen/TargetRegisterInfo.h" 41 #include "llvm/CodeGen/TargetSchedule.h" 42 #include "llvm/IR/Attributes.h" 43 #include "llvm/IR/Constants.h" 44 #include "llvm/IR/DebugLoc.h" 45 #include "llvm/IR/Function.h" 46 #include "llvm/IR/GlobalValue.h" 47 #include "llvm/MC/MCAsmInfo.h" 48 #include "llvm/MC/MCInstrDesc.h" 49 #include "llvm/MC/MCInstrItineraries.h" 50 #include "llvm/Support/BranchProbability.h" 51 #include "llvm/Support/Casting.h" 52 #include "llvm/Support/CommandLine.h" 53 #include "llvm/Support/Compiler.h" 54 #include "llvm/Support/Debug.h" 55 #include "llvm/Support/ErrorHandling.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include "llvm/Target/TargetMachine.h" 58 #include <algorithm> 59 #include <cassert> 60 #include <cstdint> 61 #include <iterator> 62 #include <new> 63 #include <utility> 64 #include <vector> 65 66 using namespace llvm; 67 68 #define DEBUG_TYPE "arm-instrinfo" 69 70 #define GET_INSTRINFO_CTOR_DTOR 71 #include "ARMGenInstrInfo.inc" 72 73 static cl::opt<bool> 74 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, 75 cl::desc("Enable ARM 2-addr to 3-addr conv")); 76 77 /// ARM_MLxEntry - Record information about MLA / MLS instructions. 78 struct ARM_MLxEntry { 79 uint16_t MLxOpc; // MLA / MLS opcode 80 uint16_t MulOpc; // Expanded multiplication opcode 81 uint16_t AddSubOpc; // Expanded add / sub opcode 82 bool NegAcc; // True if the acc is negated before the add / sub. 83 bool HasLane; // True if instruction has an extra "lane" operand. 
84 }; 85 86 static const ARM_MLxEntry ARM_MLxTable[] = { 87 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane 88 // fp scalar ops 89 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, 90 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, 91 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, 92 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, 93 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, 94 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, 95 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, 96 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, 97 98 // fp SIMD ops 99 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, 100 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, 101 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, 102 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, 103 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, 104 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, 105 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, 106 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, 107 }; 108 109 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) 110 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), 111 Subtarget(STI) { 112 for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { 113 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) 114 llvm_unreachable("Duplicated entries?"); 115 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); 116 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); 117 } 118 } 119 120 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl 121 // currently defaults to no prepass hazard recognizer. 122 ScheduleHazardRecognizer * 123 ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, 124 const ScheduleDAG *DAG) const { 125 if (usePreRAHazardRecognizer()) { 126 const InstrItineraryData *II = 127 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); 128 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); 129 } 130 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); 131 } 132 133 ScheduleHazardRecognizer *ARMBaseInstrInfo:: 134 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 135 const ScheduleDAG *DAG) const { 136 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) 137 return new ARMHazardRecognizer(II, DAG); 138 return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); 139 } 140 141 MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( 142 MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { 143 // FIXME: Thumb2 support. 144 145 if (!EnableARM3Addr) 146 return nullptr; 147 148 MachineFunction &MF = *MI.getParent()->getParent(); 149 uint64_t TSFlags = MI.getDesc().TSFlags; 150 bool isPre = false; 151 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { 152 default: return nullptr; 153 case ARMII::IndexModePre: 154 isPre = true; 155 break; 156 case ARMII::IndexModePost: 157 break; 158 } 159 160 // Try splitting an indexed load/store to an un-indexed one plus an add/sub 161 // operation. 
162 unsigned MemOpc = getUnindexedOpcode(MI.getOpcode()); 163 if (MemOpc == 0) 164 return nullptr; 165 166 MachineInstr *UpdateMI = nullptr; 167 MachineInstr *MemMI = nullptr; 168 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); 169 const MCInstrDesc &MCID = MI.getDesc(); 170 unsigned NumOps = MCID.getNumOperands(); 171 bool isLoad = !MI.mayStore(); 172 const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0); 173 const MachineOperand &Base = MI.getOperand(2); 174 const MachineOperand &Offset = MI.getOperand(NumOps - 3); 175 Register WBReg = WB.getReg(); 176 Register BaseReg = Base.getReg(); 177 Register OffReg = Offset.getReg(); 178 unsigned OffImm = MI.getOperand(NumOps - 2).getImm(); 179 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm(); 180 switch (AddrMode) { 181 default: llvm_unreachable("Unknown indexed op!"); 182 case ARMII::AddrMode2: { 183 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 184 unsigned Amt = ARM_AM::getAM2Offset(OffImm); 185 if (OffReg == 0) { 186 if (ARM_AM::getSOImmVal(Amt) == -1) 187 // Can't encode it in a so_imm operand. This transformation will 188 // add more than 1 instruction. Abandon! 189 return nullptr; 190 UpdateMI = BuildMI(MF, MI.getDebugLoc(), 191 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 192 .addReg(BaseReg) 193 .addImm(Amt) 194 .add(predOps(Pred)) 195 .add(condCodeOp()); 196 } else if (Amt != 0) { 197 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); 198 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); 199 UpdateMI = BuildMI(MF, MI.getDebugLoc(), 200 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) 201 .addReg(BaseReg) 202 .addReg(OffReg) 203 .addReg(0) 204 .addImm(SOOpc) 205 .add(predOps(Pred)) 206 .add(condCodeOp()); 207 } else 208 UpdateMI = BuildMI(MF, MI.getDebugLoc(), 209 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 210 .addReg(BaseReg) 211 .addReg(OffReg) 212 .add(predOps(Pred)) 213 .add(condCodeOp()); 214 break; 215 } 216 case ARMII::AddrMode3 : { 217 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; 218 unsigned Amt = ARM_AM::getAM3Offset(OffImm); 219 if (OffReg == 0) 220 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. 221 UpdateMI = BuildMI(MF, MI.getDebugLoc(), 222 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 223 .addReg(BaseReg) 224 .addImm(Amt) 225 .add(predOps(Pred)) 226 .add(condCodeOp()); 227 else 228 UpdateMI = BuildMI(MF, MI.getDebugLoc(), 229 get(isSub ? 
ARM::SUBrr : ARM::ADDrr), WBReg) 230 .addReg(BaseReg) 231 .addReg(OffReg) 232 .add(predOps(Pred)) 233 .add(condCodeOp()); 234 break; 235 } 236 } 237 238 std::vector<MachineInstr*> NewMIs; 239 if (isPre) { 240 if (isLoad) 241 MemMI = 242 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) 243 .addReg(WBReg) 244 .addImm(0) 245 .addImm(Pred); 246 else 247 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) 248 .addReg(MI.getOperand(1).getReg()) 249 .addReg(WBReg) 250 .addReg(0) 251 .addImm(0) 252 .addImm(Pred); 253 NewMIs.push_back(MemMI); 254 NewMIs.push_back(UpdateMI); 255 } else { 256 if (isLoad) 257 MemMI = 258 BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg()) 259 .addReg(BaseReg) 260 .addImm(0) 261 .addImm(Pred); 262 else 263 MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) 264 .addReg(MI.getOperand(1).getReg()) 265 .addReg(BaseReg) 266 .addReg(0) 267 .addImm(0) 268 .addImm(Pred); 269 if (WB.isDead()) 270 UpdateMI->getOperand(0).setIsDead(); 271 NewMIs.push_back(UpdateMI); 272 NewMIs.push_back(MemMI); 273 } 274 275 // Transfer LiveVariables states, kill / dead info. 276 if (LV) { 277 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 278 MachineOperand &MO = MI.getOperand(i); 279 if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { 280 Register Reg = MO.getReg(); 281 282 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 283 if (MO.isDef()) { 284 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; 285 if (MO.isDead()) 286 LV->addVirtualRegisterDead(Reg, *NewMI); 287 } 288 if (MO.isUse() && MO.isKill()) { 289 for (unsigned j = 0; j < 2; ++j) { 290 // Look at the two new MI's in reverse order. 291 MachineInstr *NewMI = NewMIs[j]; 292 if (!NewMI->readsRegister(Reg)) 293 continue; 294 LV->addVirtualRegisterKilled(Reg, *NewMI); 295 if (VI.removeKill(MI)) 296 VI.Kills.push_back(NewMI); 297 break; 298 } 299 } 300 } 301 } 302 } 303 304 MachineBasicBlock::iterator MBBI = MI.getIterator(); 305 MFI->insert(MBBI, NewMIs[1]); 306 MFI->insert(MBBI, NewMIs[0]); 307 return NewMIs[0]; 308 } 309 310 // Branch analysis. 311 bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, 312 MachineBasicBlock *&TBB, 313 MachineBasicBlock *&FBB, 314 SmallVectorImpl<MachineOperand> &Cond, 315 bool AllowModify) const { 316 TBB = nullptr; 317 FBB = nullptr; 318 319 MachineBasicBlock::iterator I = MBB.end(); 320 if (I == MBB.begin()) 321 return false; // Empty blocks are easy. 322 --I; 323 324 // Walk backwards from the end of the basic block until the branch is 325 // analyzed or we give up. 326 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) { 327 // Flag to be raised on unanalyzeable instructions. This is useful in cases 328 // where we want to clean up on the end of the basic block before we bail 329 // out. 330 bool CantAnalyze = false; 331 332 // Skip over DEBUG values and predicated nonterminators. 333 while (I->isDebugInstr() || !I->isTerminator()) { 334 if (I == MBB.begin()) 335 return false; 336 --I; 337 } 338 339 if (isIndirectBranchOpcode(I->getOpcode()) || 340 isJumpTableBranchOpcode(I->getOpcode())) { 341 // Indirect branches and jump tables can't be analyzed, but we still want 342 // to clean up any instructions at the tail of the basic block. 343 CantAnalyze = true; 344 } else if (isUncondBranchOpcode(I->getOpcode())) { 345 TBB = I->getOperand(0).getMBB(); 346 } else if (isCondBranchOpcode(I->getOpcode())) { 347 // Bail out if we encounter multiple conditional branches. 
348 if (!Cond.empty()) 349 return true; 350 351 assert(!FBB && "FBB should have been null."); 352 FBB = TBB; 353 TBB = I->getOperand(0).getMBB(); 354 Cond.push_back(I->getOperand(1)); 355 Cond.push_back(I->getOperand(2)); 356 } else if (I->isReturn()) { 357 // Returns can't be analyzed, but we should run cleanup. 358 CantAnalyze = !isPredicated(*I); 359 } else { 360 // We encountered other unrecognized terminator. Bail out immediately. 361 return true; 362 } 363 364 // Cleanup code - to be run for unpredicated unconditional branches and 365 // returns. 366 if (!isPredicated(*I) && 367 (isUncondBranchOpcode(I->getOpcode()) || 368 isIndirectBranchOpcode(I->getOpcode()) || 369 isJumpTableBranchOpcode(I->getOpcode()) || 370 I->isReturn())) { 371 // Forget any previous condition branch information - it no longer applies. 372 Cond.clear(); 373 FBB = nullptr; 374 375 // If we can modify the function, delete everything below this 376 // unconditional branch. 377 if (AllowModify) { 378 MachineBasicBlock::iterator DI = std::next(I); 379 while (DI != MBB.end()) { 380 MachineInstr &InstToDelete = *DI; 381 ++DI; 382 InstToDelete.eraseFromParent(); 383 } 384 } 385 } 386 387 if (CantAnalyze) 388 return true; 389 390 if (I == MBB.begin()) 391 return false; 392 393 --I; 394 } 395 396 // We made it past the terminators without bailing out - we must have 397 // analyzed this branch successfully. 398 return false; 399 } 400 401 unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, 402 int *BytesRemoved) const { 403 assert(!BytesRemoved && "code size not handled"); 404 405 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 406 if (I == MBB.end()) 407 return 0; 408 409 if (!isUncondBranchOpcode(I->getOpcode()) && 410 !isCondBranchOpcode(I->getOpcode())) 411 return 0; 412 413 // Remove the branch. 414 I->eraseFromParent(); 415 416 I = MBB.end(); 417 418 if (I == MBB.begin()) return 1; 419 --I; 420 if (!isCondBranchOpcode(I->getOpcode())) 421 return 1; 422 423 // Remove the branch. 424 I->eraseFromParent(); 425 return 2; 426 } 427 428 unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, 429 MachineBasicBlock *TBB, 430 MachineBasicBlock *FBB, 431 ArrayRef<MachineOperand> Cond, 432 const DebugLoc &DL, 433 int *BytesAdded) const { 434 assert(!BytesAdded && "code size not handled"); 435 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); 436 int BOpc = !AFI->isThumbFunction() 437 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); 438 int BccOpc = !AFI->isThumbFunction() 439 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); 440 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); 441 442 // Shouldn't be a fall through. 443 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 444 assert((Cond.size() == 2 || Cond.size() == 0) && 445 "ARM branch conditions have two components!"); 446 447 // For conditional branches, we use addOperand to preserve CPSR flags. 448 449 if (!FBB) { 450 if (Cond.empty()) { // Unconditional branch? 451 if (isThumb) 452 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); 453 else 454 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); 455 } else 456 BuildMI(&MBB, DL, get(BccOpc)) 457 .addMBB(TBB) 458 .addImm(Cond[0].getImm()) 459 .add(Cond[1]); 460 return 1; 461 } 462 463 // Two-way conditional branch. 
464 BuildMI(&MBB, DL, get(BccOpc)) 465 .addMBB(TBB) 466 .addImm(Cond[0].getImm()) 467 .add(Cond[1]); 468 if (isThumb) 469 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); 470 else 471 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); 472 return 2; 473 } 474 475 bool ARMBaseInstrInfo:: 476 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 477 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); 478 Cond[0].setImm(ARMCC::getOppositeCondition(CC)); 479 return false; 480 } 481 482 bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { 483 if (MI.isBundle()) { 484 MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 485 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 486 while (++I != E && I->isInsideBundle()) { 487 int PIdx = I->findFirstPredOperandIdx(); 488 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) 489 return true; 490 } 491 return false; 492 } 493 494 int PIdx = MI.findFirstPredOperandIdx(); 495 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL; 496 } 497 498 std::string ARMBaseInstrInfo::createMIROperandComment( 499 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, 500 const TargetRegisterInfo *TRI) const { 501 502 // First, let's see if there is a generic comment for this operand 503 std::string GenericComment = 504 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); 505 if (!GenericComment.empty()) 506 return GenericComment; 507 508 // If not, check if we have an immediate operand. 509 if (Op.getType() != MachineOperand::MO_Immediate) 510 return std::string(); 511 512 // And print its corresponding condition code if the immediate is a 513 // predicate. 514 int FirstPredOp = MI.findFirstPredOperandIdx(); 515 if (FirstPredOp != (int) OpIdx) 516 return std::string(); 517 518 std::string CC = "CC::"; 519 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm()); 520 return CC; 521 } 522 523 bool ARMBaseInstrInfo::PredicateInstruction( 524 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { 525 unsigned Opc = MI.getOpcode(); 526 if (isUncondBranchOpcode(Opc)) { 527 MI.setDesc(get(getMatchingCondBranchOpcode(Opc))); 528 MachineInstrBuilder(*MI.getParent()->getParent(), MI) 529 .addImm(Pred[0].getImm()) 530 .addReg(Pred[1].getReg()); 531 return true; 532 } 533 534 int PIdx = MI.findFirstPredOperandIdx(); 535 if (PIdx != -1) { 536 MachineOperand &PMO = MI.getOperand(PIdx); 537 PMO.setImm(Pred[0].getImm()); 538 MI.getOperand(PIdx+1).setReg(Pred[1].getReg()); 539 return true; 540 } 541 return false; 542 } 543 544 bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, 545 ArrayRef<MachineOperand> Pred2) const { 546 if (Pred1.size() > 2 || Pred2.size() > 2) 547 return false; 548 549 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); 550 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); 551 if (CC1 == CC2) 552 return true; 553 554 switch (CC1) { 555 default: 556 return false; 557 case ARMCC::AL: 558 return true; 559 case ARMCC::HS: 560 return CC2 == ARMCC::HI; 561 case ARMCC::LS: 562 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; 563 case ARMCC::GE: 564 return CC2 == ARMCC::GT; 565 case ARMCC::LE: 566 return CC2 == ARMCC::LT; 567 } 568 } 569 570 bool ARMBaseInstrInfo::DefinesPredicate( 571 MachineInstr &MI, std::vector<MachineOperand> &Pred) const { 572 bool Found = false; 573 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 574 const MachineOperand &MO = MI.getOperand(i); 575 if ((MO.isRegMask() && 
MO.clobbersPhysReg(ARM::CPSR)) || 576 (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { 577 Pred.push_back(MO); 578 Found = true; 579 } 580 } 581 582 return Found; 583 } 584 585 bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { 586 for (const auto &MO : MI.operands()) 587 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) 588 return true; 589 return false; 590 } 591 592 bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, 593 unsigned Op) const { 594 const MachineOperand &Offset = MI.getOperand(Op + 1); 595 return Offset.getReg() != 0; 596 } 597 598 // Load with negative register offset requires additional 1cyc and +I unit 599 // for Cortex A57 600 bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, 601 unsigned Op) const { 602 const MachineOperand &Offset = MI.getOperand(Op + 1); 603 const MachineOperand &Opc = MI.getOperand(Op + 2); 604 assert(Opc.isImm()); 605 assert(Offset.isReg()); 606 int64_t OpcImm = Opc.getImm(); 607 608 bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; 609 return (isSub && Offset.getReg() != 0); 610 } 611 612 bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, 613 unsigned Op) const { 614 const MachineOperand &Opc = MI.getOperand(Op + 2); 615 unsigned OffImm = Opc.getImm(); 616 return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; 617 } 618 619 // Load, scaled register offset, not plus LSL2 620 bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, 621 unsigned Op) const { 622 const MachineOperand &Opc = MI.getOperand(Op + 2); 623 unsigned OffImm = Opc.getImm(); 624 625 bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; 626 unsigned Amt = ARM_AM::getAM2Offset(OffImm); 627 ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); 628 if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled 629 bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); 630 return !SimpleScaled; 631 } 632 633 // Minus reg for ldstso addr mode 634 bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, 635 unsigned Op) const { 636 unsigned OffImm = MI.getOperand(Op + 2).getImm(); 637 return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 638 } 639 640 // Load, scaled register offset 641 bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, 642 unsigned Op) const { 643 unsigned OffImm = MI.getOperand(Op + 2).getImm(); 644 return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; 645 } 646 647 static bool isEligibleForITBlock(const MachineInstr *MI) { 648 switch (MI->getOpcode()) { 649 default: return true; 650 case ARM::tADC: // ADC (register) T1 651 case ARM::tADDi3: // ADD (immediate) T1 652 case ARM::tADDi8: // ADD (immediate) T2 653 case ARM::tADDrr: // ADD (register) T1 654 case ARM::tAND: // AND (register) T1 655 case ARM::tASRri: // ASR (immediate) T1 656 case ARM::tASRrr: // ASR (register) T1 657 case ARM::tBIC: // BIC (register) T1 658 case ARM::tEOR: // EOR (register) T1 659 case ARM::tLSLri: // LSL (immediate) T1 660 case ARM::tLSLrr: // LSL (register) T1 661 case ARM::tLSRri: // LSR (immediate) T1 662 case ARM::tLSRrr: // LSR (register) T1 663 case ARM::tMUL: // MUL T1 664 case ARM::tMVN: // MVN (register) T1 665 case ARM::tORR: // ORR (register) T1 666 case ARM::tROR: // ROR (register) T1 667 case ARM::tRSB: // RSB (immediate) T1 668 case ARM::tSBC: // SBC (register) T1 669 case ARM::tSUBi3: // SUB (immediate) T1 670 case ARM::tSUBi8: // SUB (immediate) T2 671 case ARM::tSUBrr: // SUB (register) T1 672 return 
!ARMBaseInstrInfo::isCPSRDefined(*MI); 673 } 674 } 675 676 /// isPredicable - Return true if the specified instruction can be predicated. 677 /// By default, this returns true for every instruction with a 678 /// PredicateOperand. 679 bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { 680 if (!MI.isPredicable()) 681 return false; 682 683 if (MI.isBundle()) 684 return false; 685 686 if (!isEligibleForITBlock(&MI)) 687 return false; 688 689 const ARMFunctionInfo *AFI = 690 MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); 691 692 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. 693 // In their ARM encoding, they can't be encoded in a conditional form. 694 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) 695 return false; 696 697 if (AFI->isThumb2Function()) { 698 if (getSubtarget().restrictIT()) 699 return isV8EligibleForIT(&MI); 700 } 701 702 return true; 703 } 704 705 namespace llvm { 706 707 template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { 708 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 709 const MachineOperand &MO = MI->getOperand(i); 710 if (!MO.isReg() || MO.isUndef() || MO.isUse()) 711 continue; 712 if (MO.getReg() != ARM::CPSR) 713 continue; 714 if (!MO.isDead()) 715 return false; 716 } 717 // all definitions of CPSR are dead 718 return true; 719 } 720 721 } // end namespace llvm 722 723 /// GetInstSize - Return the size of the specified MachineInstr. 724 /// 725 unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 726 const MachineBasicBlock &MBB = *MI.getParent(); 727 const MachineFunction *MF = MBB.getParent(); 728 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 729 730 const MCInstrDesc &MCID = MI.getDesc(); 731 if (MCID.getSize()) 732 return MCID.getSize(); 733 734 switch (MI.getOpcode()) { 735 default: 736 // pseudo-instruction sizes are zero. 737 return 0; 738 case TargetOpcode::BUNDLE: 739 return getInstBundleLength(MI); 740 case ARM::MOVi16_ga_pcrel: 741 case ARM::MOVTi16_ga_pcrel: 742 case ARM::t2MOVi16_ga_pcrel: 743 case ARM::t2MOVTi16_ga_pcrel: 744 return 4; 745 case ARM::MOVi32imm: 746 case ARM::t2MOVi32imm: 747 return 8; 748 case ARM::CONSTPOOL_ENTRY: 749 case ARM::JUMPTABLE_INSTS: 750 case ARM::JUMPTABLE_ADDRS: 751 case ARM::JUMPTABLE_TBB: 752 case ARM::JUMPTABLE_TBH: 753 // If this machine instr is a constant pool entry, its size is recorded as 754 // operand #2. 755 return MI.getOperand(2).getImm(); 756 case ARM::Int_eh_sjlj_longjmp: 757 return 16; 758 case ARM::tInt_eh_sjlj_longjmp: 759 return 10; 760 case ARM::tInt_WIN_eh_sjlj_longjmp: 761 return 12; 762 case ARM::Int_eh_sjlj_setjmp: 763 case ARM::Int_eh_sjlj_setjmp_nofp: 764 return 20; 765 case ARM::tInt_eh_sjlj_setjmp: 766 case ARM::t2Int_eh_sjlj_setjmp: 767 case ARM::t2Int_eh_sjlj_setjmp_nofp: 768 return 12; 769 case ARM::SPACE: 770 return MI.getOperand(1).getImm(); 771 case ARM::INLINEASM: 772 case ARM::INLINEASM_BR: { 773 // If this machine instr is an inline asm, measure it. 
774 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); 775 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction()) 776 Size = alignTo(Size, 4); 777 return Size; 778 } 779 } 780 } 781 782 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { 783 unsigned Size = 0; 784 MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 785 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 786 while (++I != E && I->isInsideBundle()) { 787 assert(!I->isBundle() && "No nested bundle!"); 788 Size += getInstSizeInBytes(*I); 789 } 790 return Size; 791 } 792 793 void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, 794 MachineBasicBlock::iterator I, 795 unsigned DestReg, bool KillSrc, 796 const ARMSubtarget &Subtarget) const { 797 unsigned Opc = Subtarget.isThumb() 798 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) 799 : ARM::MRS; 800 801 MachineInstrBuilder MIB = 802 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); 803 804 // There is only 1 A/R class MRS instruction, and it always refers to 805 // APSR. However, there are lots of other possibilities on M-class cores. 806 if (Subtarget.isMClass()) 807 MIB.addImm(0x800); 808 809 MIB.add(predOps(ARMCC::AL)) 810 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); 811 } 812 813 void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, 814 MachineBasicBlock::iterator I, 815 unsigned SrcReg, bool KillSrc, 816 const ARMSubtarget &Subtarget) const { 817 unsigned Opc = Subtarget.isThumb() 818 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) 819 : ARM::MSR; 820 821 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); 822 823 if (Subtarget.isMClass()) 824 MIB.addImm(0x800); 825 else 826 MIB.addImm(8); 827 828 MIB.addReg(SrcReg, getKillRegState(KillSrc)) 829 .add(predOps(ARMCC::AL)) 830 .addReg(ARM::CPSR, RegState::Implicit | RegState::Define); 831 } 832 833 void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) { 834 MIB.addImm(ARMVCC::None); 835 MIB.addReg(0); 836 } 837 838 void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, 839 Register DestReg) { 840 addUnpredicatedMveVpredNOp(MIB); 841 MIB.addReg(DestReg, RegState::Undef); 842 } 843 844 void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) { 845 MIB.addImm(Cond); 846 MIB.addReg(ARM::VPR, RegState::Implicit); 847 } 848 849 void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB, 850 unsigned Cond, unsigned Inactive) { 851 addPredicatedMveVpredNOp(MIB, Cond); 852 MIB.addReg(Inactive); 853 } 854 855 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 856 MachineBasicBlock::iterator I, 857 const DebugLoc &DL, MCRegister DestReg, 858 MCRegister SrcReg, bool KillSrc) const { 859 bool GPRDest = ARM::GPRRegClass.contains(DestReg); 860 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); 861 862 if (GPRDest && GPRSrc) { 863 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) 864 .addReg(SrcReg, getKillRegState(KillSrc)) 865 .add(predOps(ARMCC::AL)) 866 .add(condCodeOp()); 867 return; 868 } 869 870 bool SPRDest = ARM::SPRRegClass.contains(DestReg); 871 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); 872 873 unsigned Opc = 0; 874 if (SPRDest && SPRSrc) 875 Opc = ARM::VMOVS; 876 else if (GPRDest && SPRSrc) 877 Opc = ARM::VMOVRS; 878 else if (SPRDest && GPRSrc) 879 Opc = ARM::VMOVSR; 880 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64()) 881 Opc = ARM::VMOVD; 882 else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) 883 Opc 
= Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; 884 885 if (Opc) { 886 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); 887 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 888 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) 889 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 890 if (Opc == ARM::MVE_VORR) 891 addUnpredicatedMveVpredROp(MIB, DestReg); 892 else 893 MIB.add(predOps(ARMCC::AL)); 894 return; 895 } 896 897 // Handle register classes that require multiple instructions. 898 unsigned BeginIdx = 0; 899 unsigned SubRegs = 0; 900 int Spacing = 1; 901 902 // Use VORRq when possible. 903 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { 904 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; 905 BeginIdx = ARM::qsub_0; 906 SubRegs = 2; 907 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { 908 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; 909 BeginIdx = ARM::qsub_0; 910 SubRegs = 4; 911 // Fall back to VMOVD. 912 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { 913 Opc = ARM::VMOVD; 914 BeginIdx = ARM::dsub_0; 915 SubRegs = 2; 916 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { 917 Opc = ARM::VMOVD; 918 BeginIdx = ARM::dsub_0; 919 SubRegs = 3; 920 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { 921 Opc = ARM::VMOVD; 922 BeginIdx = ARM::dsub_0; 923 SubRegs = 4; 924 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { 925 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; 926 BeginIdx = ARM::gsub_0; 927 SubRegs = 2; 928 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { 929 Opc = ARM::VMOVD; 930 BeginIdx = ARM::dsub_0; 931 SubRegs = 2; 932 Spacing = 2; 933 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { 934 Opc = ARM::VMOVD; 935 BeginIdx = ARM::dsub_0; 936 SubRegs = 3; 937 Spacing = 2; 938 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { 939 Opc = ARM::VMOVD; 940 BeginIdx = ARM::dsub_0; 941 SubRegs = 4; 942 Spacing = 2; 943 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && 944 !Subtarget.hasFP64()) { 945 Opc = ARM::VMOVS; 946 BeginIdx = ARM::ssub_0; 947 SubRegs = 2; 948 } else if (SrcReg == ARM::CPSR) { 949 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); 950 return; 951 } else if (DestReg == ARM::CPSR) { 952 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); 953 return; 954 } else if (DestReg == ARM::VPR) { 955 assert(ARM::GPRRegClass.contains(SrcReg)); 956 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg) 957 .addReg(SrcReg, getKillRegState(KillSrc)) 958 .add(predOps(ARMCC::AL)); 959 return; 960 } else if (SrcReg == ARM::VPR) { 961 assert(ARM::GPRRegClass.contains(DestReg)); 962 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg) 963 .addReg(SrcReg, getKillRegState(KillSrc)) 964 .add(predOps(ARMCC::AL)); 965 return; 966 } else if (DestReg == ARM::FPSCR_NZCV) { 967 assert(ARM::GPRRegClass.contains(SrcReg)); 968 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg) 969 .addReg(SrcReg, getKillRegState(KillSrc)) 970 .add(predOps(ARMCC::AL)); 971 return; 972 } else if (SrcReg == ARM::FPSCR_NZCV) { 973 assert(ARM::GPRRegClass.contains(DestReg)); 974 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg) 975 .addReg(SrcReg, getKillRegState(KillSrc)) 976 .add(predOps(ARMCC::AL)); 977 return; 978 } 979 980 assert(Opc && "Impossible reg-to-reg copy"); 981 982 const TargetRegisterInfo *TRI = &getRegisterInfo(); 983 MachineInstrBuilder Mov; 984 985 // Copy register tuples backward when the first Dest reg overlaps 
with SrcReg. 986 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { 987 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); 988 Spacing = -Spacing; 989 } 990 #ifndef NDEBUG 991 SmallSet<unsigned, 4> DstRegs; 992 #endif 993 for (unsigned i = 0; i != SubRegs; ++i) { 994 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); 995 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); 996 assert(Dst && Src && "Bad sub-register"); 997 #ifndef NDEBUG 998 assert(!DstRegs.count(Src) && "destructive vector copy"); 999 DstRegs.insert(Dst); 1000 #endif 1001 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); 1002 // VORR (NEON or MVE) takes two source operands. 1003 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) { 1004 Mov.addReg(Src); 1005 } 1006 // MVE VORR takes predicate operands in place of an ordinary condition. 1007 if (Opc == ARM::MVE_VORR) 1008 addUnpredicatedMveVpredROp(Mov, Dst); 1009 else 1010 Mov = Mov.add(predOps(ARMCC::AL)); 1011 // MOVr can set CC. 1012 if (Opc == ARM::MOVr) 1013 Mov = Mov.add(condCodeOp()); 1014 } 1015 // Add implicit super-register defs and kills to the last instruction. 1016 Mov->addRegisterDefined(DestReg, TRI); 1017 if (KillSrc) 1018 Mov->addRegisterKilled(SrcReg, TRI); 1019 } 1020 1021 Optional<DestSourcePair> 1022 ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 1023 // VMOVRRD is also a copy instruction but it requires 1024 // special way of handling. It is more complex copy version 1025 // and since that we are not considering it. For recognition 1026 // of such instruction isExtractSubregLike MI interface fuction 1027 // could be used. 1028 // VORRq is considered as a move only if two inputs are 1029 // the same register. 1030 if (!MI.isMoveReg() || 1031 (MI.getOpcode() == ARM::VORRq && 1032 MI.getOperand(1).getReg() != MI.getOperand(2).getReg())) 1033 return None; 1034 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 1035 } 1036 1037 Optional<ParamLoadedValue> 1038 ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI, 1039 Register Reg) const { 1040 if (auto DstSrcPair = isCopyInstrImpl(MI)) { 1041 Register DstReg = DstSrcPair->Destination->getReg(); 1042 1043 // TODO: We don't handle cases where the forwarding reg is narrower/wider 1044 // than the copy registers. Consider for example: 1045 // 1046 // s16 = VMOVS s0 1047 // s17 = VMOVS s1 1048 // call @callee(d0) 1049 // 1050 // We'd like to describe the call site value of d0 as d8, but this requires 1051 // gathering and merging the descriptions for the two VMOVS instructions. 1052 // 1053 // We also don't handle the reverse situation, where the forwarding reg is 1054 // narrower than the copy destination: 1055 // 1056 // d8 = VMOVD d0 1057 // call @callee(s1) 1058 // 1059 // We need to produce a fragment description (the call site value of s1 is 1060 // /not/ just d8). 
1061 if (DstReg != Reg) 1062 return None; 1063 } 1064 return TargetInstrInfo::describeLoadedValue(MI, Reg); 1065 } 1066 1067 const MachineInstrBuilder & 1068 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, 1069 unsigned SubIdx, unsigned State, 1070 const TargetRegisterInfo *TRI) const { 1071 if (!SubIdx) 1072 return MIB.addReg(Reg, State); 1073 1074 if (Register::isPhysicalRegister(Reg)) 1075 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 1076 return MIB.addReg(Reg, State, SubIdx); 1077 } 1078 1079 void ARMBaseInstrInfo:: 1080 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 1081 Register SrcReg, bool isKill, int FI, 1082 const TargetRegisterClass *RC, 1083 const TargetRegisterInfo *TRI) const { 1084 MachineFunction &MF = *MBB.getParent(); 1085 MachineFrameInfo &MFI = MF.getFrameInfo(); 1086 Align Alignment = MFI.getObjectAlign(FI); 1087 1088 MachineMemOperand *MMO = MF.getMachineMemOperand( 1089 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, 1090 MFI.getObjectSize(FI), Alignment); 1091 1092 switch (TRI->getSpillSize(*RC)) { 1093 case 2: 1094 if (ARM::HPRRegClass.hasSubClassEq(RC)) { 1095 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH)) 1096 .addReg(SrcReg, getKillRegState(isKill)) 1097 .addFrameIndex(FI) 1098 .addImm(0) 1099 .addMemOperand(MMO) 1100 .add(predOps(ARMCC::AL)); 1101 } else 1102 llvm_unreachable("Unknown reg class!"); 1103 break; 1104 case 4: 1105 if (ARM::GPRRegClass.hasSubClassEq(RC)) { 1106 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12)) 1107 .addReg(SrcReg, getKillRegState(isKill)) 1108 .addFrameIndex(FI) 1109 .addImm(0) 1110 .addMemOperand(MMO) 1111 .add(predOps(ARMCC::AL)); 1112 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 1113 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS)) 1114 .addReg(SrcReg, getKillRegState(isKill)) 1115 .addFrameIndex(FI) 1116 .addImm(0) 1117 .addMemOperand(MMO) 1118 .add(predOps(ARMCC::AL)); 1119 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { 1120 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off)) 1121 .addReg(SrcReg, getKillRegState(isKill)) 1122 .addFrameIndex(FI) 1123 .addImm(0) 1124 .addMemOperand(MMO) 1125 .add(predOps(ARMCC::AL)); 1126 } else 1127 llvm_unreachable("Unknown reg class!"); 1128 break; 1129 case 8: 1130 if (ARM::DPRRegClass.hasSubClassEq(RC)) { 1131 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD)) 1132 .addReg(SrcReg, getKillRegState(isKill)) 1133 .addFrameIndex(FI) 1134 .addImm(0) 1135 .addMemOperand(MMO) 1136 .add(predOps(ARMCC::AL)); 1137 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { 1138 if (Subtarget.hasV5TEOps()) { 1139 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD)); 1140 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); 1141 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); 1142 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) 1143 .add(predOps(ARMCC::AL)); 1144 } else { 1145 // Fallback to STM instruction, which has existed since the dawn of 1146 // time. 1147 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA)) 1148 .addFrameIndex(FI) 1149 .addMemOperand(MMO) 1150 .add(predOps(ARMCC::AL)); 1151 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); 1152 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); 1153 } 1154 } else 1155 llvm_unreachable("Unknown reg class!"); 1156 break; 1157 case 16: 1158 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { 1159 // Use aligned spills if the stack can be realigned. 
1160 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { 1161 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64)) 1162 .addFrameIndex(FI) 1163 .addImm(16) 1164 .addReg(SrcReg, getKillRegState(isKill)) 1165 .addMemOperand(MMO) 1166 .add(predOps(ARMCC::AL)); 1167 } else { 1168 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA)) 1169 .addReg(SrcReg, getKillRegState(isKill)) 1170 .addFrameIndex(FI) 1171 .addMemOperand(MMO) 1172 .add(predOps(ARMCC::AL)); 1173 } 1174 } else if (ARM::QPRRegClass.hasSubClassEq(RC) && 1175 Subtarget.hasMVEIntegerOps()) { 1176 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32)); 1177 MIB.addReg(SrcReg, getKillRegState(isKill)) 1178 .addFrameIndex(FI) 1179 .addImm(0) 1180 .addMemOperand(MMO); 1181 addUnpredicatedMveVpredNOp(MIB); 1182 } else 1183 llvm_unreachable("Unknown reg class!"); 1184 break; 1185 case 24: 1186 if (ARM::DTripleRegClass.hasSubClassEq(RC)) { 1187 // Use aligned spills if the stack can be realigned. 1188 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && 1189 Subtarget.hasNEON()) { 1190 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo)) 1191 .addFrameIndex(FI) 1192 .addImm(16) 1193 .addReg(SrcReg, getKillRegState(isKill)) 1194 .addMemOperand(MMO) 1195 .add(predOps(ARMCC::AL)); 1196 } else { 1197 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), 1198 get(ARM::VSTMDIA)) 1199 .addFrameIndex(FI) 1200 .add(predOps(ARMCC::AL)) 1201 .addMemOperand(MMO); 1202 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 1203 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 1204 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 1205 } 1206 } else 1207 llvm_unreachable("Unknown reg class!"); 1208 break; 1209 case 32: 1210 if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { 1211 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && 1212 Subtarget.hasNEON()) { 1213 // FIXME: It's possible to only store part of the QQ register if the 1214 // spilled def has a sub-register index. 
1215 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo)) 1216 .addFrameIndex(FI) 1217 .addImm(16) 1218 .addReg(SrcReg, getKillRegState(isKill)) 1219 .addMemOperand(MMO) 1220 .add(predOps(ARMCC::AL)); 1221 } else { 1222 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), 1223 get(ARM::VSTMDIA)) 1224 .addFrameIndex(FI) 1225 .add(predOps(ARMCC::AL)) 1226 .addMemOperand(MMO); 1227 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 1228 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 1229 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 1230 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 1231 } 1232 } else 1233 llvm_unreachable("Unknown reg class!"); 1234 break; 1235 case 64: 1236 if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 1237 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA)) 1238 .addFrameIndex(FI) 1239 .add(predOps(ARMCC::AL)) 1240 .addMemOperand(MMO); 1241 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 1242 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 1243 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 1244 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 1245 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); 1246 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); 1247 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); 1248 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); 1249 } else 1250 llvm_unreachable("Unknown reg class!"); 1251 break; 1252 default: 1253 llvm_unreachable("Unknown reg class!"); 1254 } 1255 } 1256 1257 unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, 1258 int &FrameIndex) const { 1259 switch (MI.getOpcode()) { 1260 default: break; 1261 case ARM::STRrs: 1262 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. 1263 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() && 1264 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 && 1265 MI.getOperand(3).getImm() == 0) { 1266 FrameIndex = MI.getOperand(1).getIndex(); 1267 return MI.getOperand(0).getReg(); 1268 } 1269 break; 1270 case ARM::STRi12: 1271 case ARM::t2STRi12: 1272 case ARM::tSTRspi: 1273 case ARM::VSTRD: 1274 case ARM::VSTRS: 1275 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && 1276 MI.getOperand(2).getImm() == 0) { 1277 FrameIndex = MI.getOperand(1).getIndex(); 1278 return MI.getOperand(0).getReg(); 1279 } 1280 break; 1281 case ARM::VSTR_P0_off: 1282 if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && 1283 MI.getOperand(1).getImm() == 0) { 1284 FrameIndex = MI.getOperand(0).getIndex(); 1285 return ARM::P0; 1286 } 1287 break; 1288 case ARM::VST1q64: 1289 case ARM::VST1d64TPseudo: 1290 case ARM::VST1d64QPseudo: 1291 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) { 1292 FrameIndex = MI.getOperand(0).getIndex(); 1293 return MI.getOperand(2).getReg(); 1294 } 1295 break; 1296 case ARM::VSTMQIA: 1297 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { 1298 FrameIndex = MI.getOperand(1).getIndex(); 1299 return MI.getOperand(0).getReg(); 1300 } 1301 break; 1302 } 1303 1304 return 0; 1305 } 1306 1307 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, 1308 int &FrameIndex) const { 1309 SmallVector<const MachineMemOperand *, 1> Accesses; 1310 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) && 1311 Accesses.size() == 1) { 1312 FrameIndex = 1313 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) 1314 ->getFrameIndex(); 1315 return true; 1316 } 1317 return false; 1318 } 1319 1320 void ARMBaseInstrInfo:: 1321 
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 1322 Register DestReg, int FI, 1323 const TargetRegisterClass *RC, 1324 const TargetRegisterInfo *TRI) const { 1325 DebugLoc DL; 1326 if (I != MBB.end()) DL = I->getDebugLoc(); 1327 MachineFunction &MF = *MBB.getParent(); 1328 MachineFrameInfo &MFI = MF.getFrameInfo(); 1329 const Align Alignment = MFI.getObjectAlign(FI); 1330 MachineMemOperand *MMO = MF.getMachineMemOperand( 1331 MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, 1332 MFI.getObjectSize(FI), Alignment); 1333 1334 switch (TRI->getSpillSize(*RC)) { 1335 case 2: 1336 if (ARM::HPRRegClass.hasSubClassEq(RC)) { 1337 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg) 1338 .addFrameIndex(FI) 1339 .addImm(0) 1340 .addMemOperand(MMO) 1341 .add(predOps(ARMCC::AL)); 1342 } else 1343 llvm_unreachable("Unknown reg class!"); 1344 break; 1345 case 4: 1346 if (ARM::GPRRegClass.hasSubClassEq(RC)) { 1347 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) 1348 .addFrameIndex(FI) 1349 .addImm(0) 1350 .addMemOperand(MMO) 1351 .add(predOps(ARMCC::AL)); 1352 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 1353 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) 1354 .addFrameIndex(FI) 1355 .addImm(0) 1356 .addMemOperand(MMO) 1357 .add(predOps(ARMCC::AL)); 1358 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { 1359 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg) 1360 .addFrameIndex(FI) 1361 .addImm(0) 1362 .addMemOperand(MMO) 1363 .add(predOps(ARMCC::AL)); 1364 } else 1365 llvm_unreachable("Unknown reg class!"); 1366 break; 1367 case 8: 1368 if (ARM::DPRRegClass.hasSubClassEq(RC)) { 1369 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) 1370 .addFrameIndex(FI) 1371 .addImm(0) 1372 .addMemOperand(MMO) 1373 .add(predOps(ARMCC::AL)); 1374 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { 1375 MachineInstrBuilder MIB; 1376 1377 if (Subtarget.hasV5TEOps()) { 1378 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); 1379 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); 1380 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); 1381 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) 1382 .add(predOps(ARMCC::AL)); 1383 } else { 1384 // Fallback to LDM instruction, which has existed since the dawn of 1385 // time. 
1386 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA)) 1387 .addFrameIndex(FI) 1388 .addMemOperand(MMO) 1389 .add(predOps(ARMCC::AL)); 1390 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); 1391 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); 1392 } 1393 1394 if (Register::isPhysicalRegister(DestReg)) 1395 MIB.addReg(DestReg, RegState::ImplicitDefine); 1396 } else 1397 llvm_unreachable("Unknown reg class!"); 1398 break; 1399 case 16: 1400 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { 1401 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { 1402 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) 1403 .addFrameIndex(FI) 1404 .addImm(16) 1405 .addMemOperand(MMO) 1406 .add(predOps(ARMCC::AL)); 1407 } else { 1408 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) 1409 .addFrameIndex(FI) 1410 .addMemOperand(MMO) 1411 .add(predOps(ARMCC::AL)); 1412 } 1413 } else if (ARM::QPRRegClass.hasSubClassEq(RC) && 1414 Subtarget.hasMVEIntegerOps()) { 1415 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg); 1416 MIB.addFrameIndex(FI) 1417 .addImm(0) 1418 .addMemOperand(MMO); 1419 addUnpredicatedMveVpredNOp(MIB); 1420 } else 1421 llvm_unreachable("Unknown reg class!"); 1422 break; 1423 case 24: 1424 if (ARM::DTripleRegClass.hasSubClassEq(RC)) { 1425 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && 1426 Subtarget.hasNEON()) { 1427 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) 1428 .addFrameIndex(FI) 1429 .addImm(16) 1430 .addMemOperand(MMO) 1431 .add(predOps(ARMCC::AL)); 1432 } else { 1433 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 1434 .addFrameIndex(FI) 1435 .addMemOperand(MMO) 1436 .add(predOps(ARMCC::AL)); 1437 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); 1438 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); 1439 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); 1440 if (Register::isPhysicalRegister(DestReg)) 1441 MIB.addReg(DestReg, RegState::ImplicitDefine); 1442 } 1443 } else 1444 llvm_unreachable("Unknown reg class!"); 1445 break; 1446 case 32: 1447 if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { 1448 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && 1449 Subtarget.hasNEON()) { 1450 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) 1451 .addFrameIndex(FI) 1452 .addImm(16) 1453 .addMemOperand(MMO) 1454 .add(predOps(ARMCC::AL)); 1455 } else { 1456 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 1457 .addFrameIndex(FI) 1458 .add(predOps(ARMCC::AL)) 1459 .addMemOperand(MMO); 1460 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); 1461 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); 1462 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); 1463 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); 1464 if (Register::isPhysicalRegister(DestReg)) 1465 MIB.addReg(DestReg, RegState::ImplicitDefine); 1466 } 1467 } else 1468 llvm_unreachable("Unknown reg class!"); 1469 break; 1470 case 64: 1471 if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 1472 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 1473 .addFrameIndex(FI) 1474 .add(predOps(ARMCC::AL)) 1475 .addMemOperand(MMO); 1476 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); 1477 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); 1478 MIB = AddDReg(MIB, DestReg, 
ARM::dsub_2, RegState::DefineNoRead, TRI); 1479 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); 1480 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI); 1481 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); 1482 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); 1483 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); 1484 if (Register::isPhysicalRegister(DestReg)) 1485 MIB.addReg(DestReg, RegState::ImplicitDefine); 1486 } else 1487 llvm_unreachable("Unknown reg class!"); 1488 break; 1489 default: 1490 llvm_unreachable("Unknown regclass!"); 1491 } 1492 } 1493 1494 unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 1495 int &FrameIndex) const { 1496 switch (MI.getOpcode()) { 1497 default: break; 1498 case ARM::LDRrs: 1499 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. 1500 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() && 1501 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 && 1502 MI.getOperand(3).getImm() == 0) { 1503 FrameIndex = MI.getOperand(1).getIndex(); 1504 return MI.getOperand(0).getReg(); 1505 } 1506 break; 1507 case ARM::LDRi12: 1508 case ARM::t2LDRi12: 1509 case ARM::tLDRspi: 1510 case ARM::VLDRD: 1511 case ARM::VLDRS: 1512 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && 1513 MI.getOperand(2).getImm() == 0) { 1514 FrameIndex = MI.getOperand(1).getIndex(); 1515 return MI.getOperand(0).getReg(); 1516 } 1517 break; 1518 case ARM::VLDR_P0_off: 1519 if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && 1520 MI.getOperand(1).getImm() == 0) { 1521 FrameIndex = MI.getOperand(0).getIndex(); 1522 return ARM::P0; 1523 } 1524 break; 1525 case ARM::VLD1q64: 1526 case ARM::VLD1d8TPseudo: 1527 case ARM::VLD1d16TPseudo: 1528 case ARM::VLD1d32TPseudo: 1529 case ARM::VLD1d64TPseudo: 1530 case ARM::VLD1d8QPseudo: 1531 case ARM::VLD1d16QPseudo: 1532 case ARM::VLD1d32QPseudo: 1533 case ARM::VLD1d64QPseudo: 1534 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { 1535 FrameIndex = MI.getOperand(1).getIndex(); 1536 return MI.getOperand(0).getReg(); 1537 } 1538 break; 1539 case ARM::VLDMQIA: 1540 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) { 1541 FrameIndex = MI.getOperand(1).getIndex(); 1542 return MI.getOperand(0).getReg(); 1543 } 1544 break; 1545 } 1546 1547 return 0; 1548 } 1549 1550 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, 1551 int &FrameIndex) const { 1552 SmallVector<const MachineMemOperand *, 1> Accesses; 1553 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) && 1554 Accesses.size() == 1) { 1555 FrameIndex = 1556 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) 1557 ->getFrameIndex(); 1558 return true; 1559 } 1560 return false; 1561 } 1562 1563 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD 1564 /// depending on whether the result is used. 1565 void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { 1566 bool isThumb1 = Subtarget.isThumb1Only(); 1567 bool isThumb2 = Subtarget.isThumb2(); 1568 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); 1569 1570 DebugLoc dl = MI->getDebugLoc(); 1571 MachineBasicBlock *BB = MI->getParent(); 1572 1573 MachineInstrBuilder LDM, STM; 1574 if (isThumb1 || !MI->getOperand(1).isDead()) { 1575 MachineOperand LDWb(MI->getOperand(1)); 1576 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD 1577 : isThumb1 ? 
ARM::tLDMIA_UPD 1578 : ARM::LDMIA_UPD)) 1579 .add(LDWb); 1580 } else { 1581 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); 1582 } 1583 1584 if (isThumb1 || !MI->getOperand(0).isDead()) { 1585 MachineOperand STWb(MI->getOperand(0)); 1586 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD 1587 : isThumb1 ? ARM::tSTMIA_UPD 1588 : ARM::STMIA_UPD)) 1589 .add(STWb); 1590 } else { 1591 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA)); 1592 } 1593 1594 MachineOperand LDBase(MI->getOperand(3)); 1595 LDM.add(LDBase).add(predOps(ARMCC::AL)); 1596 1597 MachineOperand STBase(MI->getOperand(2)); 1598 STM.add(STBase).add(predOps(ARMCC::AL)); 1599 1600 // Sort the scratch registers into ascending order. 1601 const TargetRegisterInfo &TRI = getRegisterInfo(); 1602 SmallVector<unsigned, 6> ScratchRegs; 1603 for(unsigned I = 5; I < MI->getNumOperands(); ++I) 1604 ScratchRegs.push_back(MI->getOperand(I).getReg()); 1605 llvm::sort(ScratchRegs, 1606 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool { 1607 return TRI.getEncodingValue(Reg1) < 1608 TRI.getEncodingValue(Reg2); 1609 }); 1610 1611 for (const auto &Reg : ScratchRegs) { 1612 LDM.addReg(Reg, RegState::Define); 1613 STM.addReg(Reg, RegState::Kill); 1614 } 1615 1616 BB->erase(MI); 1617 } 1618 1619 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 1620 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { 1621 assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && 1622 "LOAD_STACK_GUARD currently supported only for MachO."); 1623 expandLoadStackGuard(MI); 1624 MI.getParent()->erase(MI); 1625 return true; 1626 } 1627 1628 if (MI.getOpcode() == ARM::MEMCPY) { 1629 expandMEMCPY(MI); 1630 return true; 1631 } 1632 1633 // This hook gets to expand COPY instructions before they become 1634 // copyPhysReg() calls. Look for VMOVS instructions that can legally be 1635 // widened to VMOVD. We prefer the VMOVD when possible because it may be 1636 // changed into a VORR that can go down the NEON pipeline. 1637 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64()) 1638 return false; 1639 1640 // Look for a copy between even S-registers. That is where we keep floats 1641 // when using NEON v2f32 instructions for f32 arithmetic. 1642 Register DstRegS = MI.getOperand(0).getReg(); 1643 Register SrcRegS = MI.getOperand(1).getReg(); 1644 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) 1645 return false; 1646 1647 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1648 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, 1649 &ARM::DPRRegClass); 1650 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, 1651 &ARM::DPRRegClass); 1652 if (!DstRegD || !SrcRegD) 1653 return false; 1654 1655 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only 1656 // legal if the COPY already defines the full DstRegD, and it isn't a 1657 // sub-register insertion. 1658 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI)) 1659 return false; 1660 1661 // A dead copy shouldn't show up here, but reject it just in case. 1662 if (MI.getOperand(0).isDead()) 1663 return false; 1664 1665 // All clear, widen the COPY. 1666 LLVM_DEBUG(dbgs() << "widening: " << MI); 1667 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); 1668 1669 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg 1670 // or some other super-register. 
1671 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD); 1672 if (ImpDefIdx != -1) 1673 MI.RemoveOperand(ImpDefIdx); 1674 1675 // Change the opcode and operands. 1676 MI.setDesc(get(ARM::VMOVD)); 1677 MI.getOperand(0).setReg(DstRegD); 1678 MI.getOperand(1).setReg(SrcRegD); 1679 MIB.add(predOps(ARMCC::AL)); 1680 1681 // We are now reading SrcRegD instead of SrcRegS. This may upset the 1682 // register scavenger and machine verifier, so we need to indicate that we 1683 // are reading an undefined value from SrcRegD, but a proper value from 1684 // SrcRegS. 1685 MI.getOperand(1).setIsUndef(); 1686 MIB.addReg(SrcRegS, RegState::Implicit); 1687 1688 // SrcRegD may actually contain an unrelated value in the ssub_1 1689 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. 1690 if (MI.getOperand(1).isKill()) { 1691 MI.getOperand(1).setIsKill(false); 1692 MI.addRegisterKilled(SrcRegS, TRI, true); 1693 } 1694 1695 LLVM_DEBUG(dbgs() << "replaced by: " << MI); 1696 return true; 1697 } 1698 1699 /// Create a copy of a const pool value. Update CPI to the new index and return 1700 /// the label UID. 1701 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { 1702 MachineConstantPool *MCP = MF.getConstantPool(); 1703 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1704 1705 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; 1706 assert(MCPE.isMachineConstantPoolEntry() && 1707 "Expecting a machine constantpool entry!"); 1708 ARMConstantPoolValue *ACPV = 1709 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); 1710 1711 unsigned PCLabelId = AFI->createPICLabelUId(); 1712 ARMConstantPoolValue *NewCPV = nullptr; 1713 1714 // FIXME: The below assumes PIC relocation model and that the function 1715 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and 1716 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR 1717 // instructions, so that's probably OK, but is PIC always correct when 1718 // we get here? 
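  // Recreate the constant pool value, tagged with the fresh PC label. The
  // literal 4 passed to each Create call below is the Thumb PCAdjustment
  // discussed in the FIXME above.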
1719 if (ACPV->isGlobalValue()) 1720 NewCPV = ARMConstantPoolConstant::Create( 1721 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue, 1722 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress()); 1723 else if (ACPV->isExtSymbol()) 1724 NewCPV = ARMConstantPoolSymbol:: 1725 Create(MF.getFunction().getContext(), 1726 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); 1727 else if (ACPV->isBlockAddress()) 1728 NewCPV = ARMConstantPoolConstant:: 1729 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, 1730 ARMCP::CPBlockAddress, 4); 1731 else if (ACPV->isLSDA()) 1732 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId, 1733 ARMCP::CPLSDA, 4); 1734 else if (ACPV->isMachineBasicBlock()) 1735 NewCPV = ARMConstantPoolMBB:: 1736 Create(MF.getFunction().getContext(), 1737 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); 1738 else 1739 llvm_unreachable("Unexpected ARM constantpool value type!!"); 1740 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign()); 1741 return PCLabelId; 1742 } 1743 1744 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, 1745 MachineBasicBlock::iterator I, 1746 Register DestReg, unsigned SubIdx, 1747 const MachineInstr &Orig, 1748 const TargetRegisterInfo &TRI) const { 1749 unsigned Opcode = Orig.getOpcode(); 1750 switch (Opcode) { 1751 default: { 1752 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); 1753 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI); 1754 MBB.insert(I, MI); 1755 break; 1756 } 1757 case ARM::tLDRpci_pic: 1758 case ARM::t2LDRpci_pic: { 1759 MachineFunction &MF = *MBB.getParent(); 1760 unsigned CPI = Orig.getOperand(1).getIndex(); 1761 unsigned PCLabelId = duplicateCPV(MF, CPI); 1762 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg) 1763 .addConstantPoolIndex(CPI) 1764 .addImm(PCLabelId) 1765 .cloneMemRefs(Orig); 1766 break; 1767 } 1768 } 1769 } 1770 1771 MachineInstr & 1772 ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, 1773 MachineBasicBlock::iterator InsertBefore, 1774 const MachineInstr &Orig) const { 1775 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); 1776 MachineBasicBlock::instr_iterator I = Cloned.getIterator(); 1777 for (;;) { 1778 switch (I->getOpcode()) { 1779 case ARM::tLDRpci_pic: 1780 case ARM::t2LDRpci_pic: { 1781 MachineFunction &MF = *MBB.getParent(); 1782 unsigned CPI = I->getOperand(1).getIndex(); 1783 unsigned PCLabelId = duplicateCPV(MF, CPI); 1784 I->getOperand(1).setIndex(CPI); 1785 I->getOperand(2).setImm(PCLabelId); 1786 break; 1787 } 1788 } 1789 if (!I->isBundledWithSucc()) 1790 break; 1791 ++I; 1792 } 1793 return Cloned; 1794 } 1795 1796 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, 1797 const MachineInstr &MI1, 1798 const MachineRegisterInfo *MRI) const { 1799 unsigned Opcode = MI0.getOpcode(); 1800 if (Opcode == ARM::t2LDRpci || 1801 Opcode == ARM::t2LDRpci_pic || 1802 Opcode == ARM::tLDRpci || 1803 Opcode == ARM::tLDRpci_pic || 1804 Opcode == ARM::LDRLIT_ga_pcrel || 1805 Opcode == ARM::LDRLIT_ga_pcrel_ldr || 1806 Opcode == ARM::tLDRLIT_ga_pcrel || 1807 Opcode == ARM::MOV_ga_pcrel || 1808 Opcode == ARM::MOV_ga_pcrel_ldr || 1809 Opcode == ARM::t2MOV_ga_pcrel) { 1810 if (MI1.getOpcode() != Opcode) 1811 return false; 1812 if (MI0.getNumOperands() != MI1.getNumOperands()) 1813 return false; 1814 1815 const MachineOperand &MO0 = MI0.getOperand(1); 1816 const MachineOperand &MO1 = MI1.getOperand(1); 1817 if (MO0.getOffset() != MO1.getOffset()) 
1818 return false; 1819 1820 if (Opcode == ARM::LDRLIT_ga_pcrel || 1821 Opcode == ARM::LDRLIT_ga_pcrel_ldr || 1822 Opcode == ARM::tLDRLIT_ga_pcrel || 1823 Opcode == ARM::MOV_ga_pcrel || 1824 Opcode == ARM::MOV_ga_pcrel_ldr || 1825 Opcode == ARM::t2MOV_ga_pcrel) 1826 // Ignore the PC labels. 1827 return MO0.getGlobal() == MO1.getGlobal(); 1828 1829 const MachineFunction *MF = MI0.getParent()->getParent(); 1830 const MachineConstantPool *MCP = MF->getConstantPool(); 1831 int CPI0 = MO0.getIndex(); 1832 int CPI1 = MO1.getIndex(); 1833 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; 1834 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; 1835 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); 1836 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); 1837 if (isARMCP0 && isARMCP1) { 1838 ARMConstantPoolValue *ACPV0 = 1839 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); 1840 ARMConstantPoolValue *ACPV1 = 1841 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); 1842 return ACPV0->hasSameValue(ACPV1); 1843 } else if (!isARMCP0 && !isARMCP1) { 1844 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; 1845 } 1846 return false; 1847 } else if (Opcode == ARM::PICLDR) { 1848 if (MI1.getOpcode() != Opcode) 1849 return false; 1850 if (MI0.getNumOperands() != MI1.getNumOperands()) 1851 return false; 1852 1853 Register Addr0 = MI0.getOperand(1).getReg(); 1854 Register Addr1 = MI1.getOperand(1).getReg(); 1855 if (Addr0 != Addr1) { 1856 if (!MRI || !Register::isVirtualRegister(Addr0) || 1857 !Register::isVirtualRegister(Addr1)) 1858 return false; 1859 1860 // This assumes SSA form. 1861 MachineInstr *Def0 = MRI->getVRegDef(Addr0); 1862 MachineInstr *Def1 = MRI->getVRegDef(Addr1); 1863 // Check if the loaded value, e.g. a constantpool of a global address, are 1864 // the same. 1865 if (!produceSameValue(*Def0, *Def1, MRI)) 1866 return false; 1867 } 1868 1869 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { 1870 // %12 = PICLDR %11, 0, 14, %noreg 1871 const MachineOperand &MO0 = MI0.getOperand(i); 1872 const MachineOperand &MO1 = MI1.getOperand(i); 1873 if (!MO0.isIdenticalTo(MO1)) 1874 return false; 1875 } 1876 return true; 1877 } 1878 1879 return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); 1880 } 1881 1882 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to 1883 /// determine if two loads are loading from the same base address. It should 1884 /// only return true if the base pointers are the same and the only differences 1885 /// between the two addresses is the offset. It also returns the offsets by 1886 /// reference. 1887 /// 1888 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched 1889 /// is permanently disabled. 1890 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 1891 int64_t &Offset1, 1892 int64_t &Offset2) const { 1893 // Don't worry about Thumb: just ARM and Thumb2. 
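  // Thumb1 is rejected outright; for ARM and Thumb2 both loads must also be
  // one of the simple immediate-offset opcodes listed below, otherwise we
  // conservatively report that the loads are unrelated.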
1894 if (Subtarget.isThumb1Only()) return false; 1895 1896 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) 1897 return false; 1898 1899 switch (Load1->getMachineOpcode()) { 1900 default: 1901 return false; 1902 case ARM::LDRi12: 1903 case ARM::LDRBi12: 1904 case ARM::LDRD: 1905 case ARM::LDRH: 1906 case ARM::LDRSB: 1907 case ARM::LDRSH: 1908 case ARM::VLDRD: 1909 case ARM::VLDRS: 1910 case ARM::t2LDRi8: 1911 case ARM::t2LDRBi8: 1912 case ARM::t2LDRDi8: 1913 case ARM::t2LDRSHi8: 1914 case ARM::t2LDRi12: 1915 case ARM::t2LDRBi12: 1916 case ARM::t2LDRSHi12: 1917 break; 1918 } 1919 1920 switch (Load2->getMachineOpcode()) { 1921 default: 1922 return false; 1923 case ARM::LDRi12: 1924 case ARM::LDRBi12: 1925 case ARM::LDRD: 1926 case ARM::LDRH: 1927 case ARM::LDRSB: 1928 case ARM::LDRSH: 1929 case ARM::VLDRD: 1930 case ARM::VLDRS: 1931 case ARM::t2LDRi8: 1932 case ARM::t2LDRBi8: 1933 case ARM::t2LDRSHi8: 1934 case ARM::t2LDRi12: 1935 case ARM::t2LDRBi12: 1936 case ARM::t2LDRSHi12: 1937 break; 1938 } 1939 1940 // Check if base addresses and chain operands match. 1941 if (Load1->getOperand(0) != Load2->getOperand(0) || 1942 Load1->getOperand(4) != Load2->getOperand(4)) 1943 return false; 1944 1945 // Index should be Reg0. 1946 if (Load1->getOperand(3) != Load2->getOperand(3)) 1947 return false; 1948 1949 // Determine the offsets. 1950 if (isa<ConstantSDNode>(Load1->getOperand(1)) && 1951 isa<ConstantSDNode>(Load2->getOperand(1))) { 1952 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue(); 1953 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue(); 1954 return true; 1955 } 1956 1957 return false; 1958 } 1959 1960 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to 1961 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should 1962 /// be scheduled together. On some targets if two loads are loading from 1963 /// addresses in the same cache line, it's better if they are scheduled 1964 /// together. This function takes two integers that represent the load offsets 1965 /// from the common base address. It returns true if it decides it's desirable 1966 /// to schedule the two loads together. "NumLoads" is the number of loads that 1967 /// have already been scheduled after Load1. 1968 /// 1969 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched 1970 /// is permanently disabled. 1971 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 1972 int64_t Offset1, int64_t Offset2, 1973 unsigned NumLoads) const { 1974 // Don't worry about Thumb: just ARM and Thumb2. 1975 if (Subtarget.isThumb1Only()) return false; 1976 1977 assert(Offset2 > Offset1); 1978 1979 if ((Offset2 - Offset1) / 8 > 64) 1980 return false; 1981 1982 // Check if the machine opcodes are different. If they are different 1983 // then we consider them not to be from the same base address, 1984 // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. 1985 // In this case, they are considered to be the same because they are different 1986 // encoding forms of the same basic instruction. 1987 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && 1988 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && 1989 Load2->getMachineOpcode() == ARM::t2LDRBi12) || 1990 (Load1->getMachineOpcode() == ARM::t2LDRBi12 && 1991 Load2->getMachineOpcode() == ARM::t2LDRBi8))) 1992 return false; // FIXME: overly conservative? 1993 1994 // Four loads in a row should be sufficient.
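  // (NumLoads counts loads already scheduled after Load1, so a value of 3
  // means this candidate would be the fourth consecutive load.)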
1995 if (NumLoads >= 3) 1996 return false; 1997 1998 return true; 1999 } 2000 2001 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, 2002 const MachineBasicBlock *MBB, 2003 const MachineFunction &MF) const { 2004 // Debug info is never a scheduling boundary. It's necessary to be explicit 2005 // due to the special treatment of IT instructions below, otherwise a 2006 // dbg_value followed by an IT will result in the IT instruction being 2007 // considered a scheduling hazard, which is wrong. It should be the actual 2008 // instruction preceding the dbg_value instruction(s), just like it is 2009 // when debug info is not present. 2010 if (MI.isDebugInstr()) 2011 return false; 2012 2013 // Terminators and labels can't be scheduled around. 2014 if (MI.isTerminator() || MI.isPosition()) 2015 return true; 2016 2017 // Treat the start of the IT block as a scheduling boundary, but schedule 2018 // t2IT along with all instructions following it. 2019 // FIXME: This is a big hammer. But the alternative is to add all potential 2020 // true and anti dependencies to IT block instructions as implicit operands 2021 // to the t2IT instruction. The added compile time and complexity does not 2022 // seem worth it. 2023 MachineBasicBlock::const_iterator I = MI; 2024 // Make sure to skip any debug instructions. 2025 while (++I != MBB->end() && I->isDebugInstr()) 2026 ; 2027 if (I != MBB->end() && I->getOpcode() == ARM::t2IT) 2028 return true; 2029 2030 // Don't attempt to schedule around any instruction that defines 2031 // a stack-oriented pointer, as it's unlikely to be profitable. This 2032 // saves compile time, because it doesn't require every single 2033 // stack slot reference to depend on the instruction that does the 2034 // modification. 2035 // Calls don't actually change the stack pointer, even if they have imp-defs. 2036 // No ARM calling conventions change the stack pointer. (X86 calling 2037 // conventions sometimes do). 2038 if (!MI.isCall() && MI.definesRegister(ARM::SP)) 2039 return true; 2040 2041 return false; 2042 } 2043 2044 bool ARMBaseInstrInfo:: 2045 isProfitableToIfCvt(MachineBasicBlock &MBB, 2046 unsigned NumCycles, unsigned ExtraPredCycles, 2047 BranchProbability Probability) const { 2048 if (!NumCycles) 2049 return false; 2050 2051 // If we are optimizing for size, see if the branch in the predecessor can be 2052 // lowered to cbn?z by the constant island lowering pass, and return false if 2053 // so. This results in a shorter instruction sequence. 2054 if (MBB.getParent()->getFunction().hasOptSize()) { 2055 MachineBasicBlock *Pred = *MBB.pred_begin(); 2056 if (!Pred->empty()) { 2057 MachineInstr *LastMI = &*Pred->rbegin(); 2058 if (LastMI->getOpcode() == ARM::t2Bcc) { 2059 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2060 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI); 2061 if (CmpMI) 2062 return false; 2063 } 2064 } 2065 } 2066 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles, 2067 MBB, 0, 0, Probability); 2068 } 2069 2070 bool ARMBaseInstrInfo:: 2071 isProfitableToIfCvt(MachineBasicBlock &TBB, 2072 unsigned TCycles, unsigned TExtra, 2073 MachineBasicBlock &FBB, 2074 unsigned FCycles, unsigned FExtra, 2075 BranchProbability Probability) const { 2076 if (!TCycles) 2077 return false; 2078 2079 // In Thumb code we often end up trading one branch for an IT block, and 2080 // if we end up cloning, this can increase code size. Prevent 2081 // blocks with multiple predecessors from being if-converted to prevent this 2082 // cloning.
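  // This restriction only applies to Thumb2 functions compiled for minimum
  // size; everything else falls through to the cost model below.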
2083 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) { 2084 if (TBB.pred_size() != 1 || FBB.pred_size() != 1) 2085 return false; 2086 } 2087 2088 // Attempt to estimate the relative costs of predication versus branching. 2089 // Here we scale up each component of UnpredCost to avoid precision issues when 2090 // scaling TCycles/FCycles by Probability. 2091 const unsigned ScalingUpFactor = 1024; 2092 2093 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; 2094 unsigned UnpredCost; 2095 if (!Subtarget.hasBranchPredictor()) { 2096 // When we don't have a branch predictor, it's always cheaper not to take a 2097 // branch than take it, so we have to take that into account. 2098 unsigned NotTakenBranchCost = 1; 2099 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); 2100 unsigned TUnpredCycles, FUnpredCycles; 2101 if (!FCycles) { 2102 // Triangle: TBB is the fallthrough 2103 TUnpredCycles = TCycles + NotTakenBranchCost; 2104 FUnpredCycles = TakenBranchCost; 2105 } else { 2106 // Diamond: TBB is the block that is branched to, FBB is the fallthrough 2107 TUnpredCycles = TCycles + TakenBranchCost; 2108 FUnpredCycles = FCycles + NotTakenBranchCost; 2109 // The branch at the end of FBB will disappear when it's predicated, so 2110 // discount it from PredCost. 2111 PredCost -= 1 * ScalingUpFactor; 2112 } 2113 // The total cost is the cost of each path scaled by its probability. 2114 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); 2115 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor); 2116 UnpredCost = TUnpredCost + FUnpredCost; 2117 // When predicating, assume that the first IT can be folded away but later 2118 // ones cost one cycle each. 2119 if (Subtarget.isThumb2() && TCycles + FCycles > 4) { 2120 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; 2121 } 2122 } else { 2123 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); 2124 unsigned FUnpredCost = 2125 Probability.getCompl().scale(FCycles * ScalingUpFactor); 2126 UnpredCost = TUnpredCost + FUnpredCost; 2127 UnpredCost += 1 * ScalingUpFactor; // The branch itself 2128 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; 2129 } 2130 2131 return PredCost <= UnpredCost; 2132 } 2133 2134 unsigned 2135 ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, 2136 unsigned NumInsts) const { 2137 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. 2138 // ARM has a condition code field in every predicable instruction, so using it 2139 // doesn't change code size. 2140 return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0; 2141 } 2142 2143 unsigned 2144 ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { 2145 // If this branch is likely to be folded into the comparison to form a 2146 // CB(N)Z, then removing it won't reduce code size at all, because that will 2147 // just replace the CB(N)Z with a CMP. 2148 if (MI.getOpcode() == ARM::t2Bcc && 2149 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo())) 2150 return 0; 2151 2152 unsigned Size = getInstSizeInBytes(MI); 2153 2154 // For Thumb2, all branches are 32-bit instructions during the if conversion 2155 // pass, but may be replaced with 16-bit instructions during size reduction.
2156 // Since the branches considered by if conversion tend to be forward branches 2157 // over small basic blocks, they are very likely to be in range for the 2158 // narrow instructions, so we assume the final code size will be half what it 2159 // currently is. 2160 if (Subtarget.isThumb2()) 2161 Size /= 2; 2162 2163 return Size; 2164 } 2165 2166 bool 2167 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 2168 MachineBasicBlock &FMBB) const { 2169 // Reduce false anti-dependencies to let the target's out-of-order execution 2170 // engine do its thing. 2171 return Subtarget.isProfitableToUnpredicate(); 2172 } 2173 2174 /// getInstrPredicate - If instruction is predicated, returns its predicate 2175 /// condition, otherwise returns AL. It also returns the condition code 2176 /// register by reference. 2177 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, 2178 Register &PredReg) { 2179 int PIdx = MI.findFirstPredOperandIdx(); 2180 if (PIdx == -1) { 2181 PredReg = 0; 2182 return ARMCC::AL; 2183 } 2184 2185 PredReg = MI.getOperand(PIdx+1).getReg(); 2186 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); 2187 } 2188 2189 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { 2190 if (Opc == ARM::B) 2191 return ARM::Bcc; 2192 if (Opc == ARM::tB) 2193 return ARM::tBcc; 2194 if (Opc == ARM::t2B) 2195 return ARM::t2Bcc; 2196 2197 llvm_unreachable("Unknown unconditional branch opcode!"); 2198 } 2199 2200 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, 2201 bool NewMI, 2202 unsigned OpIdx1, 2203 unsigned OpIdx2) const { 2204 switch (MI.getOpcode()) { 2205 case ARM::MOVCCr: 2206 case ARM::t2MOVCCr: { 2207 // MOVCC can be commuted by inverting the condition. 2208 Register PredReg; 2209 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); 2210 // MOVCC AL can't be inverted. Shouldn't happen. 2211 if (CC == ARMCC::AL || PredReg != ARM::CPSR) 2212 return nullptr; 2213 MachineInstr *CommutedMI = 2214 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 2215 if (!CommutedMI) 2216 return nullptr; 2217 // After swapping the MOVCC operands, also invert the condition. 2218 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx()) 2219 .setImm(ARMCC::getOppositeCondition(CC)); 2220 return CommutedMI; 2221 } 2222 } 2223 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 2224 } 2225 2226 /// Identify instructions that can be folded into a MOVCC instruction, and 2227 /// return the defining instruction. 2228 MachineInstr * 2229 ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, 2230 const TargetInstrInfo *TII) const { 2231 if (!Reg.isVirtual()) 2232 return nullptr; 2233 if (!MRI.hasOneNonDBGUse(Reg)) 2234 return nullptr; 2235 MachineInstr *MI = MRI.getVRegDef(Reg); 2236 if (!MI) 2237 return nullptr; 2238 // Check if MI can be predicated and folded into the MOVCC. 2239 if (!isPredicable(*MI)) 2240 return nullptr; 2241 // Check if MI has any non-dead defs or physreg uses. This also detects 2242 // predicated instructions which will be reading CPSR. 2243 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 2244 const MachineOperand &MO = MI->getOperand(i); 2245 // Reject frame index operands, PEI can't handle the predicated pseudos. 2246 if (MO.isFI() || MO.isCPI() || MO.isJTI()) 2247 return nullptr; 2248 if (!MO.isReg()) 2249 continue; 2250 // MI can't have any tied operands, that would conflict with predication. 
2251 if (MO.isTied()) 2252 return nullptr; 2253 if (Register::isPhysicalRegister(MO.getReg())) 2254 return nullptr; 2255 if (MO.isDef() && !MO.isDead()) 2256 return nullptr; 2257 } 2258 bool DontMoveAcrossStores = true; 2259 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores)) 2260 return nullptr; 2261 return MI; 2262 } 2263 2264 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, 2265 SmallVectorImpl<MachineOperand> &Cond, 2266 unsigned &TrueOp, unsigned &FalseOp, 2267 bool &Optimizable) const { 2268 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && 2269 "Unknown select instruction"); 2270 // MOVCC operands: 2271 // 0: Def. 2272 // 1: True use. 2273 // 2: False use. 2274 // 3: Condition code. 2275 // 4: CPSR use. 2276 TrueOp = 1; 2277 FalseOp = 2; 2278 Cond.push_back(MI.getOperand(3)); 2279 Cond.push_back(MI.getOperand(4)); 2280 // We can always fold a def. 2281 Optimizable = true; 2282 return false; 2283 } 2284 2285 MachineInstr * 2286 ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, 2287 SmallPtrSetImpl<MachineInstr *> &SeenMIs, 2288 bool PreferFalse) const { 2289 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && 2290 "Unknown select instruction"); 2291 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2292 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this); 2293 bool Invert = !DefMI; 2294 if (!DefMI) 2295 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this); 2296 if (!DefMI) 2297 return nullptr; 2298 2299 // Find new register class to use. 2300 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1); 2301 Register DestReg = MI.getOperand(0).getReg(); 2302 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); 2303 if (!MRI.constrainRegClass(DestReg, PreviousClass)) 2304 return nullptr; 2305 2306 // Create a new predicated version of DefMI. 2307 // Rfalse is the first use. 2308 MachineInstrBuilder NewMI = 2309 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg); 2310 2311 // Copy all the DefMI operands, excluding its (null) predicate. 2312 const MCInstrDesc &DefDesc = DefMI->getDesc(); 2313 for (unsigned i = 1, e = DefDesc.getNumOperands(); 2314 i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) 2315 NewMI.add(DefMI->getOperand(i)); 2316 2317 unsigned CondCode = MI.getOperand(3).getImm(); 2318 if (Invert) 2319 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); 2320 else 2321 NewMI.addImm(CondCode); 2322 NewMI.add(MI.getOperand(4)); 2323 2324 // DefMI is not the -S version that sets CPSR, so add an optional %noreg. 2325 if (NewMI->hasOptionalDef()) 2326 NewMI.add(condCodeOp()); 2327 2328 // The output register value when the predicate is false is an implicit 2329 // register operand tied to the first def. 2330 // The tie makes the register allocator ensure the FalseReg is allocated the 2331 // same register as operand 0. 2332 FalseReg.setImplicit(); 2333 NewMI.add(FalseReg); 2334 NewMI->tieOperands(0, NewMI->getNumOperands() - 1); 2335 2336 // Update SeenMIs set: register newly created MI and erase removed DefMI. 2337 SeenMIs.insert(NewMI); 2338 SeenMIs.erase(DefMI); 2339 2340 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on 2341 // DefMI would be invalid when tranferred inside the loop. Checking for a 2342 // loop is expensive, but at least remove kill flags if they are in different 2343 // BBs. 
2344 if (DefMI->getParent() != MI.getParent()) 2345 NewMI->clearKillInfo(); 2346 2347 // The caller will erase MI, but not DefMI. 2348 DefMI->eraseFromParent(); 2349 return NewMI; 2350 } 2351 2352 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the 2353 /// instruction is encoded with an 'S' bit is determined by the optional CPSR 2354 /// def operand. 2355 /// 2356 /// This will go away once we can teach tblgen how to set the optional CPSR def 2357 /// operand itself. 2358 struct AddSubFlagsOpcodePair { 2359 uint16_t PseudoOpc; 2360 uint16_t MachineOpc; 2361 }; 2362 2363 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { 2364 {ARM::ADDSri, ARM::ADDri}, 2365 {ARM::ADDSrr, ARM::ADDrr}, 2366 {ARM::ADDSrsi, ARM::ADDrsi}, 2367 {ARM::ADDSrsr, ARM::ADDrsr}, 2368 2369 {ARM::SUBSri, ARM::SUBri}, 2370 {ARM::SUBSrr, ARM::SUBrr}, 2371 {ARM::SUBSrsi, ARM::SUBrsi}, 2372 {ARM::SUBSrsr, ARM::SUBrsr}, 2373 2374 {ARM::RSBSri, ARM::RSBri}, 2375 {ARM::RSBSrsi, ARM::RSBrsi}, 2376 {ARM::RSBSrsr, ARM::RSBrsr}, 2377 2378 {ARM::tADDSi3, ARM::tADDi3}, 2379 {ARM::tADDSi8, ARM::tADDi8}, 2380 {ARM::tADDSrr, ARM::tADDrr}, 2381 {ARM::tADCS, ARM::tADC}, 2382 2383 {ARM::tSUBSi3, ARM::tSUBi3}, 2384 {ARM::tSUBSi8, ARM::tSUBi8}, 2385 {ARM::tSUBSrr, ARM::tSUBrr}, 2386 {ARM::tSBCS, ARM::tSBC}, 2387 {ARM::tRSBS, ARM::tRSB}, 2388 {ARM::tLSLSri, ARM::tLSLri}, 2389 2390 {ARM::t2ADDSri, ARM::t2ADDri}, 2391 {ARM::t2ADDSrr, ARM::t2ADDrr}, 2392 {ARM::t2ADDSrs, ARM::t2ADDrs}, 2393 2394 {ARM::t2SUBSri, ARM::t2SUBri}, 2395 {ARM::t2SUBSrr, ARM::t2SUBrr}, 2396 {ARM::t2SUBSrs, ARM::t2SUBrs}, 2397 2398 {ARM::t2RSBSri, ARM::t2RSBri}, 2399 {ARM::t2RSBSrs, ARM::t2RSBrs}, 2400 }; 2401 2402 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { 2403 for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i) 2404 if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc) 2405 return AddSubFlagsOpcodeMap[i].MachineOpc; 2406 return 0; 2407 } 2408 2409 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, 2410 MachineBasicBlock::iterator &MBBI, 2411 const DebugLoc &dl, Register DestReg, 2412 Register BaseReg, int NumBytes, 2413 ARMCC::CondCodes Pred, Register PredReg, 2414 const ARMBaseInstrInfo &TII, 2415 unsigned MIFlags) { 2416 if (NumBytes == 0 && DestReg != BaseReg) { 2417 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) 2418 .addReg(BaseReg, RegState::Kill) 2419 .add(predOps(Pred, PredReg)) 2420 .add(condCodeOp()) 2421 .setMIFlags(MIFlags); 2422 return; 2423 } 2424 2425 bool isSub = NumBytes < 0; 2426 if (isSub) NumBytes = -NumBytes; 2427 2428 while (NumBytes) { 2429 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); 2430 unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); 2431 assert(ThisVal && "Didn't extract field correctly"); 2432 2433 // We will handle these bits from offset, clear them. 2434 NumBytes &= ~ThisVal; 2435 2436 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); 2437 2438 // Build the new ADD / SUB. 2439 unsigned Opc = isSub ? 
ARM::SUBri : ARM::ADDri; 2440 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) 2441 .addReg(BaseReg, RegState::Kill) 2442 .addImm(ThisVal) 2443 .add(predOps(Pred, PredReg)) 2444 .add(condCodeOp()) 2445 .setMIFlags(MIFlags); 2446 BaseReg = DestReg; 2447 } 2448 } 2449 2450 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, 2451 MachineFunction &MF, MachineInstr *MI, 2452 unsigned NumBytes) { 2453 // This optimisation potentially adds lots of load and store 2454 // micro-operations, it's only really a great benefit to code-size. 2455 if (!Subtarget.hasMinSize()) 2456 return false; 2457 2458 // If only one register is pushed/popped, LLVM can use an LDR/STR 2459 // instead. We can't modify those so make sure we're dealing with an 2460 // instruction we understand. 2461 bool IsPop = isPopOpcode(MI->getOpcode()); 2462 bool IsPush = isPushOpcode(MI->getOpcode()); 2463 if (!IsPush && !IsPop) 2464 return false; 2465 2466 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || 2467 MI->getOpcode() == ARM::VLDMDIA_UPD; 2468 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || 2469 MI->getOpcode() == ARM::tPOP || 2470 MI->getOpcode() == ARM::tPOP_RET; 2471 2472 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && 2473 MI->getOperand(1).getReg() == ARM::SP)) && 2474 "trying to fold sp update into non-sp-updating push/pop"); 2475 2476 // The VFP push & pop act on D-registers, so we can only fold an adjustment 2477 // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try 2478 // if this is violated. 2479 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) 2480 return false; 2481 2482 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ 2483 // pred) so the list starts at 4. Thumb1 starts after the predicate. 2484 int RegListIdx = IsT1PushPop ? 2 : 4; 2485 2486 // Calculate the space we'll need in terms of registers. 2487 unsigned RegsNeeded; 2488 const TargetRegisterClass *RegClass; 2489 if (IsVFPPushPop) { 2490 RegsNeeded = NumBytes / 8; 2491 RegClass = &ARM::DPRRegClass; 2492 } else { 2493 RegsNeeded = NumBytes / 4; 2494 RegClass = &ARM::GPRRegClass; 2495 } 2496 2497 // We're going to have to strip all list operands off before 2498 // re-adding them since the order matters, so save the existing ones 2499 // for later. 2500 SmallVector<MachineOperand, 4> RegList; 2501 2502 // We're also going to need the first register transferred by this 2503 // instruction, which won't necessarily be the first register in the list. 2504 unsigned FirstRegEnc = -1; 2505 2506 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); 2507 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { 2508 MachineOperand &MO = MI->getOperand(i); 2509 RegList.push_back(MO); 2510 2511 if (MO.isReg() && !MO.isImplicit() && 2512 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) 2513 FirstRegEnc = TRI->getEncodingValue(MO.getReg()); 2514 } 2515 2516 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); 2517 2518 // Now try to find enough space in the reglist to allocate NumBytes. 2519 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; 2520 --CurRegEnc) { 2521 unsigned CurReg = RegClass->getRegister(CurRegEnc); 2522 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7)) 2523 continue; 2524 if (!IsPop) { 2525 // Pushing any register is completely harmless, mark the register involved 2526 // as undef since we don't care about its value and must not restore it 2527 // during stack unwinding. 
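      // (In the CreateReg call below the trailing 'true' is the isUndef flag;
      // isDef, isImp, isKill and isDead are all left false.)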
2528 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, 2529 false, false, true)); 2530 --RegsNeeded; 2531 continue; 2532 } 2533 2534 // However, we can only pop an extra register if it's not live. For 2535 // registers live within the function we might clobber a return value 2536 // register; the other way a register can be live here is if it's 2537 // callee-saved. 2538 if (isCalleeSavedRegister(CurReg, CSRegs) || 2539 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) != 2540 MachineBasicBlock::LQR_Dead) { 2541 // VFP pops don't allow holes in the register list, so any skip is fatal 2542 // for our transformation. GPR pops do, so we should just keep looking. 2543 if (IsVFPPushPop) 2544 return false; 2545 else 2546 continue; 2547 } 2548 2549 // Mark the unimportant registers as <def,dead> in the POP. 2550 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, 2551 true)); 2552 --RegsNeeded; 2553 } 2554 2555 if (RegsNeeded > 0) 2556 return false; 2557 2558 // Finally we know we can profitably perform the optimisation so go 2559 // ahead: strip all existing registers off and add them back again 2560 // in the right order. 2561 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) 2562 MI->RemoveOperand(i); 2563 2564 // Add the complete list back in. 2565 MachineInstrBuilder MIB(MF, &*MI); 2566 for (int i = RegList.size() - 1; i >= 0; --i) 2567 MIB.add(RegList[i]); 2568 2569 return true; 2570 } 2571 2572 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 2573 Register FrameReg, int &Offset, 2574 const ARMBaseInstrInfo &TII) { 2575 unsigned Opcode = MI.getOpcode(); 2576 const MCInstrDesc &Desc = MI.getDesc(); 2577 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); 2578 bool isSub = false; 2579 2580 // Memory operands in inline assembly always use AddrMode2. 2581 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) 2582 AddrMode = ARMII::AddrMode2; 2583 2584 if (Opcode == ARM::ADDri) { 2585 Offset += MI.getOperand(FrameRegIdx+1).getImm(); 2586 if (Offset == 0) { 2587 // Turn it into a move. 2588 MI.setDesc(TII.get(ARM::MOVr)); 2589 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2590 MI.RemoveOperand(FrameRegIdx+1); 2591 Offset = 0; 2592 return true; 2593 } else if (Offset < 0) { 2594 Offset = -Offset; 2595 isSub = true; 2596 MI.setDesc(TII.get(ARM::SUBri)); 2597 } 2598 2599 // Common case: small offset, fits into instruction. 2600 if (ARM_AM::getSOImmVal(Offset) != -1) { 2601 // Replace the FrameIndex with sp / fp 2602 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2603 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); 2604 Offset = 0; 2605 return true; 2606 } 2607 2608 // Otherwise, pull as much of the immedidate into this ADDri/SUBri 2609 // as possible. 2610 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); 2611 unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); 2612 2613 // We will handle these bits from offset, clear them. 2614 Offset &= ~ThisImmVal; 2615 2616 // Get the properly encoded SOImmVal field. 
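  // (Any bits that cannot be folded stay in Offset for the caller to
  // materialise separately; the function as a whole only returns true once
  // Offset has been reduced to zero.)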
2617 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && 2618 "Bit extraction didn't work?"); 2619 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); 2620 } else { 2621 unsigned ImmIdx = 0; 2622 int InstrOffs = 0; 2623 unsigned NumBits = 0; 2624 unsigned Scale = 1; 2625 switch (AddrMode) { 2626 case ARMII::AddrMode_i12: 2627 ImmIdx = FrameRegIdx + 1; 2628 InstrOffs = MI.getOperand(ImmIdx).getImm(); 2629 NumBits = 12; 2630 break; 2631 case ARMII::AddrMode2: 2632 ImmIdx = FrameRegIdx+2; 2633 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); 2634 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2635 InstrOffs *= -1; 2636 NumBits = 12; 2637 break; 2638 case ARMII::AddrMode3: 2639 ImmIdx = FrameRegIdx+2; 2640 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); 2641 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2642 InstrOffs *= -1; 2643 NumBits = 8; 2644 break; 2645 case ARMII::AddrMode4: 2646 case ARMII::AddrMode6: 2647 // Can't fold any offset even if it's zero. 2648 return false; 2649 case ARMII::AddrMode5: 2650 ImmIdx = FrameRegIdx+1; 2651 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); 2652 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2653 InstrOffs *= -1; 2654 NumBits = 8; 2655 Scale = 4; 2656 break; 2657 case ARMII::AddrMode5FP16: 2658 ImmIdx = FrameRegIdx+1; 2659 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); 2660 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2661 InstrOffs *= -1; 2662 NumBits = 8; 2663 Scale = 2; 2664 break; 2665 case ARMII::AddrModeT2_i7: 2666 case ARMII::AddrModeT2_i7s2: 2667 case ARMII::AddrModeT2_i7s4: 2668 ImmIdx = FrameRegIdx+1; 2669 InstrOffs = MI.getOperand(ImmIdx).getImm(); 2670 NumBits = 7; 2671 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 : 2672 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1); 2673 break; 2674 default: 2675 llvm_unreachable("Unsupported addressing mode!"); 2676 } 2677 2678 Offset += InstrOffs * Scale; 2679 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); 2680 if (Offset < 0) { 2681 Offset = -Offset; 2682 isSub = true; 2683 } 2684 2685 // Attempt to fold address comp. if opcode has offset bits 2686 if (NumBits > 0) { 2687 // Common case: small offset, fits into instruction. 2688 MachineOperand &ImmOp = MI.getOperand(ImmIdx); 2689 int ImmedOffset = Offset / Scale; 2690 unsigned Mask = (1 << NumBits) - 1; 2691 if ((unsigned)Offset <= Mask * Scale) { 2692 // Replace the FrameIndex with sp 2693 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2694 // FIXME: When addrmode2 goes away, this will simplify (like the 2695 // T2 version), as the LDR.i12 versions don't need the encoding 2696 // tricks for the offset value. 2697 if (isSub) { 2698 if (AddrMode == ARMII::AddrMode_i12) 2699 ImmedOffset = -ImmedOffset; 2700 else 2701 ImmedOffset |= 1 << NumBits; 2702 } 2703 ImmOp.ChangeToImmediate(ImmedOffset); 2704 Offset = 0; 2705 return true; 2706 } 2707 2708 // Otherwise, it didn't fit. Pull in what we can to simplify the immed. 2709 ImmedOffset = ImmedOffset & Mask; 2710 if (isSub) { 2711 if (AddrMode == ARMII::AddrMode_i12) 2712 ImmedOffset = -ImmedOffset; 2713 else 2714 ImmedOffset |= 1 << NumBits; 2715 } 2716 ImmOp.ChangeToImmediate(ImmedOffset); 2717 Offset &= ~(Mask*Scale); 2718 } 2719 } 2720 2721 Offset = (isSub) ? 
-Offset : Offset; 2722 return Offset == 0; 2723 } 2724 2725 /// analyzeCompare - For a comparison instruction, return the source registers 2726 /// in SrcReg and SrcReg2 if having two register operands, and the value it 2727 /// compares against in CmpValue. Return true if the comparison instruction 2728 /// can be analyzed. 2729 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, 2730 Register &SrcReg2, int &CmpMask, 2731 int &CmpValue) const { 2732 switch (MI.getOpcode()) { 2733 default: break; 2734 case ARM::CMPri: 2735 case ARM::t2CMPri: 2736 case ARM::tCMPi8: 2737 SrcReg = MI.getOperand(0).getReg(); 2738 SrcReg2 = 0; 2739 CmpMask = ~0; 2740 CmpValue = MI.getOperand(1).getImm(); 2741 return true; 2742 case ARM::CMPrr: 2743 case ARM::t2CMPrr: 2744 case ARM::tCMPr: 2745 SrcReg = MI.getOperand(0).getReg(); 2746 SrcReg2 = MI.getOperand(1).getReg(); 2747 CmpMask = ~0; 2748 CmpValue = 0; 2749 return true; 2750 case ARM::TSTri: 2751 case ARM::t2TSTri: 2752 SrcReg = MI.getOperand(0).getReg(); 2753 SrcReg2 = 0; 2754 CmpMask = MI.getOperand(1).getImm(); 2755 CmpValue = 0; 2756 return true; 2757 } 2758 2759 return false; 2760 } 2761 2762 /// isSuitableForMask - Identify a suitable 'and' instruction that 2763 /// operates on the given source register and applies the same mask 2764 /// as a 'tst' instruction. Provide a limited look-through for copies. 2765 /// When successful, MI will hold the found instruction. 2766 static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, 2767 int CmpMask, bool CommonUse) { 2768 switch (MI->getOpcode()) { 2769 case ARM::ANDri: 2770 case ARM::t2ANDri: 2771 if (CmpMask != MI->getOperand(2).getImm()) 2772 return false; 2773 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) 2774 return true; 2775 break; 2776 } 2777 2778 return false; 2779 } 2780 2781 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return 2782 /// the condition code if we modify the instructions such that flags are 2783 /// set by ADD(a,b,X). 2784 inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { 2785 switch (CC) { 2786 default: return ARMCC::AL; 2787 case ARMCC::HS: return ARMCC::LO; 2788 case ARMCC::LO: return ARMCC::HS; 2789 case ARMCC::VS: return ARMCC::VS; 2790 case ARMCC::VC: return ARMCC::VC; 2791 } 2792 } 2793 2794 /// isRedundantFlagInstr - check whether the first instruction, whose only 2795 /// purpose is to update flags, can be made redundant. 2796 /// CMPrr can be made redundant by SUBrr if the operands are the same. 2797 /// CMPri can be made redundant by SUBri if the operands are the same. 2798 /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). 2799 /// This function can be extended later on. 
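/// For example (illustrative MIR), given
///   %2:gpr = SUBrr %0, %1, 14, $noreg, $noreg
///   CMPrr %0, %1, 14, $noreg, implicit-def $cpsr
/// the CMPrr adds no new flag information once the SUBrr is switched to its
/// flag-setting form, so it can be removed by optimizeCompareInstr.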
2800 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, 2801 Register SrcReg, Register SrcReg2, 2802 int ImmValue, const MachineInstr *OI, 2803 bool &IsThumb1) { 2804 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && 2805 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) && 2806 ((OI->getOperand(1).getReg() == SrcReg && 2807 OI->getOperand(2).getReg() == SrcReg2) || 2808 (OI->getOperand(1).getReg() == SrcReg2 && 2809 OI->getOperand(2).getReg() == SrcReg))) { 2810 IsThumb1 = false; 2811 return true; 2812 } 2813 2814 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr && 2815 ((OI->getOperand(2).getReg() == SrcReg && 2816 OI->getOperand(3).getReg() == SrcReg2) || 2817 (OI->getOperand(2).getReg() == SrcReg2 && 2818 OI->getOperand(3).getReg() == SrcReg))) { 2819 IsThumb1 = true; 2820 return true; 2821 } 2822 2823 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) && 2824 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) && 2825 OI->getOperand(1).getReg() == SrcReg && 2826 OI->getOperand(2).getImm() == ImmValue) { 2827 IsThumb1 = false; 2828 return true; 2829 } 2830 2831 if (CmpI->getOpcode() == ARM::tCMPi8 && 2832 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) && 2833 OI->getOperand(2).getReg() == SrcReg && 2834 OI->getOperand(3).getImm() == ImmValue) { 2835 IsThumb1 = true; 2836 return true; 2837 } 2838 2839 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && 2840 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr || 2841 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) && 2842 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() && 2843 OI->getOperand(0).getReg() == SrcReg && 2844 OI->getOperand(1).getReg() == SrcReg2) { 2845 IsThumb1 = false; 2846 return true; 2847 } 2848 2849 if (CmpI->getOpcode() == ARM::tCMPr && 2850 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 || 2851 OI->getOpcode() == ARM::tADDrr) && 2852 OI->getOperand(0).getReg() == SrcReg && 2853 OI->getOperand(2).getReg() == SrcReg2) { 2854 IsThumb1 = true; 2855 return true; 2856 } 2857 2858 return false; 2859 } 2860 2861 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { 2862 switch (MI->getOpcode()) { 2863 default: return false; 2864 case ARM::tLSLri: 2865 case ARM::tLSRri: 2866 case ARM::tLSLrr: 2867 case ARM::tLSRrr: 2868 case ARM::tSUBrr: 2869 case ARM::tADDrr: 2870 case ARM::tADDi3: 2871 case ARM::tADDi8: 2872 case ARM::tSUBi3: 2873 case ARM::tSUBi8: 2874 case ARM::tMUL: 2875 case ARM::tADC: 2876 case ARM::tSBC: 2877 case ARM::tRSB: 2878 case ARM::tAND: 2879 case ARM::tORR: 2880 case ARM::tEOR: 2881 case ARM::tBIC: 2882 case ARM::tMVN: 2883 case ARM::tASRri: 2884 case ARM::tASRrr: 2885 case ARM::tROR: 2886 IsThumb1 = true; 2887 LLVM_FALLTHROUGH; 2888 case ARM::RSBrr: 2889 case ARM::RSBri: 2890 case ARM::RSCrr: 2891 case ARM::RSCri: 2892 case ARM::ADDrr: 2893 case ARM::ADDri: 2894 case ARM::ADCrr: 2895 case ARM::ADCri: 2896 case ARM::SUBrr: 2897 case ARM::SUBri: 2898 case ARM::SBCrr: 2899 case ARM::SBCri: 2900 case ARM::t2RSBri: 2901 case ARM::t2ADDrr: 2902 case ARM::t2ADDri: 2903 case ARM::t2ADCrr: 2904 case ARM::t2ADCri: 2905 case ARM::t2SUBrr: 2906 case ARM::t2SUBri: 2907 case ARM::t2SBCrr: 2908 case ARM::t2SBCri: 2909 case ARM::ANDrr: 2910 case ARM::ANDri: 2911 case ARM::t2ANDrr: 2912 case ARM::t2ANDri: 2913 case ARM::ORRrr: 2914 case ARM::ORRri: 2915 case ARM::t2ORRrr: 2916 case 
ARM::t2ORRri: 2917 case ARM::EORrr: 2918 case ARM::EORri: 2919 case ARM::t2EORrr: 2920 case ARM::t2EORri: 2921 case ARM::t2LSRri: 2922 case ARM::t2LSRrr: 2923 case ARM::t2LSLri: 2924 case ARM::t2LSLrr: 2925 return true; 2926 } 2927 } 2928 2929 /// optimizeCompareInstr - Convert the instruction supplying the argument to the 2930 /// comparison into one that sets the zero bit in the flags register; 2931 /// Remove a redundant Compare instruction if an earlier instruction can set the 2932 /// flags in the same way as Compare. 2933 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two 2934 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the 2935 /// condition code of instructions which use the flags. 2936 bool ARMBaseInstrInfo::optimizeCompareInstr( 2937 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask, 2938 int CmpValue, const MachineRegisterInfo *MRI) const { 2939 // Get the unique definition of SrcReg. 2940 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 2941 if (!MI) return false; 2942 2943 // Masked compares sometimes use the same register as the corresponding 'and'. 2944 if (CmpMask != ~0) { 2945 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) { 2946 MI = nullptr; 2947 for (MachineRegisterInfo::use_instr_iterator 2948 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); 2949 UI != UE; ++UI) { 2950 if (UI->getParent() != CmpInstr.getParent()) 2951 continue; 2952 MachineInstr *PotentialAND = &*UI; 2953 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || 2954 isPredicated(*PotentialAND)) 2955 continue; 2956 MI = PotentialAND; 2957 break; 2958 } 2959 if (!MI) return false; 2960 } 2961 } 2962 2963 // Get ready to iterate backward from CmpInstr. 2964 MachineBasicBlock::iterator I = CmpInstr, E = MI, 2965 B = CmpInstr.getParent()->begin(); 2966 2967 // Early exit if CmpInstr is at the beginning of the BB. 2968 if (I == B) return false; 2969 2970 // There are two possible candidates which can be changed to set CPSR: 2971 // One is MI, the other is a SUB or ADD instruction. 2972 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or 2973 // ADDr[ri](r1, r2, X). 2974 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). 2975 MachineInstr *SubAdd = nullptr; 2976 if (SrcReg2 != 0) 2977 // MI is not a candidate for CMPrr. 2978 MI = nullptr; 2979 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { 2980 // Conservatively refuse to convert an instruction which isn't in the same 2981 // BB as the comparison. 2982 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. 2983 // Thus we cannot return here. 2984 if (CmpInstr.getOpcode() == ARM::CMPri || 2985 CmpInstr.getOpcode() == ARM::t2CMPri || 2986 CmpInstr.getOpcode() == ARM::tCMPi8) 2987 MI = nullptr; 2988 else 2989 return false; 2990 } 2991 2992 bool IsThumb1 = false; 2993 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) 2994 return false; 2995 2996 // We also want to do this peephole for cases like this: if (a*b == 0), 2997 // and optimise away the CMP instruction from the generated code sequence: 2998 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values 2999 // resulting from the select instruction, but these MOVS instructions for 3000 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. 
3001 // However, if we only have MOVS instructions in between the CMP and the 3002 // other instruction (the MULS in this example), then the CPSR is dead so we 3003 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this 3004 // reordering and then continue the analysis hoping we can eliminate the 3005 // CMP. This peephole works on the vregs, so is still in SSA form. As a 3006 // consequence, the movs won't redefine/kill the MUL operands which would 3007 // make this reordering illegal. 3008 const TargetRegisterInfo *TRI = &getRegisterInfo(); 3009 if (MI && IsThumb1) { 3010 --I; 3011 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) { 3012 bool CanReorder = true; 3013 for (; I != E; --I) { 3014 if (I->getOpcode() != ARM::tMOVi8) { 3015 CanReorder = false; 3016 break; 3017 } 3018 } 3019 if (CanReorder) { 3020 MI = MI->removeFromParent(); 3021 E = CmpInstr; 3022 CmpInstr.getParent()->insert(E, MI); 3023 } 3024 } 3025 I = CmpInstr; 3026 E = MI; 3027 } 3028 3029 // Check that CPSR isn't set between the comparison instruction and the one we 3030 // want to change. At the same time, search for SubAdd. 3031 bool SubAddIsThumb1 = false; 3032 do { 3033 const MachineInstr &Instr = *--I; 3034 3035 // Check whether CmpInstr can be made redundant by the current instruction. 3036 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr, 3037 SubAddIsThumb1)) { 3038 SubAdd = &*I; 3039 break; 3040 } 3041 3042 // Allow E (which was initially MI) to be SubAdd but do not search before E. 3043 if (I == E) 3044 break; 3045 3046 if (Instr.modifiesRegister(ARM::CPSR, TRI) || 3047 Instr.readsRegister(ARM::CPSR, TRI)) 3048 // This instruction modifies or uses CPSR after the one we want to 3049 // change. We can't do this transformation. 3050 return false; 3051 3052 if (I == B) { 3053 // In some cases, we scan the use-list of an instruction for an AND; 3054 // that AND is in the same BB, but may not be scheduled before the 3055 // corresponding TST. In that case, bail out. 3056 // 3057 // FIXME: We could try to reschedule the AND. 3058 return false; 3059 } 3060 } while (true); 3061 3062 // Return false if no candidates exist. 3063 if (!MI && !SubAdd) 3064 return false; 3065 3066 // If we found a SubAdd, use it as it will be closer to the CMP 3067 if (SubAdd) { 3068 MI = SubAdd; 3069 IsThumb1 = SubAddIsThumb1; 3070 } 3071 3072 // We can't use a predicated instruction - it doesn't always write the flags. 3073 if (isPredicated(*MI)) 3074 return false; 3075 3076 // Scan forward for the use of CPSR 3077 // When checking against MI: if it's a conditional code that requires 3078 // checking of the V bit or C bit, then this is not safe to do. 3079 // It is safe to remove CmpInstr if CPSR is redefined or killed. 3080 // If we are done with the basic block, we need to check whether CPSR is 3081 // live-out. 3082 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> 3083 OperandsToUpdate; 3084 bool isSafe = false; 3085 I = CmpInstr; 3086 E = CmpInstr.getParent()->end(); 3087 while (!isSafe && ++I != E) { 3088 const MachineInstr &Instr = *I; 3089 for (unsigned IO = 0, EO = Instr.getNumOperands(); 3090 !isSafe && IO != EO; ++IO) { 3091 const MachineOperand &MO = Instr.getOperand(IO); 3092 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { 3093 isSafe = true; 3094 break; 3095 } 3096 if (!MO.isReg() || MO.getReg() != ARM::CPSR) 3097 continue; 3098 if (MO.isDef()) { 3099 isSafe = true; 3100 break; 3101 } 3102 // Condition code is after the operand before CPSR except for VSELs. 
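        // VSEL* instructions encode their condition in the opcode itself, so
        // the switch below recovers it from the opcode rather than from an
        // immediate operand.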
3103 ARMCC::CondCodes CC; 3104 bool IsInstrVSel = true; 3105 switch (Instr.getOpcode()) { 3106 default: 3107 IsInstrVSel = false; 3108 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); 3109 break; 3110 case ARM::VSELEQD: 3111 case ARM::VSELEQS: 3112 case ARM::VSELEQH: 3113 CC = ARMCC::EQ; 3114 break; 3115 case ARM::VSELGTD: 3116 case ARM::VSELGTS: 3117 case ARM::VSELGTH: 3118 CC = ARMCC::GT; 3119 break; 3120 case ARM::VSELGED: 3121 case ARM::VSELGES: 3122 case ARM::VSELGEH: 3123 CC = ARMCC::GE; 3124 break; 3125 case ARM::VSELVSD: 3126 case ARM::VSELVSS: 3127 case ARM::VSELVSH: 3128 CC = ARMCC::VS; 3129 break; 3130 } 3131 3132 if (SubAdd) { 3133 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based 3134 // on CMP needs to be updated to be based on SUB. 3135 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also 3136 // needs to be modified. 3137 // Push the condition code operands to OperandsToUpdate. 3138 // If it is safe to remove CmpInstr, the condition code of these 3139 // operands will be modified. 3140 unsigned Opc = SubAdd->getOpcode(); 3141 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr || 3142 Opc == ARM::SUBri || Opc == ARM::t2SUBri || 3143 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 || 3144 Opc == ARM::tSUBi8; 3145 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2; 3146 if (!IsSub || 3147 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 && 3148 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) { 3149 // VSel doesn't support condition code update. 3150 if (IsInstrVSel) 3151 return false; 3152 // Ensure we can swap the condition. 3153 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC)); 3154 if (NewCC == ARMCC::AL) 3155 return false; 3156 OperandsToUpdate.push_back( 3157 std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); 3158 } 3159 } else { 3160 // No SubAdd, so this is x = <op> y, z; cmp x, 0. 3161 switch (CC) { 3162 case ARMCC::EQ: // Z 3163 case ARMCC::NE: // Z 3164 case ARMCC::MI: // N 3165 case ARMCC::PL: // N 3166 case ARMCC::AL: // none 3167 // CPSR can be used multiple times, we should continue. 3168 break; 3169 case ARMCC::HS: // C 3170 case ARMCC::LO: // C 3171 case ARMCC::VS: // V 3172 case ARMCC::VC: // V 3173 case ARMCC::HI: // C Z 3174 case ARMCC::LS: // C Z 3175 case ARMCC::GE: // N V 3176 case ARMCC::LT: // N V 3177 case ARMCC::GT: // Z N V 3178 case ARMCC::LE: // Z N V 3179 // The instruction uses the V bit or C bit which is not safe. 3180 return false; 3181 } 3182 } 3183 } 3184 } 3185 3186 // If CPSR is not killed nor re-defined, we should check whether it is 3187 // live-out. If it is live-out, do not optimize. 3188 if (!isSafe) { 3189 MachineBasicBlock *MBB = CmpInstr.getParent(); 3190 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 3191 SE = MBB->succ_end(); SI != SE; ++SI) 3192 if ((*SI)->isLiveIn(ARM::CPSR)) 3193 return false; 3194 } 3195 3196 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always 3197 // set CPSR so this is represented as an explicit output) 3198 if (!IsThumb1) { 3199 MI->getOperand(5).setReg(ARM::CPSR); 3200 MI->getOperand(5).setIsDef(true); 3201 } 3202 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); 3203 CmpInstr.eraseFromParent(); 3204 3205 // Modify the condition code of operands in OperandsToUpdate. 3206 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to 3207 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 
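  // For example, a user of the old CMP that tested GT must now test LT, and
  // one that tested HS must now test LS (see getSwappedCondition above).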
3208 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) 3209 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); 3210 3211 MI->clearRegisterDeads(ARM::CPSR); 3212 3213 return true; 3214 } 3215 3216 bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { 3217 // Do not sink MI if it might be used to optimize a redundant compare. 3218 // We heuristically only look at the instruction immediately following MI to 3219 // avoid potentially searching the entire basic block. 3220 if (isPredicated(MI)) 3221 return true; 3222 MachineBasicBlock::const_iterator Next = &MI; 3223 ++Next; 3224 Register SrcReg, SrcReg2; 3225 int CmpMask, CmpValue; 3226 bool IsThumb1; 3227 if (Next != MI.getParent()->end() && 3228 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) && 3229 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1)) 3230 return false; 3231 return true; 3232 } 3233 3234 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, 3235 Register Reg, 3236 MachineRegisterInfo *MRI) const { 3237 // Fold large immediates into add, sub, or, xor. 3238 unsigned DefOpc = DefMI.getOpcode(); 3239 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) 3240 return false; 3241 if (!DefMI.getOperand(1).isImm()) 3242 // Could be t2MOVi32imm @xx 3243 return false; 3244 3245 if (!MRI->hasOneNonDBGUse(Reg)) 3246 return false; 3247 3248 const MCInstrDesc &DefMCID = DefMI.getDesc(); 3249 if (DefMCID.hasOptionalDef()) { 3250 unsigned NumOps = DefMCID.getNumOperands(); 3251 const MachineOperand &MO = DefMI.getOperand(NumOps - 1); 3252 if (MO.getReg() == ARM::CPSR && !MO.isDead()) 3253 // If DefMI defines CPSR and it is not dead, it's obviously not safe 3254 // to delete DefMI. 3255 return false; 3256 } 3257 3258 const MCInstrDesc &UseMCID = UseMI.getDesc(); 3259 if (UseMCID.hasOptionalDef()) { 3260 unsigned NumOps = UseMCID.getNumOperands(); 3261 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR) 3262 // If the instruction sets the flag, do not attempt this optimization 3263 // since it may change the semantics of the code. 3264 return false; 3265 } 3266 3267 unsigned UseOpc = UseMI.getOpcode(); 3268 unsigned NewUseOpc = 0; 3269 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm(); 3270 uint32_t SOImmValV1 = 0, SOImmValV2 = 0; 3271 bool Commute = false; 3272 switch (UseOpc) { 3273 default: return false; 3274 case ARM::SUBrr: 3275 case ARM::ADDrr: 3276 case ARM::ORRrr: 3277 case ARM::EORrr: 3278 case ARM::t2SUBrr: 3279 case ARM::t2ADDrr: 3280 case ARM::t2ORRrr: 3281 case ARM::t2EORrr: { 3282 Commute = UseMI.getOperand(2).getReg() != Reg; 3283 switch (UseOpc) { 3284 default: break; 3285 case ARM::ADDrr: 3286 case ARM::SUBrr: 3287 if (UseOpc == ARM::SUBrr && Commute) 3288 return false; 3289 3290 // ADD/SUB are special because they're essentially the same operation, so 3291 // we can handle a larger range of immediates. 3292 if (ARM_AM::isSOImmTwoPartVal(ImmVal)) 3293 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; 3294 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) { 3295 ImmVal = -ImmVal; 3296 NewUseOpc = UseOpc == ARM::ADDrr ? 
ARM::SUBri : ARM::ADDri;
3297 } else
3298 return false;
3299 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3300 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3301 break;
3302 case ARM::ORRrr:
3303 case ARM::EORrr:
3304 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3305 return false;
3306 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3307 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3308 switch (UseOpc) {
3309 default: break;
3310 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3311 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3312 }
3313 break;
3314 case ARM::t2ADDrr:
3315 case ARM::t2SUBrr: {
3316 if (UseOpc == ARM::t2SUBrr && Commute)
3317 return false;
3318
3319 // ADD/SUB are special because they're essentially the same operation, so
3320 // we can handle a larger range of immediates.
3321 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3322 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3323 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3324 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3325 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3326 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3327 ImmVal = -ImmVal;
3328 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3329 } else
3330 return false;
3331 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3332 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3333 break;
3334 }
3335 case ARM::t2ORRrr:
3336 case ARM::t2EORrr:
3337 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3338 return false;
3339 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3340 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3341 switch (UseOpc) {
3342 default: break;
3343 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3344 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3345 }
3346 break;
3347 }
3348 }
3349 }
3350
3351 unsigned OpIdx = Commute ? 2 : 1;
3352 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3353 bool isKill = UseMI.getOperand(OpIdx).isKill();
3354 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3355 Register NewReg = MRI->createVirtualRegister(TRC);
3356 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3357 NewReg)
3358 .addReg(Reg1, getKillRegState(isKill))
3359 .addImm(SOImmValV1)
3360 .add(predOps(ARMCC::AL))
3361 .add(condCodeOp());
3362 UseMI.setDesc(get(NewUseOpc));
3363 UseMI.getOperand(1).setReg(NewReg);
3364 UseMI.getOperand(1).setIsKill();
3365 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3366 DefMI.eraseFromParent();
3367 // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3368 // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
3369 // Then the below code will not be needed, as the input/output register
3370 // classes will be rgpr or gprSP.
3371 // For now, we fix the UseMI operand explicitly here: 3372 switch(NewUseOpc){ 3373 case ARM::t2ADDspImm: 3374 case ARM::t2SUBspImm: 3375 case ARM::t2ADDri: 3376 case ARM::t2SUBri: 3377 MRI->setRegClass(UseMI.getOperand(0).getReg(), TRC); 3378 } 3379 return true; 3380 } 3381 3382 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, 3383 const MachineInstr &MI) { 3384 switch (MI.getOpcode()) { 3385 default: { 3386 const MCInstrDesc &Desc = MI.getDesc(); 3387 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); 3388 assert(UOps >= 0 && "bad # UOps"); 3389 return UOps; 3390 } 3391 3392 case ARM::LDRrs: 3393 case ARM::LDRBrs: 3394 case ARM::STRrs: 3395 case ARM::STRBrs: { 3396 unsigned ShOpVal = MI.getOperand(3).getImm(); 3397 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 3398 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3399 if (!isSub && 3400 (ShImm == 0 || 3401 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3402 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 3403 return 1; 3404 return 2; 3405 } 3406 3407 case ARM::LDRH: 3408 case ARM::STRH: { 3409 if (!MI.getOperand(2).getReg()) 3410 return 1; 3411 3412 unsigned ShOpVal = MI.getOperand(3).getImm(); 3413 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 3414 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3415 if (!isSub && 3416 (ShImm == 0 || 3417 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3418 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 3419 return 1; 3420 return 2; 3421 } 3422 3423 case ARM::LDRSB: 3424 case ARM::LDRSH: 3425 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2; 3426 3427 case ARM::LDRSB_POST: 3428 case ARM::LDRSH_POST: { 3429 Register Rt = MI.getOperand(0).getReg(); 3430 Register Rm = MI.getOperand(3).getReg(); 3431 return (Rt == Rm) ? 4 : 3; 3432 } 3433 3434 case ARM::LDR_PRE_REG: 3435 case ARM::LDRB_PRE_REG: { 3436 Register Rt = MI.getOperand(0).getReg(); 3437 Register Rm = MI.getOperand(3).getReg(); 3438 if (Rt == Rm) 3439 return 3; 3440 unsigned ShOpVal = MI.getOperand(4).getImm(); 3441 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 3442 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3443 if (!isSub && 3444 (ShImm == 0 || 3445 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3446 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 3447 return 2; 3448 return 3; 3449 } 3450 3451 case ARM::STR_PRE_REG: 3452 case ARM::STRB_PRE_REG: { 3453 unsigned ShOpVal = MI.getOperand(4).getImm(); 3454 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 3455 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3456 if (!isSub && 3457 (ShImm == 0 || 3458 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3459 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 3460 return 2; 3461 return 3; 3462 } 3463 3464 case ARM::LDRH_PRE: 3465 case ARM::STRH_PRE: { 3466 Register Rt = MI.getOperand(0).getReg(); 3467 Register Rm = MI.getOperand(3).getReg(); 3468 if (!Rm) 3469 return 2; 3470 if (Rt == Rm) 3471 return 3; 3472 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2; 3473 } 3474 3475 case ARM::LDR_POST_REG: 3476 case ARM::LDRB_POST_REG: 3477 case ARM::LDRH_POST: { 3478 Register Rt = MI.getOperand(0).getReg(); 3479 Register Rm = MI.getOperand(3).getReg(); 3480 return (Rt == Rm) ? 
3 : 2; 3481 } 3482 3483 case ARM::LDR_PRE_IMM: 3484 case ARM::LDRB_PRE_IMM: 3485 case ARM::LDR_POST_IMM: 3486 case ARM::LDRB_POST_IMM: 3487 case ARM::STRB_POST_IMM: 3488 case ARM::STRB_POST_REG: 3489 case ARM::STRB_PRE_IMM: 3490 case ARM::STRH_POST: 3491 case ARM::STR_POST_IMM: 3492 case ARM::STR_POST_REG: 3493 case ARM::STR_PRE_IMM: 3494 return 2; 3495 3496 case ARM::LDRSB_PRE: 3497 case ARM::LDRSH_PRE: { 3498 Register Rm = MI.getOperand(3).getReg(); 3499 if (Rm == 0) 3500 return 3; 3501 Register Rt = MI.getOperand(0).getReg(); 3502 if (Rt == Rm) 3503 return 4; 3504 unsigned ShOpVal = MI.getOperand(4).getImm(); 3505 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 3506 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3507 if (!isSub && 3508 (ShImm == 0 || 3509 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3510 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 3511 return 3; 3512 return 4; 3513 } 3514 3515 case ARM::LDRD: { 3516 Register Rt = MI.getOperand(0).getReg(); 3517 Register Rn = MI.getOperand(2).getReg(); 3518 Register Rm = MI.getOperand(3).getReg(); 3519 if (Rm) 3520 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 3521 : 3; 3522 return (Rt == Rn) ? 3 : 2; 3523 } 3524 3525 case ARM::STRD: { 3526 Register Rm = MI.getOperand(3).getReg(); 3527 if (Rm) 3528 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 3529 : 3; 3530 return 2; 3531 } 3532 3533 case ARM::LDRD_POST: 3534 case ARM::t2LDRD_POST: 3535 return 3; 3536 3537 case ARM::STRD_POST: 3538 case ARM::t2STRD_POST: 3539 return 4; 3540 3541 case ARM::LDRD_PRE: { 3542 Register Rt = MI.getOperand(0).getReg(); 3543 Register Rn = MI.getOperand(3).getReg(); 3544 Register Rm = MI.getOperand(4).getReg(); 3545 if (Rm) 3546 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 3547 : 4; 3548 return (Rt == Rn) ? 4 : 3; 3549 } 3550 3551 case ARM::t2LDRD_PRE: { 3552 Register Rt = MI.getOperand(0).getReg(); 3553 Register Rn = MI.getOperand(3).getReg(); 3554 return (Rt == Rn) ? 4 : 3; 3555 } 3556 3557 case ARM::STRD_PRE: { 3558 Register Rm = MI.getOperand(4).getReg(); 3559 if (Rm) 3560 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 3561 : 4; 3562 return 3; 3563 } 3564 3565 case ARM::t2STRD_PRE: 3566 return 3; 3567 3568 case ARM::t2LDR_POST: 3569 case ARM::t2LDRB_POST: 3570 case ARM::t2LDRB_PRE: 3571 case ARM::t2LDRSBi12: 3572 case ARM::t2LDRSBi8: 3573 case ARM::t2LDRSBpci: 3574 case ARM::t2LDRSBs: 3575 case ARM::t2LDRH_POST: 3576 case ARM::t2LDRH_PRE: 3577 case ARM::t2LDRSBT: 3578 case ARM::t2LDRSB_POST: 3579 case ARM::t2LDRSB_PRE: 3580 case ARM::t2LDRSH_POST: 3581 case ARM::t2LDRSH_PRE: 3582 case ARM::t2LDRSHi12: 3583 case ARM::t2LDRSHi8: 3584 case ARM::t2LDRSHpci: 3585 case ARM::t2LDRSHs: 3586 return 2; 3587 3588 case ARM::t2LDRDi8: { 3589 Register Rt = MI.getOperand(0).getReg(); 3590 Register Rn = MI.getOperand(2).getReg(); 3591 return (Rt == Rn) ? 3 : 2; 3592 } 3593 3594 case ARM::t2STRB_POST: 3595 case ARM::t2STRB_PRE: 3596 case ARM::t2STRBs: 3597 case ARM::t2STRDi8: 3598 case ARM::t2STRH_POST: 3599 case ARM::t2STRH_PRE: 3600 case ARM::t2STRHs: 3601 case ARM::t2STR_POST: 3602 case ARM::t2STR_PRE: 3603 case ARM::t2STRs: 3604 return 2; 3605 } 3606 } 3607 3608 // Return the number of 32-bit words loaded by LDM or stored by STM. If this 3609 // can't be easily determined return 0 (missing MachineMemOperand). 
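// For example, an LDMIA carrying four 4-byte MachineMemOperands reports a total
// size of 16 bytes and therefore 4 addresses; with no memory operands attached
// it conservatively reports 0.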
3610 // 3611 // FIXME: The current MachineInstr design does not support relying on machine 3612 // mem operands to determine the width of a memory access. Instead, we expect 3613 // the target to provide this information based on the instruction opcode and 3614 // operands. However, using MachineMemOperand is the best solution now for 3615 // two reasons: 3616 // 3617 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI 3618 // operands. This is much more dangerous than using the MachineMemOperand 3619 // sizes because CodeGen passes can insert/remove optional machine operands. In 3620 // fact, it's totally incorrect for preRA passes and appears to be wrong for 3621 // postRA passes as well. 3622 // 3623 // 2) getNumLDMAddresses is only used by the scheduling machine model and any 3624 // machine model that calls this should handle the unknown (zero size) case. 3625 // 3626 // Long term, we should require a target hook that verifies MachineMemOperand 3627 // sizes during MC lowering. That target hook should be local to MC lowering 3628 // because we can't ensure that it is aware of other MI forms. Doing this will 3629 // ensure that MachineMemOperands are correctly propagated through all passes. 3630 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { 3631 unsigned Size = 0; 3632 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 3633 E = MI.memoperands_end(); 3634 I != E; ++I) { 3635 Size += (*I)->getSize(); 3636 } 3637 // FIXME: The scheduler currently can't handle values larger than 16. But 3638 // the values can actually go up to 32 for floating-point load/store 3639 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory 3640 // operations isn't right; we could end up with "extra" memory operands for 3641 // various reasons, like tail merge merging two memory operations. 3642 return std::min(Size / 4, 16U); 3643 } 3644 3645 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, 3646 unsigned NumRegs) { 3647 unsigned UOps = 1 + NumRegs; // 1 for address computation. 3648 switch (Opc) { 3649 default: 3650 break; 3651 case ARM::VLDMDIA_UPD: 3652 case ARM::VLDMDDB_UPD: 3653 case ARM::VLDMSIA_UPD: 3654 case ARM::VLDMSDB_UPD: 3655 case ARM::VSTMDIA_UPD: 3656 case ARM::VSTMDDB_UPD: 3657 case ARM::VSTMSIA_UPD: 3658 case ARM::VSTMSDB_UPD: 3659 case ARM::LDMIA_UPD: 3660 case ARM::LDMDA_UPD: 3661 case ARM::LDMDB_UPD: 3662 case ARM::LDMIB_UPD: 3663 case ARM::STMIA_UPD: 3664 case ARM::STMDA_UPD: 3665 case ARM::STMDB_UPD: 3666 case ARM::STMIB_UPD: 3667 case ARM::tLDMIA_UPD: 3668 case ARM::tSTMIA_UPD: 3669 case ARM::t2LDMIA_UPD: 3670 case ARM::t2LDMDB_UPD: 3671 case ARM::t2STMIA_UPD: 3672 case ARM::t2STMDB_UPD: 3673 ++UOps; // One for base register writeback. 3674 break; 3675 case ARM::LDMIA_RET: 3676 case ARM::tPOP_RET: 3677 case ARM::t2LDMIA_RET: 3678 UOps += 2; // One for base reg wb, one for write to pc. 
3679 break;
3680 }
3681 return UOps;
3682 }
3683
3684 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3685 const MachineInstr &MI) const {
3686 if (!ItinData || ItinData->isEmpty())
3687 return 1;
3688
3689 const MCInstrDesc &Desc = MI.getDesc();
3690 unsigned Class = Desc.getSchedClass();
3691 int ItinUOps = ItinData->getNumMicroOps(Class);
3692 if (ItinUOps >= 0) {
3693 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3694 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3695
3696 return ItinUOps;
3697 }
3698
3699 unsigned Opc = MI.getOpcode();
3700 switch (Opc) {
3701 default:
3702 llvm_unreachable("Unexpected multi-uops instruction!");
3703 case ARM::VLDMQIA:
3704 case ARM::VSTMQIA:
3705 return 2;
3706
3707 // The number of uOps for load / store multiple is determined by the number
3708 // of registers.
3709 //
3710 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3711 // same cycle. The scheduling for the first load / store must be done
3712 // separately by assuming the address is not 64-bit aligned.
3713 //
3714 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3715 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3716 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3717 case ARM::VLDMDIA:
3718 case ARM::VLDMDIA_UPD:
3719 case ARM::VLDMDDB_UPD:
3720 case ARM::VLDMSIA:
3721 case ARM::VLDMSIA_UPD:
3722 case ARM::VLDMSDB_UPD:
3723 case ARM::VSTMDIA:
3724 case ARM::VSTMDIA_UPD:
3725 case ARM::VSTMDDB_UPD:
3726 case ARM::VSTMSIA:
3727 case ARM::VSTMSIA_UPD:
3728 case ARM::VSTMSDB_UPD: {
3729 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3730 return (NumRegs / 2) + (NumRegs % 2) + 1;
3731 }
3732
3733 case ARM::LDMIA_RET:
3734 case ARM::LDMIA:
3735 case ARM::LDMDA:
3736 case ARM::LDMDB:
3737 case ARM::LDMIB:
3738 case ARM::LDMIA_UPD:
3739 case ARM::LDMDA_UPD:
3740 case ARM::LDMDB_UPD:
3741 case ARM::LDMIB_UPD:
3742 case ARM::STMIA:
3743 case ARM::STMDA:
3744 case ARM::STMDB:
3745 case ARM::STMIB:
3746 case ARM::STMIA_UPD:
3747 case ARM::STMDA_UPD:
3748 case ARM::STMDB_UPD:
3749 case ARM::STMIB_UPD:
3750 case ARM::tLDMIA:
3751 case ARM::tLDMIA_UPD:
3752 case ARM::tSTMIA_UPD:
3753 case ARM::tPOP_RET:
3754 case ARM::tPOP:
3755 case ARM::tPUSH:
3756 case ARM::t2LDMIA_RET:
3757 case ARM::t2LDMIA:
3758 case ARM::t2LDMDB:
3759 case ARM::t2LDMIA_UPD:
3760 case ARM::t2LDMDB_UPD:
3761 case ARM::t2STMIA:
3762 case ARM::t2STMDB:
3763 case ARM::t2STMIA_UPD:
3764 case ARM::t2STMDB_UPD: {
3765 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3766 switch (Subtarget.getLdStMultipleTiming()) {
3767 case ARMSubtarget::SingleIssuePlusExtras:
3768 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3769 case ARMSubtarget::SingleIssue:
3770 // Assume the worst.
3771 return NumRegs;
3772 case ARMSubtarget::DoubleIssue: {
3773 if (NumRegs < 4)
3774 return 2;
3775 // 4 registers would be issued: 2, 2.
3776 // 5 registers would be issued: 2, 2, 1.
3777 unsigned UOps = (NumRegs / 2);
3778 if (NumRegs % 2)
3779 ++UOps;
3780 return UOps;
3781 }
3782 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3783 unsigned UOps = (NumRegs / 2);
3784 // If there is an odd number of registers or if it's not 64-bit aligned,
3785 // then it takes an extra AGU (Address Generation Unit) cycle.
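// For example, with the alignment check below, a 4-register LDMIA from a
// 64-bit aligned base costs 2 uops, while 5 registers, or 4 registers from an
// unaligned (or unknown) base, cost 3.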
3786 if ((NumRegs % 2) || !MI.hasOneMemOperand() || 3787 (*MI.memoperands_begin())->getAlign() < Align(8)) 3788 ++UOps; 3789 return UOps; 3790 } 3791 } 3792 } 3793 } 3794 llvm_unreachable("Didn't find the number of microops"); 3795 } 3796 3797 int 3798 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, 3799 const MCInstrDesc &DefMCID, 3800 unsigned DefClass, 3801 unsigned DefIdx, unsigned DefAlign) const { 3802 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 3803 if (RegNo <= 0) 3804 // Def is the address writeback. 3805 return ItinData->getOperandCycle(DefClass, DefIdx); 3806 3807 int DefCycle; 3808 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { 3809 // (regno / 2) + (regno % 2) + 1 3810 DefCycle = RegNo / 2 + 1; 3811 if (RegNo % 2) 3812 ++DefCycle; 3813 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3814 DefCycle = RegNo; 3815 bool isSLoad = false; 3816 3817 switch (DefMCID.getOpcode()) { 3818 default: break; 3819 case ARM::VLDMSIA: 3820 case ARM::VLDMSIA_UPD: 3821 case ARM::VLDMSDB_UPD: 3822 isSLoad = true; 3823 break; 3824 } 3825 3826 // If there are odd number of 'S' registers or if it's not 64-bit aligned, 3827 // then it takes an extra cycle. 3828 if ((isSLoad && (RegNo % 2)) || DefAlign < 8) 3829 ++DefCycle; 3830 } else { 3831 // Assume the worst. 3832 DefCycle = RegNo + 2; 3833 } 3834 3835 return DefCycle; 3836 } 3837 3838 bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { 3839 Register BaseReg = MI.getOperand(0).getReg(); 3840 for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { 3841 const auto &Op = MI.getOperand(i); 3842 if (Op.isReg() && Op.getReg() == BaseReg) 3843 return true; 3844 } 3845 return false; 3846 } 3847 unsigned 3848 ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { 3849 // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops 3850 // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops) 3851 return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); 3852 } 3853 3854 int 3855 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, 3856 const MCInstrDesc &DefMCID, 3857 unsigned DefClass, 3858 unsigned DefIdx, unsigned DefAlign) const { 3859 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 3860 if (RegNo <= 0) 3861 // Def is the address writeback. 3862 return ItinData->getOperandCycle(DefClass, DefIdx); 3863 3864 int DefCycle; 3865 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { 3866 // 4 registers would be issued: 1, 2, 1. 3867 // 5 registers would be issued: 1, 2, 2. 3868 DefCycle = RegNo / 2; 3869 if (DefCycle < 1) 3870 DefCycle = 1; 3871 // Result latency is issue cycle + 2: E2. 3872 DefCycle += 2; 3873 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3874 DefCycle = (RegNo / 2); 3875 // If there are odd number of registers or if it's not 64-bit aligned, 3876 // then it takes an extra AGU (Address Generation Unit) cycle. 3877 if ((RegNo % 2) || DefAlign < 8) 3878 ++DefCycle; 3879 // Result latency is AGU cycles + 2. 3880 DefCycle += 2; 3881 } else { 3882 // Assume the worst. 
3883 DefCycle = RegNo + 2; 3884 } 3885 3886 return DefCycle; 3887 } 3888 3889 int 3890 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, 3891 const MCInstrDesc &UseMCID, 3892 unsigned UseClass, 3893 unsigned UseIdx, unsigned UseAlign) const { 3894 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 3895 if (RegNo <= 0) 3896 return ItinData->getOperandCycle(UseClass, UseIdx); 3897 3898 int UseCycle; 3899 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { 3900 // (regno / 2) + (regno % 2) + 1 3901 UseCycle = RegNo / 2 + 1; 3902 if (RegNo % 2) 3903 ++UseCycle; 3904 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3905 UseCycle = RegNo; 3906 bool isSStore = false; 3907 3908 switch (UseMCID.getOpcode()) { 3909 default: break; 3910 case ARM::VSTMSIA: 3911 case ARM::VSTMSIA_UPD: 3912 case ARM::VSTMSDB_UPD: 3913 isSStore = true; 3914 break; 3915 } 3916 3917 // If there are odd number of 'S' registers or if it's not 64-bit aligned, 3918 // then it takes an extra cycle. 3919 if ((isSStore && (RegNo % 2)) || UseAlign < 8) 3920 ++UseCycle; 3921 } else { 3922 // Assume the worst. 3923 UseCycle = RegNo + 2; 3924 } 3925 3926 return UseCycle; 3927 } 3928 3929 int 3930 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, 3931 const MCInstrDesc &UseMCID, 3932 unsigned UseClass, 3933 unsigned UseIdx, unsigned UseAlign) const { 3934 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 3935 if (RegNo <= 0) 3936 return ItinData->getOperandCycle(UseClass, UseIdx); 3937 3938 int UseCycle; 3939 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { 3940 UseCycle = RegNo / 2; 3941 if (UseCycle < 2) 3942 UseCycle = 2; 3943 // Read in E3. 3944 UseCycle += 2; 3945 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3946 UseCycle = (RegNo / 2); 3947 // If there are odd number of registers or if it's not 64-bit aligned, 3948 // then it takes an extra AGU (Address Generation Unit) cycle. 3949 if ((RegNo % 2) || UseAlign < 8) 3950 ++UseCycle; 3951 } else { 3952 // Assume the worst. 3953 UseCycle = 1; 3954 } 3955 return UseCycle; 3956 } 3957 3958 int 3959 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 3960 const MCInstrDesc &DefMCID, 3961 unsigned DefIdx, unsigned DefAlign, 3962 const MCInstrDesc &UseMCID, 3963 unsigned UseIdx, unsigned UseAlign) const { 3964 unsigned DefClass = DefMCID.getSchedClass(); 3965 unsigned UseClass = UseMCID.getSchedClass(); 3966 3967 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) 3968 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); 3969 3970 // This may be a def / use of a variable_ops instruction, the operand 3971 // latency might be determinable dynamically. Let the target try to 3972 // figure it out. 
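// Sketch of the scheme below: compute the cycle in which the def becomes
// available (DefCycle, via the LDM/VLDM helpers where applicable) and the cycle
// in which the use reads its operand (UseCycle, likewise for STM/VSTM); the
// operand latency is then DefCycle - UseCycle + 1, reduced by one when the
// itinerary reports pipeline forwarding between the two instructions.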
3973 int DefCycle = -1; 3974 bool LdmBypass = false; 3975 switch (DefMCID.getOpcode()) { 3976 default: 3977 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 3978 break; 3979 3980 case ARM::VLDMDIA: 3981 case ARM::VLDMDIA_UPD: 3982 case ARM::VLDMDDB_UPD: 3983 case ARM::VLDMSIA: 3984 case ARM::VLDMSIA_UPD: 3985 case ARM::VLDMSDB_UPD: 3986 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 3987 break; 3988 3989 case ARM::LDMIA_RET: 3990 case ARM::LDMIA: 3991 case ARM::LDMDA: 3992 case ARM::LDMDB: 3993 case ARM::LDMIB: 3994 case ARM::LDMIA_UPD: 3995 case ARM::LDMDA_UPD: 3996 case ARM::LDMDB_UPD: 3997 case ARM::LDMIB_UPD: 3998 case ARM::tLDMIA: 3999 case ARM::tLDMIA_UPD: 4000 case ARM::tPUSH: 4001 case ARM::t2LDMIA_RET: 4002 case ARM::t2LDMIA: 4003 case ARM::t2LDMDB: 4004 case ARM::t2LDMIA_UPD: 4005 case ARM::t2LDMDB_UPD: 4006 LdmBypass = true; 4007 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 4008 break; 4009 } 4010 4011 if (DefCycle == -1) 4012 // We can't seem to determine the result latency of the def, assume it's 2. 4013 DefCycle = 2; 4014 4015 int UseCycle = -1; 4016 switch (UseMCID.getOpcode()) { 4017 default: 4018 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); 4019 break; 4020 4021 case ARM::VSTMDIA: 4022 case ARM::VSTMDIA_UPD: 4023 case ARM::VSTMDDB_UPD: 4024 case ARM::VSTMSIA: 4025 case ARM::VSTMSIA_UPD: 4026 case ARM::VSTMSDB_UPD: 4027 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 4028 break; 4029 4030 case ARM::STMIA: 4031 case ARM::STMDA: 4032 case ARM::STMDB: 4033 case ARM::STMIB: 4034 case ARM::STMIA_UPD: 4035 case ARM::STMDA_UPD: 4036 case ARM::STMDB_UPD: 4037 case ARM::STMIB_UPD: 4038 case ARM::tSTMIA_UPD: 4039 case ARM::tPOP_RET: 4040 case ARM::tPOP: 4041 case ARM::t2STMIA: 4042 case ARM::t2STMDB: 4043 case ARM::t2STMIA_UPD: 4044 case ARM::t2STMDB_UPD: 4045 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 4046 break; 4047 } 4048 4049 if (UseCycle == -1) 4050 // Assume it's read in the first stage. 4051 UseCycle = 1; 4052 4053 UseCycle = DefCycle - UseCycle + 1; 4054 if (UseCycle > 0) { 4055 if (LdmBypass) { 4056 // It's a variable_ops instruction so we can't use DefIdx here. Just use 4057 // first def operand. 
4058 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, 4059 UseClass, UseIdx)) 4060 --UseCycle; 4061 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, 4062 UseClass, UseIdx)) { 4063 --UseCycle; 4064 } 4065 } 4066 4067 return UseCycle; 4068 } 4069 4070 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, 4071 const MachineInstr *MI, unsigned Reg, 4072 unsigned &DefIdx, unsigned &Dist) { 4073 Dist = 0; 4074 4075 MachineBasicBlock::const_iterator I = MI; ++I; 4076 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); 4077 assert(II->isInsideBundle() && "Empty bundle?"); 4078 4079 int Idx = -1; 4080 while (II->isInsideBundle()) { 4081 Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); 4082 if (Idx != -1) 4083 break; 4084 --II; 4085 ++Dist; 4086 } 4087 4088 assert(Idx != -1 && "Cannot find bundled definition!"); 4089 DefIdx = Idx; 4090 return &*II; 4091 } 4092 4093 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, 4094 const MachineInstr &MI, unsigned Reg, 4095 unsigned &UseIdx, unsigned &Dist) { 4096 Dist = 0; 4097 4098 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); 4099 assert(II->isInsideBundle() && "Empty bundle?"); 4100 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 4101 4102 // FIXME: This doesn't properly handle multiple uses. 4103 int Idx = -1; 4104 while (II != E && II->isInsideBundle()) { 4105 Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); 4106 if (Idx != -1) 4107 break; 4108 if (II->getOpcode() != ARM::t2IT) 4109 ++Dist; 4110 ++II; 4111 } 4112 4113 if (Idx == -1) { 4114 Dist = 0; 4115 return nullptr; 4116 } 4117 4118 UseIdx = Idx; 4119 return &*II; 4120 } 4121 4122 /// Return the number of cycles to add to (or subtract from) the static 4123 /// itinerary based on the def opcode and alignment. The caller will ensure that 4124 /// adjusted latency is at least one cycle. 4125 static int adjustDefLatency(const ARMSubtarget &Subtarget, 4126 const MachineInstr &DefMI, 4127 const MCInstrDesc &DefMCID, unsigned DefAlign) { 4128 int Adjust = 0; 4129 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { 4130 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 4131 // variants are one cycle cheaper. 4132 switch (DefMCID.getOpcode()) { 4133 default: break; 4134 case ARM::LDRrs: 4135 case ARM::LDRBrs: { 4136 unsigned ShOpVal = DefMI.getOperand(3).getImm(); 4137 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 4138 if (ShImm == 0 || 4139 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 4140 --Adjust; 4141 break; 4142 } 4143 case ARM::t2LDRs: 4144 case ARM::t2LDRBs: 4145 case ARM::t2LDRHs: 4146 case ARM::t2LDRSHs: { 4147 // Thumb2 mode: lsl only. 4148 unsigned ShAmt = DefMI.getOperand(3).getImm(); 4149 if (ShAmt == 0 || ShAmt == 2) 4150 --Adjust; 4151 break; 4152 } 4153 } 4154 } else if (Subtarget.isSwift()) { 4155 // FIXME: Properly handle all of the latency adjustments for address 4156 // writeback. 
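// The adjustments below sketch Swift's cheaper addressing forms: a
// register-offset load whose shifter operand is a plain lsl #0-3 (and is not a
// subtraction) is modelled as two cycles faster, and the lsr #1 form as one
// cycle faster.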
4157 switch (DefMCID.getOpcode()) { 4158 default: break; 4159 case ARM::LDRrs: 4160 case ARM::LDRBrs: { 4161 unsigned ShOpVal = DefMI.getOperand(3).getImm(); 4162 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 4163 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 4164 if (!isSub && 4165 (ShImm == 0 || 4166 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 4167 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 4168 Adjust -= 2; 4169 else if (!isSub && 4170 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 4171 --Adjust; 4172 break; 4173 } 4174 case ARM::t2LDRs: 4175 case ARM::t2LDRBs: 4176 case ARM::t2LDRHs: 4177 case ARM::t2LDRSHs: { 4178 // Thumb2 mode: lsl only. 4179 unsigned ShAmt = DefMI.getOperand(3).getImm(); 4180 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) 4181 Adjust -= 2; 4182 break; 4183 } 4184 } 4185 } 4186 4187 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) { 4188 switch (DefMCID.getOpcode()) { 4189 default: break; 4190 case ARM::VLD1q8: 4191 case ARM::VLD1q16: 4192 case ARM::VLD1q32: 4193 case ARM::VLD1q64: 4194 case ARM::VLD1q8wb_fixed: 4195 case ARM::VLD1q16wb_fixed: 4196 case ARM::VLD1q32wb_fixed: 4197 case ARM::VLD1q64wb_fixed: 4198 case ARM::VLD1q8wb_register: 4199 case ARM::VLD1q16wb_register: 4200 case ARM::VLD1q32wb_register: 4201 case ARM::VLD1q64wb_register: 4202 case ARM::VLD2d8: 4203 case ARM::VLD2d16: 4204 case ARM::VLD2d32: 4205 case ARM::VLD2q8: 4206 case ARM::VLD2q16: 4207 case ARM::VLD2q32: 4208 case ARM::VLD2d8wb_fixed: 4209 case ARM::VLD2d16wb_fixed: 4210 case ARM::VLD2d32wb_fixed: 4211 case ARM::VLD2q8wb_fixed: 4212 case ARM::VLD2q16wb_fixed: 4213 case ARM::VLD2q32wb_fixed: 4214 case ARM::VLD2d8wb_register: 4215 case ARM::VLD2d16wb_register: 4216 case ARM::VLD2d32wb_register: 4217 case ARM::VLD2q8wb_register: 4218 case ARM::VLD2q16wb_register: 4219 case ARM::VLD2q32wb_register: 4220 case ARM::VLD3d8: 4221 case ARM::VLD3d16: 4222 case ARM::VLD3d32: 4223 case ARM::VLD1d64T: 4224 case ARM::VLD3d8_UPD: 4225 case ARM::VLD3d16_UPD: 4226 case ARM::VLD3d32_UPD: 4227 case ARM::VLD1d64Twb_fixed: 4228 case ARM::VLD1d64Twb_register: 4229 case ARM::VLD3q8_UPD: 4230 case ARM::VLD3q16_UPD: 4231 case ARM::VLD3q32_UPD: 4232 case ARM::VLD4d8: 4233 case ARM::VLD4d16: 4234 case ARM::VLD4d32: 4235 case ARM::VLD1d64Q: 4236 case ARM::VLD4d8_UPD: 4237 case ARM::VLD4d16_UPD: 4238 case ARM::VLD4d32_UPD: 4239 case ARM::VLD1d64Qwb_fixed: 4240 case ARM::VLD1d64Qwb_register: 4241 case ARM::VLD4q8_UPD: 4242 case ARM::VLD4q16_UPD: 4243 case ARM::VLD4q32_UPD: 4244 case ARM::VLD1DUPq8: 4245 case ARM::VLD1DUPq16: 4246 case ARM::VLD1DUPq32: 4247 case ARM::VLD1DUPq8wb_fixed: 4248 case ARM::VLD1DUPq16wb_fixed: 4249 case ARM::VLD1DUPq32wb_fixed: 4250 case ARM::VLD1DUPq8wb_register: 4251 case ARM::VLD1DUPq16wb_register: 4252 case ARM::VLD1DUPq32wb_register: 4253 case ARM::VLD2DUPd8: 4254 case ARM::VLD2DUPd16: 4255 case ARM::VLD2DUPd32: 4256 case ARM::VLD2DUPd8wb_fixed: 4257 case ARM::VLD2DUPd16wb_fixed: 4258 case ARM::VLD2DUPd32wb_fixed: 4259 case ARM::VLD2DUPd8wb_register: 4260 case ARM::VLD2DUPd16wb_register: 4261 case ARM::VLD2DUPd32wb_register: 4262 case ARM::VLD4DUPd8: 4263 case ARM::VLD4DUPd16: 4264 case ARM::VLD4DUPd32: 4265 case ARM::VLD4DUPd8_UPD: 4266 case ARM::VLD4DUPd16_UPD: 4267 case ARM::VLD4DUPd32_UPD: 4268 case ARM::VLD1LNd8: 4269 case ARM::VLD1LNd16: 4270 case ARM::VLD1LNd32: 4271 case ARM::VLD1LNd8_UPD: 4272 case ARM::VLD1LNd16_UPD: 4273 case ARM::VLD1LNd32_UPD: 4274 case ARM::VLD2LNd8: 4275 case ARM::VLD2LNd16: 4276 case ARM::VLD2LNd32: 
4277 case ARM::VLD2LNq16: 4278 case ARM::VLD2LNq32: 4279 case ARM::VLD2LNd8_UPD: 4280 case ARM::VLD2LNd16_UPD: 4281 case ARM::VLD2LNd32_UPD: 4282 case ARM::VLD2LNq16_UPD: 4283 case ARM::VLD2LNq32_UPD: 4284 case ARM::VLD4LNd8: 4285 case ARM::VLD4LNd16: 4286 case ARM::VLD4LNd32: 4287 case ARM::VLD4LNq16: 4288 case ARM::VLD4LNq32: 4289 case ARM::VLD4LNd8_UPD: 4290 case ARM::VLD4LNd16_UPD: 4291 case ARM::VLD4LNd32_UPD: 4292 case ARM::VLD4LNq16_UPD: 4293 case ARM::VLD4LNq32_UPD: 4294 // If the address is not 64-bit aligned, the latencies of these 4295 // instructions increases by one. 4296 ++Adjust; 4297 break; 4298 } 4299 } 4300 return Adjust; 4301 } 4302 4303 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 4304 const MachineInstr &DefMI, 4305 unsigned DefIdx, 4306 const MachineInstr &UseMI, 4307 unsigned UseIdx) const { 4308 // No operand latency. The caller may fall back to getInstrLatency. 4309 if (!ItinData || ItinData->isEmpty()) 4310 return -1; 4311 4312 const MachineOperand &DefMO = DefMI.getOperand(DefIdx); 4313 Register Reg = DefMO.getReg(); 4314 4315 const MachineInstr *ResolvedDefMI = &DefMI; 4316 unsigned DefAdj = 0; 4317 if (DefMI.isBundle()) 4318 ResolvedDefMI = 4319 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj); 4320 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || 4321 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { 4322 return 1; 4323 } 4324 4325 const MachineInstr *ResolvedUseMI = &UseMI; 4326 unsigned UseAdj = 0; 4327 if (UseMI.isBundle()) { 4328 ResolvedUseMI = 4329 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj); 4330 if (!ResolvedUseMI) 4331 return -1; 4332 } 4333 4334 return getOperandLatencyImpl( 4335 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO, 4336 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj); 4337 } 4338 4339 int ARMBaseInstrInfo::getOperandLatencyImpl( 4340 const InstrItineraryData *ItinData, const MachineInstr &DefMI, 4341 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, 4342 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, 4343 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { 4344 if (Reg == ARM::CPSR) { 4345 if (DefMI.getOpcode() == ARM::FMSTAT) { 4346 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) 4347 return Subtarget.isLikeA9() ? 1 : 20; 4348 } 4349 4350 // CPSR set and branch can be paired in the same cycle. 4351 if (UseMI.isBranch()) 4352 return 0; 4353 4354 // Otherwise it takes the instruction latency (generally one). 4355 unsigned Latency = getInstrLatency(ItinData, DefMI); 4356 4357 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to 4358 // its uses. Instructions which are otherwise scheduled between them may 4359 // incur a code size penalty (not able to use the CPSR setting 16-bit 4360 // instructions). 4361 if (Latency > 0 && Subtarget.isThumb2()) { 4362 const MachineFunction *MF = DefMI.getParent()->getParent(); 4363 // FIXME: Use Function::hasOptSize(). 4364 if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize)) 4365 --Latency; 4366 } 4367 return Latency; 4368 } 4369 4370 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit()) 4371 return -1; 4372 4373 unsigned DefAlign = DefMI.hasOneMemOperand() 4374 ? (*DefMI.memoperands_begin())->getAlign().value() 4375 : 0; 4376 unsigned UseAlign = UseMI.hasOneMemOperand() 4377 ? 
(*UseMI.memoperands_begin())->getAlign().value() 4378 : 0; 4379 4380 // Get the itinerary's latency if possible, and handle variable_ops. 4381 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID, 4382 UseIdx, UseAlign); 4383 // Unable to find operand latency. The caller may resort to getInstrLatency. 4384 if (Latency < 0) 4385 return Latency; 4386 4387 // Adjust for IT block position. 4388 int Adj = DefAdj + UseAdj; 4389 4390 // Adjust for dynamic def-side opcode variants not captured by the itinerary. 4391 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); 4392 if (Adj >= 0 || (int)Latency > -Adj) { 4393 return Latency + Adj; 4394 } 4395 // Return the itinerary latency, which may be zero but not less than zero. 4396 return Latency; 4397 } 4398 4399 int 4400 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 4401 SDNode *DefNode, unsigned DefIdx, 4402 SDNode *UseNode, unsigned UseIdx) const { 4403 if (!DefNode->isMachineOpcode()) 4404 return 1; 4405 4406 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); 4407 4408 if (isZeroCost(DefMCID.Opcode)) 4409 return 0; 4410 4411 if (!ItinData || ItinData->isEmpty()) 4412 return DefMCID.mayLoad() ? 3 : 1; 4413 4414 if (!UseNode->isMachineOpcode()) { 4415 int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); 4416 int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); 4417 int Threshold = 1 + Adj; 4418 return Latency <= Threshold ? 1 : Latency - Adj; 4419 } 4420 4421 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); 4422 auto *DefMN = cast<MachineSDNode>(DefNode); 4423 unsigned DefAlign = !DefMN->memoperands_empty() 4424 ? (*DefMN->memoperands_begin())->getAlign().value() 4425 : 0; 4426 auto *UseMN = cast<MachineSDNode>(UseNode); 4427 unsigned UseAlign = !UseMN->memoperands_empty() 4428 ? (*UseMN->memoperands_begin())->getAlign().value() 4429 : 0; 4430 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 4431 UseMCID, UseIdx, UseAlign); 4432 4433 if (Latency > 1 && 4434 (Subtarget.isCortexA8() || Subtarget.isLikeA9() || 4435 Subtarget.isCortexA7())) { 4436 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 4437 // variants are one cycle cheaper. 4438 switch (DefMCID.getOpcode()) { 4439 default: break; 4440 case ARM::LDRrs: 4441 case ARM::LDRBrs: { 4442 unsigned ShOpVal = 4443 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 4444 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 4445 if (ShImm == 0 || 4446 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 4447 --Latency; 4448 break; 4449 } 4450 case ARM::t2LDRs: 4451 case ARM::t2LDRBs: 4452 case ARM::t2LDRHs: 4453 case ARM::t2LDRSHs: { 4454 // Thumb2 mode: lsl only. 4455 unsigned ShAmt = 4456 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 4457 if (ShAmt == 0 || ShAmt == 2) 4458 --Latency; 4459 break; 4460 } 4461 } 4462 } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { 4463 // FIXME: Properly handle all of the latency adjustments for address 4464 // writeback. 
4465 switch (DefMCID.getOpcode()) { 4466 default: break; 4467 case ARM::LDRrs: 4468 case ARM::LDRBrs: { 4469 unsigned ShOpVal = 4470 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 4471 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 4472 if (ShImm == 0 || 4473 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 4474 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 4475 Latency -= 2; 4476 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 4477 --Latency; 4478 break; 4479 } 4480 case ARM::t2LDRs: 4481 case ARM::t2LDRBs: 4482 case ARM::t2LDRHs: 4483 case ARM::t2LDRSHs: 4484 // Thumb2 mode: lsl 0-3 only. 4485 Latency -= 2; 4486 break; 4487 } 4488 } 4489 4490 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) 4491 switch (DefMCID.getOpcode()) { 4492 default: break; 4493 case ARM::VLD1q8: 4494 case ARM::VLD1q16: 4495 case ARM::VLD1q32: 4496 case ARM::VLD1q64: 4497 case ARM::VLD1q8wb_register: 4498 case ARM::VLD1q16wb_register: 4499 case ARM::VLD1q32wb_register: 4500 case ARM::VLD1q64wb_register: 4501 case ARM::VLD1q8wb_fixed: 4502 case ARM::VLD1q16wb_fixed: 4503 case ARM::VLD1q32wb_fixed: 4504 case ARM::VLD1q64wb_fixed: 4505 case ARM::VLD2d8: 4506 case ARM::VLD2d16: 4507 case ARM::VLD2d32: 4508 case ARM::VLD2q8Pseudo: 4509 case ARM::VLD2q16Pseudo: 4510 case ARM::VLD2q32Pseudo: 4511 case ARM::VLD2d8wb_fixed: 4512 case ARM::VLD2d16wb_fixed: 4513 case ARM::VLD2d32wb_fixed: 4514 case ARM::VLD2q8PseudoWB_fixed: 4515 case ARM::VLD2q16PseudoWB_fixed: 4516 case ARM::VLD2q32PseudoWB_fixed: 4517 case ARM::VLD2d8wb_register: 4518 case ARM::VLD2d16wb_register: 4519 case ARM::VLD2d32wb_register: 4520 case ARM::VLD2q8PseudoWB_register: 4521 case ARM::VLD2q16PseudoWB_register: 4522 case ARM::VLD2q32PseudoWB_register: 4523 case ARM::VLD3d8Pseudo: 4524 case ARM::VLD3d16Pseudo: 4525 case ARM::VLD3d32Pseudo: 4526 case ARM::VLD1d8TPseudo: 4527 case ARM::VLD1d16TPseudo: 4528 case ARM::VLD1d32TPseudo: 4529 case ARM::VLD1d64TPseudo: 4530 case ARM::VLD1d64TPseudoWB_fixed: 4531 case ARM::VLD1d64TPseudoWB_register: 4532 case ARM::VLD3d8Pseudo_UPD: 4533 case ARM::VLD3d16Pseudo_UPD: 4534 case ARM::VLD3d32Pseudo_UPD: 4535 case ARM::VLD3q8Pseudo_UPD: 4536 case ARM::VLD3q16Pseudo_UPD: 4537 case ARM::VLD3q32Pseudo_UPD: 4538 case ARM::VLD3q8oddPseudo: 4539 case ARM::VLD3q16oddPseudo: 4540 case ARM::VLD3q32oddPseudo: 4541 case ARM::VLD3q8oddPseudo_UPD: 4542 case ARM::VLD3q16oddPseudo_UPD: 4543 case ARM::VLD3q32oddPseudo_UPD: 4544 case ARM::VLD4d8Pseudo: 4545 case ARM::VLD4d16Pseudo: 4546 case ARM::VLD4d32Pseudo: 4547 case ARM::VLD1d8QPseudo: 4548 case ARM::VLD1d16QPseudo: 4549 case ARM::VLD1d32QPseudo: 4550 case ARM::VLD1d64QPseudo: 4551 case ARM::VLD1d64QPseudoWB_fixed: 4552 case ARM::VLD1d64QPseudoWB_register: 4553 case ARM::VLD1q8HighQPseudo: 4554 case ARM::VLD1q8LowQPseudo_UPD: 4555 case ARM::VLD1q8HighTPseudo: 4556 case ARM::VLD1q8LowTPseudo_UPD: 4557 case ARM::VLD1q16HighQPseudo: 4558 case ARM::VLD1q16LowQPseudo_UPD: 4559 case ARM::VLD1q16HighTPseudo: 4560 case ARM::VLD1q16LowTPseudo_UPD: 4561 case ARM::VLD1q32HighQPseudo: 4562 case ARM::VLD1q32LowQPseudo_UPD: 4563 case ARM::VLD1q32HighTPseudo: 4564 case ARM::VLD1q32LowTPseudo_UPD: 4565 case ARM::VLD1q64HighQPseudo: 4566 case ARM::VLD1q64LowQPseudo_UPD: 4567 case ARM::VLD1q64HighTPseudo: 4568 case ARM::VLD1q64LowTPseudo_UPD: 4569 case ARM::VLD4d8Pseudo_UPD: 4570 case ARM::VLD4d16Pseudo_UPD: 4571 case ARM::VLD4d32Pseudo_UPD: 4572 case ARM::VLD4q8Pseudo_UPD: 4573 case ARM::VLD4q16Pseudo_UPD: 4574 case ARM::VLD4q32Pseudo_UPD: 4575 case 
ARM::VLD4q8oddPseudo: 4576 case ARM::VLD4q16oddPseudo: 4577 case ARM::VLD4q32oddPseudo: 4578 case ARM::VLD4q8oddPseudo_UPD: 4579 case ARM::VLD4q16oddPseudo_UPD: 4580 case ARM::VLD4q32oddPseudo_UPD: 4581 case ARM::VLD1DUPq8: 4582 case ARM::VLD1DUPq16: 4583 case ARM::VLD1DUPq32: 4584 case ARM::VLD1DUPq8wb_fixed: 4585 case ARM::VLD1DUPq16wb_fixed: 4586 case ARM::VLD1DUPq32wb_fixed: 4587 case ARM::VLD1DUPq8wb_register: 4588 case ARM::VLD1DUPq16wb_register: 4589 case ARM::VLD1DUPq32wb_register: 4590 case ARM::VLD2DUPd8: 4591 case ARM::VLD2DUPd16: 4592 case ARM::VLD2DUPd32: 4593 case ARM::VLD2DUPd8wb_fixed: 4594 case ARM::VLD2DUPd16wb_fixed: 4595 case ARM::VLD2DUPd32wb_fixed: 4596 case ARM::VLD2DUPd8wb_register: 4597 case ARM::VLD2DUPd16wb_register: 4598 case ARM::VLD2DUPd32wb_register: 4599 case ARM::VLD2DUPq8EvenPseudo: 4600 case ARM::VLD2DUPq8OddPseudo: 4601 case ARM::VLD2DUPq16EvenPseudo: 4602 case ARM::VLD2DUPq16OddPseudo: 4603 case ARM::VLD2DUPq32EvenPseudo: 4604 case ARM::VLD2DUPq32OddPseudo: 4605 case ARM::VLD3DUPq8EvenPseudo: 4606 case ARM::VLD3DUPq8OddPseudo: 4607 case ARM::VLD3DUPq16EvenPseudo: 4608 case ARM::VLD3DUPq16OddPseudo: 4609 case ARM::VLD3DUPq32EvenPseudo: 4610 case ARM::VLD3DUPq32OddPseudo: 4611 case ARM::VLD4DUPd8Pseudo: 4612 case ARM::VLD4DUPd16Pseudo: 4613 case ARM::VLD4DUPd32Pseudo: 4614 case ARM::VLD4DUPd8Pseudo_UPD: 4615 case ARM::VLD4DUPd16Pseudo_UPD: 4616 case ARM::VLD4DUPd32Pseudo_UPD: 4617 case ARM::VLD4DUPq8EvenPseudo: 4618 case ARM::VLD4DUPq8OddPseudo: 4619 case ARM::VLD4DUPq16EvenPseudo: 4620 case ARM::VLD4DUPq16OddPseudo: 4621 case ARM::VLD4DUPq32EvenPseudo: 4622 case ARM::VLD4DUPq32OddPseudo: 4623 case ARM::VLD1LNq8Pseudo: 4624 case ARM::VLD1LNq16Pseudo: 4625 case ARM::VLD1LNq32Pseudo: 4626 case ARM::VLD1LNq8Pseudo_UPD: 4627 case ARM::VLD1LNq16Pseudo_UPD: 4628 case ARM::VLD1LNq32Pseudo_UPD: 4629 case ARM::VLD2LNd8Pseudo: 4630 case ARM::VLD2LNd16Pseudo: 4631 case ARM::VLD2LNd32Pseudo: 4632 case ARM::VLD2LNq16Pseudo: 4633 case ARM::VLD2LNq32Pseudo: 4634 case ARM::VLD2LNd8Pseudo_UPD: 4635 case ARM::VLD2LNd16Pseudo_UPD: 4636 case ARM::VLD2LNd32Pseudo_UPD: 4637 case ARM::VLD2LNq16Pseudo_UPD: 4638 case ARM::VLD2LNq32Pseudo_UPD: 4639 case ARM::VLD4LNd8Pseudo: 4640 case ARM::VLD4LNd16Pseudo: 4641 case ARM::VLD4LNd32Pseudo: 4642 case ARM::VLD4LNq16Pseudo: 4643 case ARM::VLD4LNq32Pseudo: 4644 case ARM::VLD4LNd8Pseudo_UPD: 4645 case ARM::VLD4LNd16Pseudo_UPD: 4646 case ARM::VLD4LNd32Pseudo_UPD: 4647 case ARM::VLD4LNq16Pseudo_UPD: 4648 case ARM::VLD4LNq32Pseudo_UPD: 4649 // If the address is not 64-bit aligned, the latencies of these 4650 // instructions increases by one. 4651 ++Latency; 4652 break; 4653 } 4654 4655 return Latency; 4656 } 4657 4658 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { 4659 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || 4660 MI.isImplicitDef()) 4661 return 0; 4662 4663 if (MI.isBundle()) 4664 return 0; 4665 4666 const MCInstrDesc &MCID = MI.getDesc(); 4667 4668 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && 4669 !Subtarget.cheapPredicableCPSRDef())) { 4670 // When predicated, CPSR is an additional source operand for CPSR updating 4671 // instructions, this apparently increases their latencies. 
4672 return 1; 4673 } 4674 return 0; 4675 } 4676 4677 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 4678 const MachineInstr &MI, 4679 unsigned *PredCost) const { 4680 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || 4681 MI.isImplicitDef()) 4682 return 1; 4683 4684 // An instruction scheduler typically runs on unbundled instructions, however 4685 // other passes may query the latency of a bundled instruction. 4686 if (MI.isBundle()) { 4687 unsigned Latency = 0; 4688 MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 4689 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 4690 while (++I != E && I->isInsideBundle()) { 4691 if (I->getOpcode() != ARM::t2IT) 4692 Latency += getInstrLatency(ItinData, *I, PredCost); 4693 } 4694 return Latency; 4695 } 4696 4697 const MCInstrDesc &MCID = MI.getDesc(); 4698 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && 4699 !Subtarget.cheapPredicableCPSRDef()))) { 4700 // When predicated, CPSR is an additional source operand for CPSR updating 4701 // instructions, this apparently increases their latencies. 4702 *PredCost = 1; 4703 } 4704 // Be sure to call getStageLatency for an empty itinerary in case it has a 4705 // valid MinLatency property. 4706 if (!ItinData) 4707 return MI.mayLoad() ? 3 : 1; 4708 4709 unsigned Class = MCID.getSchedClass(); 4710 4711 // For instructions with variable uops, use uops as latency. 4712 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) 4713 return getNumMicroOps(ItinData, MI); 4714 4715 // For the common case, fall back on the itinerary's latency. 4716 unsigned Latency = ItinData->getStageLatency(Class); 4717 4718 // Adjust for dynamic def-side opcode variants not captured by the itinerary. 4719 unsigned DefAlign = 4720 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0; 4721 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign); 4722 if (Adj >= 0 || (int)Latency > -Adj) { 4723 return Latency + Adj; 4724 } 4725 return Latency; 4726 } 4727 4728 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 4729 SDNode *Node) const { 4730 if (!Node->isMachineOpcode()) 4731 return 1; 4732 4733 if (!ItinData || ItinData->isEmpty()) 4734 return 1; 4735 4736 unsigned Opcode = Node->getMachineOpcode(); 4737 switch (Opcode) { 4738 default: 4739 return ItinData->getStageLatency(get(Opcode).getSchedClass()); 4740 case ARM::VLDMQIA: 4741 case ARM::VSTMQIA: 4742 return 2; 4743 } 4744 } 4745 4746 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, 4747 const MachineRegisterInfo *MRI, 4748 const MachineInstr &DefMI, 4749 unsigned DefIdx, 4750 const MachineInstr &UseMI, 4751 unsigned UseIdx) const { 4752 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; 4753 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; 4754 if (Subtarget.nonpipelinedVFP() && 4755 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) 4756 return true; 4757 4758 // Hoist VFP / NEON instructions with 4 or higher latency. 
4759 unsigned Latency = 4760 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx); 4761 if (Latency <= 3) 4762 return false; 4763 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || 4764 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; 4765 } 4766 4767 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, 4768 const MachineInstr &DefMI, 4769 unsigned DefIdx) const { 4770 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); 4771 if (!ItinData || ItinData->isEmpty()) 4772 return false; 4773 4774 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; 4775 if (DDomain == ARMII::DomainGeneral) { 4776 unsigned DefClass = DefMI.getDesc().getSchedClass(); 4777 int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 4778 return (DefCycle != -1 && DefCycle <= 2); 4779 } 4780 return false; 4781 } 4782 4783 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, 4784 StringRef &ErrInfo) const { 4785 if (convertAddSubFlagsOpcode(MI.getOpcode())) { 4786 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; 4787 return false; 4788 } 4789 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) { 4790 // Make sure we don't generate a lo-lo mov that isn't supported. 4791 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) && 4792 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) { 4793 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only"; 4794 return false; 4795 } 4796 } 4797 if (MI.getOpcode() == ARM::tPUSH || 4798 MI.getOpcode() == ARM::tPOP || 4799 MI.getOpcode() == ARM::tPOP_RET) { 4800 for (int i = 2, e = MI.getNumOperands(); i < e; ++i) { 4801 if (MI.getOperand(i).isImplicit() || 4802 !MI.getOperand(i).isReg()) 4803 continue; 4804 Register Reg = MI.getOperand(i).getReg(); 4805 if (Reg < ARM::R0 || Reg > ARM::R7) { 4806 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && 4807 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { 4808 ErrInfo = "Unsupported register in Thumb1 push/pop"; 4809 return false; 4810 } 4811 } 4812 } 4813 } 4814 return true; 4815 } 4816 4817 // LoadStackGuard has so far only been implemented for MachO. Different code 4818 // sequence is needed for other targets. 
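// Roughly, the expansion below produces (opcode selection is left to the
// caller via LoadImmOpc / LoadOpc):
//   Reg = <LoadImmOpc> stack-guard-global     ; materialize the address
//   Reg = <LoadOpc> [Reg, #0]                 ; extra GOT load if the global is
//                                             ; accessed indirectly
//   Reg = <LoadOpc> [Reg, #0]                 ; load the guard value itself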
4819 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, 4820 unsigned LoadImmOpc, 4821 unsigned LoadOpc) const { 4822 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && 4823 "ROPI/RWPI not currently supported with stack guard"); 4824 4825 MachineBasicBlock &MBB = *MI->getParent(); 4826 DebugLoc DL = MI->getDebugLoc(); 4827 Register Reg = MI->getOperand(0).getReg(); 4828 const GlobalValue *GV = 4829 cast<GlobalValue>((*MI->memoperands_begin())->getValue()); 4830 MachineInstrBuilder MIB; 4831 4832 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) 4833 .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); 4834 4835 if (Subtarget.isGVIndirectSymbol(GV)) { 4836 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 4837 MIB.addReg(Reg, RegState::Kill).addImm(0); 4838 auto Flags = MachineMemOperand::MOLoad | 4839 MachineMemOperand::MODereferenceable | 4840 MachineMemOperand::MOInvariant; 4841 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( 4842 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4)); 4843 MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); 4844 } 4845 4846 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 4847 MIB.addReg(Reg, RegState::Kill) 4848 .addImm(0) 4849 .cloneMemRefs(*MI) 4850 .add(predOps(ARMCC::AL)); 4851 } 4852 4853 bool 4854 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, 4855 unsigned &AddSubOpc, 4856 bool &NegAcc, bool &HasLane) const { 4857 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); 4858 if (I == MLxEntryMap.end()) 4859 return false; 4860 4861 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; 4862 MulOpc = Entry.MulOpc; 4863 AddSubOpc = Entry.AddSubOpc; 4864 NegAcc = Entry.NegAcc; 4865 HasLane = Entry.HasLane; 4866 return true; 4867 } 4868 4869 //===----------------------------------------------------------------------===// 4870 // Execution domains. 4871 //===----------------------------------------------------------------------===// 4872 // 4873 // Some instructions go down the NEON pipeline, some go down the VFP pipeline, 4874 // and some can go down both. The vmov instructions go down the VFP pipeline, 4875 // but they can be changed to vorr equivalents that are executed by the NEON 4876 // pipeline. 4877 // 4878 // We use the following execution domain numbering: 4879 // 4880 enum ARMExeDomain { 4881 ExeGeneric = 0, 4882 ExeVFP = 1, 4883 ExeNEON = 2 4884 }; 4885 4886 // 4887 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h 4888 // 4889 std::pair<uint16_t, uint16_t> 4890 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { 4891 // If we don't have access to NEON instructions then we won't be able 4892 // to swizzle anything to the NEON domain. Check to make sure. 4893 if (Subtarget.hasNEON()) { 4894 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON 4895 // if they are not predicated. 4896 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) 4897 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); 4898 4899 // CortexA9 is particularly picky about mixing the two and wants these 4900 // converted. 4901 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && 4902 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || 4903 MI.getOpcode() == ARM::VMOVS)) 4904 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); 4905 } 4906 // No other instructions can be swizzled, so just determine their domain. 
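// (The pair returned here is <current domain, bit mask of domains the
// instruction could be rewritten into>; a mask of 0, as in the cases below,
// means the instruction cannot be swizzled to another domain.)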
4907 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; 4908 4909 if (Domain & ARMII::DomainNEON) 4910 return std::make_pair(ExeNEON, 0); 4911 4912 // Certain instructions can go either way on Cortex-A8. 4913 // Treat them as NEON instructions. 4914 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) 4915 return std::make_pair(ExeNEON, 0); 4916 4917 if (Domain & ARMII::DomainVFP) 4918 return std::make_pair(ExeVFP, 0); 4919 4920 return std::make_pair(ExeGeneric, 0); 4921 } 4922 4923 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, 4924 unsigned SReg, unsigned &Lane) { 4925 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); 4926 Lane = 0; 4927 4928 if (DReg != ARM::NoRegister) 4929 return DReg; 4930 4931 Lane = 1; 4932 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); 4933 4934 assert(DReg && "S-register with no D super-register?"); 4935 return DReg; 4936 } 4937 4938 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, 4939 /// set ImplicitSReg to a register number that must be marked as implicit-use or 4940 /// zero if no register needs to be defined as implicit-use. 4941 /// 4942 /// If the function cannot determine if an SPR should be marked implicit use or 4943 /// not, it returns false. 4944 /// 4945 /// This function handles cases where an instruction is being modified from taking 4946 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict 4947 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other 4948 /// lane of the DPR). 4949 /// 4950 /// If the other SPR is defined, an implicit-use of it should be added. Else, 4951 /// (including the case where the DPR itself is defined), it should not. 4952 /// 4953 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, 4954 MachineInstr &MI, unsigned DReg, 4955 unsigned Lane, unsigned &ImplicitSReg) { 4956 // If the DPR is defined or used already, the other SPR lane will be chained 4957 // correctly, so there is nothing to be done. 4958 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) { 4959 ImplicitSReg = 0; 4960 return true; 4961 } 4962 4963 // Otherwise we need to go searching to see if the SPR is set explicitly. 4964 ImplicitSReg = TRI->getSubReg(DReg, 4965 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); 4966 MachineBasicBlock::LivenessQueryResult LQR = 4967 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); 4968 4969 if (LQR == MachineBasicBlock::LQR_Live) 4970 return true; 4971 else if (LQR == MachineBasicBlock::LQR_Unknown) 4972 return false; 4973 4974 // If the register is known not to be live, there is no need to add an 4975 // implicit-use. 4976 ImplicitSReg = 0; 4977 return true; 4978 } 4979 4980 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, 4981 unsigned Domain) const { 4982 unsigned DstReg, SrcReg, DReg; 4983 unsigned Lane; 4984 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); 4985 const TargetRegisterInfo *TRI = &getRegisterInfo(); 4986 switch (MI.getOpcode()) { 4987 default: 4988 llvm_unreachable("cannot handle opcode!"); 4989 break; 4990 case ARM::VMOVD: 4991 if (Domain != ExeNEON) 4992 break; 4993 4994 // Zap the predicate operands. 4995 assert(!isPredicated(MI) && "Cannot predicate a VORRd"); 4996 4997 // Make sure we've got NEON instructions. 
4998 assert(Subtarget.hasNEON() && "VORRd requires NEON"); 4999 5000 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) 5001 DstReg = MI.getOperand(0).getReg(); 5002 SrcReg = MI.getOperand(1).getReg(); 5003 5004 for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 5005 MI.RemoveOperand(i - 1); 5006 5007 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) 5008 MI.setDesc(get(ARM::VORRd)); 5009 MIB.addReg(DstReg, RegState::Define) 5010 .addReg(SrcReg) 5011 .addReg(SrcReg) 5012 .add(predOps(ARMCC::AL)); 5013 break; 5014 case ARM::VMOVRS: 5015 if (Domain != ExeNEON) 5016 break; 5017 assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); 5018 5019 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) 5020 DstReg = MI.getOperand(0).getReg(); 5021 SrcReg = MI.getOperand(1).getReg(); 5022 5023 for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 5024 MI.RemoveOperand(i - 1); 5025 5026 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); 5027 5028 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) 5029 // Note that DSrc has been widened and the other lane may be undef, which 5030 // contaminates the entire register. 5031 MI.setDesc(get(ARM::VGETLNi32)); 5032 MIB.addReg(DstReg, RegState::Define) 5033 .addReg(DReg, RegState::Undef) 5034 .addImm(Lane) 5035 .add(predOps(ARMCC::AL)); 5036 5037 // The old source should be an implicit use, otherwise we might think it 5038 // was dead before here. 5039 MIB.addReg(SrcReg, RegState::Implicit); 5040 break; 5041 case ARM::VMOVSR: { 5042 if (Domain != ExeNEON) 5043 break; 5044 assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); 5045 5046 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) 5047 DstReg = MI.getOperand(0).getReg(); 5048 SrcReg = MI.getOperand(1).getReg(); 5049 5050 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); 5051 5052 unsigned ImplicitSReg; 5053 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) 5054 break; 5055 5056 for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 5057 MI.RemoveOperand(i - 1); 5058 5059 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) 5060 // Again DDst may be undefined at the beginning of this instruction. 5061 MI.setDesc(get(ARM::VSETLNi32)); 5062 MIB.addReg(DReg, RegState::Define) 5063 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) 5064 .addReg(SrcReg) 5065 .addImm(Lane) 5066 .add(predOps(ARMCC::AL)); 5067 5068 // The narrower destination must be marked as set to keep previous chains 5069 // in place. 
    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    if (ImplicitSReg != 0)
      MIB.addReg(ImplicitSReg, RegState::Implicit);
    break;
  }
  case ARM::VMOVS: {
    if (Domain != ExeNEON)
      break;

    // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();

    unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
    DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
    DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);

    unsigned ImplicitSReg;
    if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
      break;

    for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
      MI.RemoveOperand(i - 1);

    if (DSrc == DDst) {
      // Destination can be:
      //   %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
      MI.setDesc(get(ARM::VDUPLN32d));
      MIB.addReg(DDst, RegState::Define)
          .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
          .addImm(SrcLane)
          .add(predOps(ARMCC::AL));

      // Neither the source nor the destination is naturally represented any
      // more, so add them in manually.
      MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
      MIB.addReg(SrcReg, RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }

    // In general there's no single instruction that can perform an S <-> S
    // move in NEON space, but a pair of VEXT instructions *can* do the
    // job. It turns out that the VEXTs needed will only use DSrc once, with
    // the position based purely on the combination of lane-0 and lane-1
    // involved. For example
    //   vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
    //   vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
    //   vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
    //   vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
    //
    // Pattern of the MachineInstrs is:
    //   %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (; implicits)
    MachineInstrBuilder NewMIB;
    NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
                     DDst);

    // On the first instruction, both DSrc and DDst may be undef if present,
    // specifically when the original instruction didn't have them as an
    // <imp-use>.
    unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
    bool CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

    CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
    CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)
        .add(predOps(ARMCC::AL));

    if (SrcLane == DstLane)
      NewMIB.addReg(SrcReg, RegState::Implicit);

    MI.setDesc(get(ARM::VEXTd32));
    MIB.addReg(DDst, RegState::Define);

    // On the second instruction, DDst has definitely been defined above, so
    // it is not undef. DSrc, if present, can be undef as above.
    CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef));

    CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)
        .add(predOps(ARMCC::AL));

    if (SrcLane != DstLane)
      MIB.addReg(SrcReg, RegState::Implicit);

    // As before, the original destination is no longer represented, so add it
    // implicitly.
    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    if (ImplicitSReg != 0)
      MIB.addReg(ImplicitSReg, RegState::Implicit);
    break;
  }
  }
}

//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity. That means any
// instruction writing an S-reg implicitly reads the containing D-reg. The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
    const MachineInstr &MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
  if (!PartialUpdateClearance)
    return 0;

  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI.getOperand(OpNum);
  if (MO.readsReg())
    return 0;
  Register Reg = MO.getReg();
  int UseOp = -1;

  switch (MI.getOpcode()) {
  // Normal instructions writing only an S-register.
  case ARM::VLDRS:
  case ARM::FCONSTS:
  case ARM::VMOVSR:
  case ARM::VMOVv8i8:
  case ARM::VMOVv4i16:
  case ARM::VMOVv2i32:
  case ARM::VMOVv2f32:
  case ARM::VMOVv1i64:
    UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
    break;

  // Explicitly reads the dependency.
  case ARM::VLD1LNd32:
    UseOp = 3;
    break;
  default:
    return 0;
  }

  // If this instruction actually reads a value from Reg, there is no unwanted
  // dependency.
  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
    return 0;

  // We must be able to clobber the whole D-reg.
  if (Register::isVirtualRegister(Reg)) {
    // Virtual register must be a def undef foo:ssub_0 operand.
    if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else if (ARM::SPRRegClass.contains(Reg)) {
    // Physical register: MI must define the full D-reg.
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
                                             &ARM::DPRRegClass);
    if (!DReg || !MI.definesRegister(DReg, TRI))
      return 0;
  }

  // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
  return PartialUpdateClearance;
}

// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
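// As an illustration (register names are arbitrary): on Swift a
// "VLDRS %s0, ..." writes only the low lane of %d0, so it would otherwise have
// to wait for whatever instruction last produced %d0. Planting an
// "FCONSTD %d0, #imm" (any immediate will do) right before it gives %d0 a
// cheap full definition and removes that false dependency.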
void ARMBaseInstrInfo::breakPartialRegDependency(
    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI.getOperand(OpNum);
  Register Reg = MO.getReg();
  assert(Register::isPhysicalRegister(Reg) &&
         "Can't break virtual register dependencies.");
  unsigned DReg = Reg;

  // If MI defines an S-reg, find the corresponding D super-register.
  if (ARM::SPRRegClass.contains(Reg)) {
    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
  }

  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");

  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
  // the full D-register by loading the same value to both lanes. The
  // instruction is micro-coded with 2 uops, so don't do this until we can
  // properly schedule micro-coded instructions. The dispatcher stalls cause
  // too big regressions.

  // Insert the dependency-breaking FCONSTD before MI.
  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
      .addImm(96)
      .add(predOps(ARMCC::AL));
  MI.addRegisterKilled(DReg, TRI, true);
}

bool ARMBaseInstrInfo::hasNOP() const {
  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
}

bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
  if (MI->getNumOperands() < 4)
    return true;
  unsigned ShOpVal = MI->getOperand(3).getImm();
  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
      ((ShImm == 1 || ShImm == 2) &&
       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
    return true;

  return false;
}

bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
    const MachineInstr &MI, unsigned DefIdx,
    SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VMOVDRR:
    // dX = VMOVDRR rY, rZ
    // is the same as:
    // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
    // Populate the InputRegs accordingly.
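    // (Here ssub_0 and ssub_1 name the low and high 32-bit halves of the
    // D-register, so rY supplies the low half and rZ the high half.)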
    // rY
    const MachineOperand *MOReg = &MI.getOperand(1);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
                                              MOReg->getSubReg(), ARM::ssub_0));
    // rZ
    MOReg = &MI.getOperand(2);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
                                              MOReg->getSubReg(), ARM::ssub_1));
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}

bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
    const MachineInstr &MI, unsigned DefIdx,
    RegSubRegPairAndIdx &InputReg) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VMOVRRD:
    // rX, rY = VMOVRRD dZ
    // is the same as:
    // rX = EXTRACT_SUBREG dZ, ssub_0
    // rY = EXTRACT_SUBREG dZ, ssub_1
    const MachineOperand &MOReg = MI.getOperand(2);
    if (MOReg.isUndef())
      return false;
    InputReg.Reg = MOReg.getReg();
    InputReg.SubReg = MOReg.getSubReg();
    InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}

bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
    const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
    RegSubRegPairAndIdx &InsertedReg) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VSETLNi32:
    // dX = VSETLNi32 dY, rZ, imm
    const MachineOperand &MOBaseReg = MI.getOperand(1);
    const MachineOperand &MOInsertedReg = MI.getOperand(2);
    if (MOInsertedReg.isUndef())
      return false;
    const MachineOperand &MOIndex = MI.getOperand(3);
    BaseReg.Reg = MOBaseReg.getReg();
    BaseReg.SubReg = MOBaseReg.getSubReg();

    InsertedReg.Reg = MOInsertedReg.getReg();
    InsertedReg.SubReg = MOInsertedReg.getSubReg();
    InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}

std::pair<unsigned, unsigned>
ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = ARMII::MO_OPTION_MASK;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace ARMII;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace ARMII;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "arm-coffstub"},
      {MO_GOT, "arm-got"},
      {MO_SBREL, "arm-sbrel"},
      {MO_DLLIMPORT, "arm-dllimport"},
      {MO_SECREL, "arm-secrel"},
      {MO_NONLAZY, "arm-nonlazy"}};
  return makeArrayRef(TargetFlags);
}

Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI,
                                                      Register Reg) const {
  int Sign = 1;
  unsigned Opcode = MI.getOpcode();
  int64_t Offset = 0;

  // TODO: Handle cases where Reg is a super- or sub-register of the
  // destination register.
  const MachineOperand &Op0 = MI.getOperand(0);
  if (!Op0.isReg() || Reg != Op0.getReg())
    return None;

  // We describe SUBri or ADDri instructions.
  if (Opcode == ARM::SUBri)
    Sign = -1;
  else if (Opcode != ARM::ADDri)
    return None;

  // TODO: Third operand can be a global address (usually some string). Since
  //       strings can be relocated we cannot calculate their offsets for
  //       now.
  if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
    return None;

  Offset = MI.getOperand(2).getImm() * Sign;
  return RegImmPair{MI.getOperand(1).getReg(), Offset};
}

bool llvm::registerDefinedBetween(unsigned Reg,
                                  MachineBasicBlock::iterator From,
                                  MachineBasicBlock::iterator To,
                                  const TargetRegisterInfo *TRI) {
  for (auto I = From; I != To; ++I)
    if (I->modifiesRegister(Reg, TRI))
      return true;
  return false;
}

MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
                                         const TargetRegisterInfo *TRI) {
  // Search backwards to the instruction that defines CPSR. This may or may not
  // be a CMP; we check that after this loop. If we find another instruction
  // that reads CPSR, we return nullptr.
  MachineBasicBlock::iterator CmpMI = Br;
  while (CmpMI != Br->getParent()->begin()) {
    --CmpMI;
    if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
      break;
    if (CmpMI->readsRegister(ARM::CPSR, TRI))
      break;
  }

  // Check that this inst is a CMP r[0-7], #0 and that the register
  // is not redefined between the cmp and the br.
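  // A CBZ/CBNZ can only encode an unpredicated comparison of a low register
  // against zero, which is why each of the checks below returns nullptr when
  // its condition is not met.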
  if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
    return nullptr;
  Register Reg = CmpMI->getOperand(0).getReg();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
  if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
    return nullptr;
  if (!isARMLowRegister(Reg))
    return nullptr;
  if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
    return nullptr;

  return &*CmpMI;
}

unsigned llvm::ConstantMaterializationCost(unsigned Val,
                                           const ARMSubtarget *Subtarget,
                                           bool ForCodesize) {
  if (Subtarget->isThumb()) {
    if (Val <= 255) // MOV
      return ForCodesize ? 2 : 1;
    if (Subtarget->hasV6T2Ops() && (Val <= 0xffff ||                    // MOV
                                    ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
                                    ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
      return ForCodesize ? 4 : 1;
    if (Val <= 510) // MOV + ADDi8
      return ForCodesize ? 4 : 2;
    if (~Val <= 255) // MOV + MVN
      return ForCodesize ? 4 : 2;
    if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
      return ForCodesize ? 4 : 2;
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) // MOV
      return ForCodesize ? 4 : 1;
    if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
      return ForCodesize ? 4 : 1;
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
      return ForCodesize ? 4 : 1;
    if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
      return ForCodesize ? 8 : 2;
  }
  if (Subtarget->useMovt()) // MOVW + MOVT
    return ForCodesize ? 8 : 2;
  return ForCodesize ? 8 : 3; // Literal pool load
}

bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
                                               const ARMSubtarget *Subtarget,
                                               bool ForCodesize) {
  // Check with ForCodesize
  unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
  unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
  if (Cost1 < Cost2)
    return true;
  if (Cost1 > Cost2)
    return false;

  // If they are equal, try with !ForCodesize
  return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
         ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
}