1 //===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion on machine instructions. We 11 // attempt to remove as much code from the body of a loop as possible. 12 // 13 // This pass is not intended to be a replacement or a complete alternative 14 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 15 // constructs that are not exposed before lowering and instruction selection. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/BitVector.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallSet.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/Analysis/AliasAnalysis.h" 26 #include "llvm/CodeGen/MachineBasicBlock.h" 27 #include "llvm/CodeGen/MachineDominators.h" 28 #include "llvm/CodeGen/MachineFrameInfo.h" 29 #include "llvm/CodeGen/MachineFunction.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineLoopInfo.h" 33 #include "llvm/CodeGen/MachineMemOperand.h" 34 #include "llvm/CodeGen/MachineOperand.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/PseudoSourceValue.h" 37 #include "llvm/CodeGen/TargetInstrInfo.h" 38 #include "llvm/CodeGen/TargetLowering.h" 39 #include "llvm/CodeGen/TargetRegisterInfo.h" 40 #include "llvm/CodeGen/TargetSchedule.h" 41 #include "llvm/CodeGen/TargetSubtargetInfo.h" 42 #include "llvm/IR/DebugLoc.h" 43 #include "llvm/MC/MCInstrDesc.h" 44 #include "llvm/MC/MCRegisterInfo.h" 45 #include "llvm/Pass.h" 46 #include "llvm/Support/Casting.h" 47 
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machinelicm"

static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
                 cl::desc("MachineLICM should avoid speculation"),
                 cl::init(true), cl::Hidden);

static cl::opt<bool>
HoistCheapInsts("hoist-cheap-insts",
                cl::desc("MachineLICM should hoist even cheap instructions"),
                cl::init(false), cl::Hidden);

static cl::opt<bool>
SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
                       cl::desc("MachineLICM should sink instructions into "
                                "loops to avoid register spills"),
                       cl::init(false), cl::Hidden);

STATISTIC(NumHoisted,
          "Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
          "Number of instructions hoisted in low reg pressure situation");
STATISTIC(NumHighLatency,
          "Number of high latency instructions hoisted");
STATISTIC(NumCSEed,
          "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
          "Number of machine instructions hoisted out of loops post regalloc");

namespace {

/// Common implementation shared by the pre-RA (EarlyMachineLICM) and post-RA
/// (MachineLICM) variants of the pass. Which mode runs is decided by whether
/// the function is still in SSA form (see runOnMachineFunction).
class MachineLICMBase : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetLoweringBase *TLI;
  const TargetRegisterInfo *TRI;
  const MachineFrameInfo *MFI;
  MachineRegisterInfo *MRI;
  TargetSchedModel SchedModel;
  bool PreRegAlloc;

  // Various analyses that we use...
  AliasAnalysis *AA;               // Alias analysis info.
  MachineLoopInfo *MLI;            // Current MachineLoopInfo
  MachineDominatorTree *DT;        // Machine dominator tree for the cur loop

  // State that is updated as we process loops
  bool Changed;                    // True if a loop is changed.
  bool FirstInLoop;                // True if it's the first LICM in the loop.
  MachineLoop *CurLoop;            // The current loop we are working on.
  MachineBasicBlock *CurPreheader; // The preheader for CurLoop.

  // Exit blocks for CurLoop.
  SmallVector<MachineBasicBlock *, 8> ExitBlocks;

  bool isExitBlock(const MachineBasicBlock *MBB) const {
    return is_contained(ExitBlocks, MBB);
  }

  // Track 'estimated' register pressure.
  SmallSet<unsigned, 32> RegSeen;
  SmallVector<unsigned, 8> RegPressure;

  // Register pressure "limit" per register pressure set. If the pressure
  // is higher than the limit, then it's considered high.
  SmallVector<unsigned, 8> RegLimit;

  // Register pressure on path leading from loop preheader to current BB.
  SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;

  // For each opcode, keep a list of potential CSE instructions.
  DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap;

  enum {
    SpeculateFalse   = 0,
    SpeculateTrue    = 1,
    SpeculateUnknown = 2
  };

  // If a MBB does not dominate loop exiting blocks then it may not be safe
  // to hoist loads from this block.
  // Tri-state: 0 - false, 1 - true, 2 - unknown
  unsigned SpeculationState;

public:
  MachineLICMBase(char &PassID, bool PreRegAlloc)
      : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineLoopInfo>();
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<MachineLoopInfo>();
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  void releaseMemory() override {
    RegSeen.clear();
    RegPressure.clear();
    RegLimit.clear();
    BackTrace.clear();
    CSEMap.clear();
  }

private:
  /// Keep track of information about hoisting candidates.
  struct CandidateInfo {
    MachineInstr *MI;
    unsigned Def;
    int FI;

    CandidateInfo(MachineInstr *mi, unsigned def, int fi)
        : MI(mi), Def(def), FI(fi) {}
  };

  void HoistRegionPostRA();

  void HoistPostRA(MachineInstr *MI, unsigned Def);

  void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
                 BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
                 SmallVectorImpl<CandidateInfo> &Candidates);

  void AddToLiveIns(unsigned Reg);

  bool IsLICMCandidate(MachineInstr &I);

  bool IsLoopInvariantInst(MachineInstr &I);

  bool HasLoopPHIUse(const MachineInstr *MI) const;

  bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
                             unsigned Reg) const;

  bool IsCheapInstruction(MachineInstr &MI) const;

  bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
                               bool Cheap);

  void UpdateBackTraceRegPressure(const MachineInstr *MI);

  bool IsProfitableToHoist(MachineInstr &MI);

  bool IsGuaranteedToExecute(MachineBasicBlock *BB);

  void EnterScope(MachineBasicBlock *MBB);

  void ExitScope(MachineBasicBlock *MBB);

  void ExitScopeIfDone(
      MachineDomTreeNode *Node,
      DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
      DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);

  void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);

  void HoistRegion(MachineDomTreeNode *N, bool IsHeader);

  void SinkIntoLoop();

  void InitRegPressure(MachineBasicBlock *BB);

  DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
                                           bool ConsiderSeen,
                                           bool ConsiderUnseenAsDef);

  void UpdateRegPressure(const MachineInstr *MI,
                         bool ConsiderUnseenAsDef = false);

  MachineInstr *ExtractHoistableLoad(MachineInstr *MI);

  const MachineInstr *
  LookForDuplicate(const MachineInstr *MI,
                   std::vector<const MachineInstr *> &PrevMIs);

  bool EliminateCSE(
      MachineInstr *MI,
      DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);

  bool MayCSE(MachineInstr *MI);

  bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);

  void InitCSEMap(MachineBasicBlock *BB);

  MachineBasicBlock *getCurPreheader();
};

class MachineLICM : public MachineLICMBase {
public:
  static char ID;
  MachineLICM() : MachineLICMBase(ID, false) {
    initializeMachineLICMPass(*PassRegistry::getPassRegistry());
  }
};

class EarlyMachineLICM : public MachineLICMBase {
public:
  static char ID;
  EarlyMachineLICM() : MachineLICMBase(ID, true) {
    initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
  }
};

} // end anonymous namespace

char MachineLICM::ID;
char EarlyMachineLICM::ID;

char &llvm::MachineLICMID = MachineLICM::ID;
char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;

INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
                      "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
                    "Machine Loop Invariant Code Motion", false, false)

INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
                      "Early Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
                    "Early Machine Loop Invariant Code Motion", false, false)

/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
  // Check whether this loop even has a unique predecessor.
  if (!CurLoop->getLoopPredecessor())
    return false;
  // Ok, now check to see if any of its outer loops do.
  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
    if (L->getLoopPredecessor())
      return false;
  // None of them did, so this is the outermost with a unique predecessor.
  return true;
}

bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  Changed = FirstInLoop = false;
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  TII = ST.getInstrInfo();
  TLI = ST.getTargetLowering();
  TRI = ST.getRegisterInfo();
  MFI = &MF.getFrameInfo();
  MRI = &MF.getRegInfo();
  SchedModel.init(ST.getSchedModel(), &ST, TII);

  // The function is still in SSA form exactly when we run before register
  // allocation; that decides which of the two hoisting strategies applies.
  PreRegAlloc = MRI->isSSA();

  if (PreRegAlloc)
    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
  else
    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
  DEBUG(dbgs() << MF.getName() << " ********\n");

  if (PreRegAlloc) {
    // Estimate register pressure during pre-regalloc pass.
    unsigned NumRPS = TRI->getNumRegPressureSets();
    RegPressure.resize(NumRPS);
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
    RegLimit.resize(NumRPS);
    for (unsigned i = 0, e = NumRPS; i != e; ++i)
      RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
  }

  // Get our Loop information...
  MLI = &getAnalysis<MachineLoopInfo>();
  DT = &getAnalysis<MachineDominatorTree>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  while (!Worklist.empty()) {
    CurLoop = Worklist.pop_back_val();
    CurPreheader = nullptr;
    ExitBlocks.clear();

    // If this is done before regalloc, only visit outer-most preheader-sporting
    // loops.
    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
      Worklist.append(CurLoop->begin(), CurLoop->end());
      continue;
    }

    CurLoop->getExitBlocks(ExitBlocks);

    if (!PreRegAlloc)
      HoistRegionPostRA();
    else {
      // CSEMap is initialized for loop header when the first instruction is
      // being hoisted.
      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
      FirstInLoop = true;
      HoistOutOfLoop(N);
      CSEMap.clear();

      if (SinkInstsToAvoidSpills)
        SinkIntoLoop();
    }
  }

  return Changed;
}

/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
  // If we lost memory operands, conservatively assume that the instruction
  // writes to all slots.
  if (MI->memoperands_empty())
    return true;
  for (const MachineMemOperand *MemOp : MI->memoperands()) {
    if (!MemOp->isStore() || !MemOp->getPseudoValue())
      continue;
    if (const FixedStackPseudoSourceValue *Value =
            dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
      if (Value->getFrameIndex() == FI)
        return true;
    }
  }
  return false;
}

/// Examine the instruction for a potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICMBase::ProcessMI(MachineInstr *MI,
                                BitVector &PhysRegDefs,
                                BitVector &PhysRegClobbers,
                                SmallSet<int, 32> &StoredFIs,
                                SmallVectorImpl<CandidateInfo> &Candidates) {
  bool RuledOut = false;
  bool HasNonInvariantUse = false;
  unsigned Def = 0;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isFI()) {
      // Remember if the instruction stores to the frame index.
      int FI = MO.getIndex();
      if (!StoredFIs.count(FI) &&
          MFI->isSpillSlotObjectIndex(FI) &&
          InstructionStoresToFI(MI, FI))
        StoredFIs.insert(FI);
      HasNonInvariantUse = true;
      continue;
    }

    // We can't hoist an instruction defining a physreg that is clobbered in
    // the loop.
    if (MO.isRegMask()) {
      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
      continue;
    }

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
           "Not expecting virtual register!");

    if (!MO.isDef()) {
      if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
        // If it's using a non-loop-invariant register, then it's obviously not
        // safe to hoist.
        HasNonInvariantUse = true;
      continue;
    }

    if (MO.isImplicit()) {
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRegClobbers.set(*AI);
      if (!MO.isDead())
        // Non-dead implicit def? This cannot be hoisted.
        RuledOut = true;
      // No need to check if a dead implicit def is also defined by
      // another instruction.
      continue;
    }

    // FIXME: For now, avoid instructions with multiple defs, unless
    // it's a dead implicit def.
    if (Def)
      RuledOut = true;
    else
      Def = Reg;

    // If we have already seen another instruction that defines the same
    // register, then this is not safe. Two defs is indicated by setting a
    // PhysRegClobbers bit.
    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
      if (PhysRegDefs.test(*AS))
        PhysRegClobbers.set(*AS);
      PhysRegDefs.set(*AS);
    }
    if (PhysRegClobbers.test(Reg))
      // MI defined register is seen defined by another instruction in
      // the loop, it cannot be a LICM candidate.
      RuledOut = true;
  }

  // Only consider reloads for now and remats which do not have register
  // operands. FIXME: Consider unfold load folding instructions.
  if (Def && !RuledOut) {
    int FI = std::numeric_limits<int>::min();
    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
        (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
      Candidates.push_back(CandidateInfo(MI, Def, FI));
  }
}

/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICMBase::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (MachineBasicBlock *BB : Blocks) {
    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that's reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    SpeculationState = SpeculateUnknown;
    for (MachineInstr &MI : *BB)
      ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (const MachineOperand &MO : TI->operands()) {
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (CandidateInfo &Candidate : Candidates) {
    if (Candidate.FI != std::numeric_limits<int>::min() &&
        StoredFIs.count(Candidate.FI))
      continue;

    unsigned Def = Candidate.Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidate.MI;
      for (const MachineOperand &MO : MI->operands()) {
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidate.Def);
    }
  }
}

/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
562 void MachineLICMBase::AddToLiveIns(unsigned Reg) { 563 const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 564 for (MachineBasicBlock *BB : Blocks) { 565 if (!BB->isLiveIn(Reg)) 566 BB->addLiveIn(Reg); 567 for (MachineInstr &MI : *BB) { 568 for (MachineOperand &MO : MI.operands()) { 569 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 570 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 571 MO.setIsKill(false); 572 } 573 } 574 } 575 } 576 577 /// When an instruction is found to only use loop invariant operands that is 578 /// safe to hoist, this instruction is called to do the dirty work. 579 void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { 580 MachineBasicBlock *Preheader = getCurPreheader(); 581 582 // Now move the instructions to the predecessor, inserting it before any 583 // terminator instructions. 584 DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from " 585 << printMBBReference(*MI->getParent()) << ": " << *MI); 586 587 // Splice the instruction to the preheader. 588 MachineBasicBlock *MBB = MI->getParent(); 589 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 590 591 // Add register to livein list to all the BBs in the current loop since a 592 // loop invariant must be kept live throughout the whole loop. This is 593 // important to ensure later passes do not scavenge the def register. 594 AddToLiveIns(Def); 595 596 ++NumPostRAHoisted; 597 Changed = true; 598 } 599 600 /// Check if this mbb is guaranteed to execute. If not then a load from this mbb 601 /// may not be safe to hoist. 602 bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) { 603 if (SpeculationState != SpeculateUnknown) 604 return SpeculationState == SpeculateFalse; 605 606 if (BB != CurLoop->getHeader()) { 607 // Check loop exiting blocks. 
608 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 609 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 610 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks) 611 if (!DT->dominates(BB, CurrentLoopExitingBlock)) { 612 SpeculationState = SpeculateTrue; 613 return false; 614 } 615 } 616 617 SpeculationState = SpeculateFalse; 618 return true; 619 } 620 621 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) { 622 DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n'); 623 624 // Remember livein register pressure. 625 BackTrace.push_back(RegPressure); 626 } 627 628 void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) { 629 DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n'); 630 BackTrace.pop_back(); 631 } 632 633 /// Destroy scope for the MBB that corresponds to the given dominator tree node 634 /// if its a leaf or all of its children are done. Walk up the dominator tree to 635 /// destroy ancestors which are now done. 636 void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node, 637 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 638 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 639 if (OpenChildren[Node]) 640 return; 641 642 // Pop scope. 643 ExitScope(Node->getBlock()); 644 645 // Now traverse upwards to pop ancestors whose offsprings are all done. 646 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 647 unsigned Left = --OpenChildren[Parent]; 648 if (Left != 0) 649 break; 650 ExitScope(Parent->getBlock()); 651 Node = Parent; 652 } 653 } 654 655 /// Walk the specified loop in the CFG (defined by all blocks dominated by the 656 /// specified header block, and that are in the current loop) in depth first 657 /// order w.r.t the DominatorTree. This allows us to visit definitions before 658 /// uses, allowing us to hoist a loop body in one pass without iteration. 
void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(HeaderN);
  while (!WorkList.empty()) {
    MachineDomTreeNode *Node = WorkList.pop_back_val();
    assert(Node && "Null dominator tree node?");
    MachineBasicBlock *BB = Node->getBlock();

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad())
      continue;

    // If this subregion is not in the top level loop at all, exit.
    if (!CurLoop->contains(BB))
      continue;

    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    unsigned NumChildren = Children.size();

    // Don't hoist things out of a large switch statement. This often causes
    // code to be hoisted that wasn't going to be executed, and increases
    // register pressure in a situation where it's likely to matter.
    if (BB->succ_size() >= 25)
      NumChildren = 0;

    OpenChildren[Node] = NumChildren;
    // Add children in reverse order as then the next popped worklist node is
    // the first child of this node. This means we ultimately traverse the
    // DOM tree in exactly the same order as if we'd recursed.
    for (int i = (int)NumChildren-1; i >= 0; --i) {
      MachineDomTreeNode *Child = Children[i];
      ParentMap[Child] = Node;
      WorkList.push_back(Child);
    }
  }

  if (Scopes.size() == 0)
    return;

  // Compute registers which are livein into the loop headers.
  RegSeen.clear();
  BackTrace.clear();
  InitRegPressure(Preheader);

  // Now perform LICM.
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();

    EnterScope(MBB);

    // Process the block. Use the pre-computed NextMII since Hoist() may
    // remove or replace MI from the block.
    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
         MII = MBB->begin(), E = MBB->end(); MII != E; ) {
      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
      MachineInstr *MI = &*MII;
      if (!Hoist(MI, Preheader))
        UpdateRegPressure(MI);
      MII = NextMII;
    }

    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
  }
}

/// Sink instructions into loops if profitable. This especially tries to prevent
/// register spills caused by register pressure if there is little to no
/// overhead moving instructions into loops.
void MachineLICMBase::SinkIntoLoop() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineInstr *, 8> Candidates;
  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
       I != Preheader->instr_end(); ++I) {
    // We need to ensure that we can safely move this instruction into the loop.
    // As such, it must not have side-effects, e.g. such as a call has.
    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
      Candidates.push_back(&*I);
  }

  for (MachineInstr *I : Candidates) {
    const MachineOperand &MO = I->getOperand(0);
    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;
    bool CanSink = true;
    MachineBasicBlock *B = nullptr;
    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
      // FIXME: Come up with a proper cost model that estimates whether sinking
      // the instruction (and thus possibly executing it on every loop
      // iteration) is more expensive than a register.
      // For now assumes that copies are cheap and thus almost always worth it.
      if (!MI.isCopy()) {
        CanSink = false;
        break;
      }
      if (!B) {
        B = MI.getParent();
        continue;
      }
      // Sink to the nearest common dominator of all uses so the def still
      // dominates every use after the splice.
      B = DT->findNearestCommonDominator(B, MI.getParent());
      if (!B) {
        CanSink = false;
        break;
      }
    }
    if (!CanSink || !B || B == Preheader)
      continue;
    B->splice(B->getFirstNonPHI(), Preheader, I);
  }
}

/// Return true if MO kills its register: either the kill flag is set, or it is
/// the sole non-debug use of the register (so the value dies here).
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}

/// Find all virtual register references that are liveout of the preheader to
/// initialize the starting "register pressure". Note this does not count live
/// through (livein but not used) registers.
void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
  std::fill(RegPressure.begin(), RegPressure.end(), 0);

  // If the preheader has only a single predecessor and it ends with a
  // fallthrough or an unconditional branch, then scan its predecessor for live
  // defs as well. This happens whenever the preheader is created by splitting
  // the critical edge from the loop predecessor to the loop header.
  if (BB->pred_size() == 1) {
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 4> Cond;
    if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
      InitRegPressure(*BB->pred_begin());
  }

  for (const MachineInstr &MI : *BB)
    UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
}

/// Update estimate of register pressure after the specified instruction.
void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
                                        bool ConsiderUnseenAsDef) {
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
  for (const auto &RPIdAndCost : Cost) {
    unsigned Class = RPIdAndCost.first;
    // Clamp at zero: a negative delta larger than the current pressure would
    // otherwise wrap the unsigned counter.
    if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
      RegPressure[Class] = 0;
    else
      RegPressure[Class] += RPIdAndCost.second;
  }
}

/// Calculate the additional register pressure that the registers used in MI
/// cause.
///
/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
/// figure out which usages are live-ins.
/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int>
MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
                                  bool ConsiderUnseenAsDef) {
  DenseMap<unsigned, int> Cost;
  if (MI->isImplicitDef())
    return Cost;
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    // FIXME: It seems bad to use RegSeen only for some of these calculations.
    bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);

    RegClassWeight W = TRI->getRegClassWeight(RC);
    int RCCost = 0;
    if (MO.isDef())
      RCCost = W.RegWeight;
    else {
      bool isKill = isOperandKill(MO, MRI);
      if (isNew && !isKill && ConsiderUnseenAsDef)
        // Haven't seen this, it must be a livein.
        RCCost = W.RegWeight;
      else if (!isNew && isKill)
        RCCost = -W.RegWeight;
    }
    if (RCCost == 0)
      continue;
    // Spread the weight over every pressure set this register class belongs
    // to; the set list is terminated by -1.
    const int *PS = TRI->getRegClassPressureSets(RC);
    for (; *PS != -1; ++PS) {
      if (Cost.find(*PS) == Cost.end())
        Cost[*PS] = RCCost;
      else
        Cost[*PS] += RCCost;
    }
  }
  return Cost;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything..
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}

/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
  // Check if it's safe to move the instruction.
  bool DontMoveAcrossStore = true;
  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
    return false;

  // If it is load then check if it is guaranteed to execute by making sure that
  // it dominates all exiting blocks. If it doesn't, then there is a path out of
  // the loop which does not execute this load, so we can't hoist it. Loads
  // from constant memory are not safe to speculate all the time, for example
  // indexed load from a jump table.
  // Stores and side effects are already checked by isSafeToMove.
  if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
      !IsGuaranteedToExecute(I.getParent()))
    return false;

  return true;
}

/// Returns true if the instruction is loop invariant.
/// I.e., all virtual register operands are defined outside of the loop,
/// physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
  if (!IsLICMCandidate(I))
    return false;

  // The instruction is loop invariant if all of its operands are.
  for (const MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;

    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;

    // Don't hoist an instruction that uses or defines a physical register.
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        // However, if the physreg is known to always be caller saved/restored
        // then this use is safe to hoist.
        if (!MRI->isConstantPhysReg(Reg) &&
            !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF())))
          return false;
        // Otherwise it's safe to move.
        continue;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return false;
      } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
        // If the reg is live into the loop, we can't hoist an instruction
        // which would clobber it.
        return false;
      }
      // A dead physreg def that isn't live into the header falls through;
      // it is not a use, so the check below simply moves to the next operand.
    }

    if (!MO.isUse())
      continue;

    // Every virtual register use must have a def in SSA form.
    assert(MRI->getVRegDef(Reg) &&
           "Machine instr not mapped for this vreg?!");

    // If the loop contains the definition of an operand, then the instruction
    // isn't loop invariant.
    if (CurLoop->contains(MRI->getVRegDef(Reg)))
      return false;
  }

  // If we got this far, the instruction is loop invariant!
  return true;
}

/// Return true if the specified instruction is used by a phi node and hoisting
/// it could cause a copy to be inserted.
bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
  // Worklist of instructions whose defs are checked for PHI uses; seeded with
  // MI and grown through in-loop copies of its results.
  SmallVector<const MachineInstr*, 8> Work(1, MI);
  do {
    MI = Work.pop_back_val();
    for (const MachineOperand &MO : MI->operands()) {
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        continue;
      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
        // A PHI may cause a copy to be inserted.
        if (UseMI.isPHI()) {
          // A PHI inside the loop causes a copy because the live range of Reg is
          // extended across the PHI.
          if (CurLoop->contains(&UseMI))
            return true;
          // A PHI in an exit block can cause a copy to be inserted if the PHI
          // has multiple predecessors in the loop with different values.
          // For now, approximate by rejecting all exit blocks.
          if (isExitBlock(UseMI.getParent()))
            return true;
          continue;
        }
        // Look past copies as well.
        // NOTE(review): nothing marks instructions as visited, so this relies
        // on in-loop copy chains being acyclic — presumably guaranteed by SSA.
        if (UseMI.isCopy() && CurLoop->contains(&UseMI))
          Work.push_back(&UseMI);
      }
    }
  } while (!Work.empty());
  return false;
}

/// Compute operand latency between a def of 'Reg' and a use in the current
/// loop, return true if the target considered it high.
1005 bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, 1006 unsigned DefIdx, 1007 unsigned Reg) const { 1008 if (MRI->use_nodbg_empty(Reg)) 1009 return false; 1010 1011 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { 1012 if (UseMI.isCopyLike()) 1013 continue; 1014 if (!CurLoop->contains(UseMI.getParent())) 1015 continue; 1016 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { 1017 const MachineOperand &MO = UseMI.getOperand(i); 1018 if (!MO.isReg() || !MO.isUse()) 1019 continue; 1020 unsigned MOReg = MO.getReg(); 1021 if (MOReg != Reg) 1022 continue; 1023 1024 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i)) 1025 return true; 1026 } 1027 1028 // Only look at the first in loop use. 1029 break; 1030 } 1031 1032 return false; 1033 } 1034 1035 /// Return true if the instruction is marked "cheap" or the operand latency 1036 /// between its def and a use is one or less. 1037 bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { 1038 if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike()) 1039 return true; 1040 1041 bool isCheap = false; 1042 unsigned NumDefs = MI.getDesc().getNumDefs(); 1043 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1044 MachineOperand &DefMO = MI.getOperand(i); 1045 if (!DefMO.isReg() || !DefMO.isDef()) 1046 continue; 1047 --NumDefs; 1048 unsigned Reg = DefMO.getReg(); 1049 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1050 continue; 1051 1052 if (!TII->hasLowDefLatency(SchedModel, MI, i)) 1053 return false; 1054 isCheap = true; 1055 } 1056 1057 return isCheap; 1058 } 1059 1060 /// Visit BBs from header to current BB, check if hoisting an instruction of the 1061 /// given cost matrix can cause high register pressure. 
bool
MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
                                         bool CheapInstr) {
  for (const auto &RPIdAndCost : Cost) {
    // Only pressure-set entries that add pressure matter here.
    if (RPIdAndCost.second <= 0)
      continue;

    unsigned Class = RPIdAndCost.first;
    int Limit = RegLimit[Class];

    // Don't hoist cheap instructions if they would increase register pressure,
    // even if we're under the limit.
    // Note: this fires on the first pressure set with a positive cost, i.e.
    // whenever the instruction increases pressure anywhere at all.
    if (CheapInstr && !HoistCheapInsts)
      return true;

    // Check every recorded block from the header down to the current block.
    for (const auto &RP : BackTrace)
      if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
        return true;
  }

  return false;
}

/// Traverse the back trace from header to the current block and update their
/// register pressures to reflect the effect of hoisting MI from the current
/// block to the preheader.
void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
  // First compute the 'cost' of the instruction, i.e. its contribution
  // to register pressure.
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Update register pressure of blocks from loop header to current block.
  for (auto &RP : BackTrace)
    for (const auto &RPIdAndCost : Cost)
      RP[RPIdAndCost.first] += RPIdAndCost.second;
}

/// Return true if it is potentially profitable to hoist the given loop
/// invariant.
bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
  // IMPLICIT_DEFs are free to hoist: they produce no code.
  if (MI.isImplicitDef())
    return true;

  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(&MI);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }

  // Rematerializable instructions should always be hoisted since the register
  // allocator can just pull them down again when needed.
  if (TII->isTriviallyReMaterializable(MI, AA))
    return true;

  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // A high-latency def is worth hoisting regardless of pressure.
    if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
      DEBUG(dbgs() << "Hoist High Latency: " << MI);
      ++NumHighLatency;
      return true;
    }
  }

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }

  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }

  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
    DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  }

  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!TII->isTriviallyReMaterializable(MI, AA) &&
      !MI.isDereferenceableInvariantLoad(AA)) {
    DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    return false;
  }

  return true;
}

/// Unfold a load from the given machineinstr if the load itself could be
/// hoisted. Return the unfolded and hoistable load, or null if the load
/// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return nullptr;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isDereferenceableInvariantLoad(AA))
    return nullptr;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  // Opcode 0 means the target cannot unfold this instruction.
  if (NewOpc == 0) return nullptr;
  const MCInstrDesc &MID = TII->get(NewOpc);
  MachineFunction &MF = *MI->getMF();
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  unsigned Reg = MRI->createVirtualRegister(RC);

  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
                                          /*UnfoldLoad=*/true,
                                          /*UnfoldStore=*/false, NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  // Insert both new instructions before MI. The usage below assumes
  // NewMIs[0] is the extracted load and NewMIs[1] the remaining operation —
  // per unfoldMemoryOperand's output order; confirm against the TII docs.
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return nullptr;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.
  MI->eraseFromParent();
  return NewMIs[0];
}

/// Initialize the CSE map with instructions that are in the current loop
/// preheader that may become duplicates of instructions that are hoisted
/// out of the loop.
void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
  // Bucket every existing preheader instruction by opcode so hoisted
  // instructions can quickly find potential duplicates.
  for (MachineInstr &MI : *BB)
    CSEMap[MI.getOpcode()].push_back(&MI);
}

/// Find an instruction among PrevMIs that is a duplicate of MI.
/// Return this instruction if it's found.
const MachineInstr*
MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
                                  std::vector<const MachineInstr*> &PrevMIs) {
  // Pass MRI only before register allocation; produceSameValue may use it to
  // reason about virtual register definitions.
  for (const MachineInstr *PrevMI : PrevMIs)
    if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
      return PrevMI;

  return nullptr;
}

/// Given a LICM'ed instruction, look for an instruction in the preheader that
/// computes the same value. If one is found, replace all uses of MI's
/// definitions with the existing instruction's definitions (RAUW) rather than
/// hoisting the instruction to the preheader.
bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
    DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 ||
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() &&
          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        Defs.push_back(i);
    }

    // Try to constrain Dup's def register classes to MI's. If any constraint
    // fails, roll back the classes already changed and give up on CSE.
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
      }
    }

    // All constraints succeeded: rewrite uses and drop stale kill flags,
    // since DupReg's live range now covers MI's former uses as well.
    for (unsigned Idx : Defs) {
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      MRI->clearKillFlags(DupReg);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}

/// Return true if the given instruction will be CSE'd if it's hoisted out of
/// the loop.
bool MachineLICMBase::MayCSE(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
    CI = CSEMap.find(Opcode);
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  return LookForDuplicate(MI, CI->second) != nullptr;
}

/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
1345 /// It returns true if the instruction is hoisted. 1346 bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1347 // First check whether we should hoist this instruction. 1348 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1349 // If not, try unfolding a hoistable load. 1350 MI = ExtractHoistableLoad(MI); 1351 if (!MI) return false; 1352 } 1353 1354 // Now move the instructions to the predecessor, inserting it before any 1355 // terminator instructions. 1356 DEBUG({ 1357 dbgs() << "Hoisting " << *MI; 1358 if (MI->getParent()->getBasicBlock()) 1359 dbgs() << " from " << printMBBReference(*MI->getParent()); 1360 if (Preheader->getBasicBlock()) 1361 dbgs() << " to " << printMBBReference(*Preheader); 1362 dbgs() << "\n"; 1363 }); 1364 1365 // If this is the first instruction being hoisted to the preheader, 1366 // initialize the CSE map with potential common expressions. 1367 if (FirstInLoop) { 1368 InitCSEMap(Preheader); 1369 FirstInLoop = false; 1370 } 1371 1372 // Look for opportunity to CSE the hoisted instruction. 1373 unsigned Opcode = MI->getOpcode(); 1374 DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator 1375 CI = CSEMap.find(Opcode); 1376 if (!EliminateCSE(MI, CI)) { 1377 // Otherwise, splice the instruction to the preheader. 1378 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1379 1380 // Since we are moving the instruction out of its basic block, we do not 1381 // retain its debug location. Doing so would degrade the debugging 1382 // experience and adversely affect the accuracy of profiling information. 1383 MI->setDebugLoc(DebugLoc()); 1384 1385 // Update register pressure for BBs from header to this block. 1386 UpdateBackTraceRegPressure(MI); 1387 1388 // Clear the kill flags of any register this instruction defines, 1389 // since they may need to be live throughout the entire loop 1390 // rather than just live for part of it. 
1391 for (MachineOperand &MO : MI->operands()) 1392 if (MO.isReg() && MO.isDef() && !MO.isDead()) 1393 MRI->clearKillFlags(MO.getReg()); 1394 1395 // Add to the CSE map. 1396 if (CI != CSEMap.end()) 1397 CI->second.push_back(MI); 1398 else 1399 CSEMap[Opcode].push_back(MI); 1400 } 1401 1402 ++NumHoisted; 1403 Changed = true; 1404 1405 return true; 1406 } 1407 1408 /// Get the preheader for the current loop, splitting a critical edge if needed. 1409 MachineBasicBlock *MachineLICMBase::getCurPreheader() { 1410 // Determine the block to which to hoist instructions. If we can't find a 1411 // suitable loop predecessor, we can't do any hoisting. 1412 1413 // If we've tried to get a preheader and failed, don't try again. 1414 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1415 return nullptr; 1416 1417 if (!CurPreheader) { 1418 CurPreheader = CurLoop->getLoopPreheader(); 1419 if (!CurPreheader) { 1420 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1421 if (!Pred) { 1422 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1423 return nullptr; 1424 } 1425 1426 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this); 1427 if (!CurPreheader) { 1428 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1429 return nullptr; 1430 } 1431 } 1432 } 1433 return CurPreheader; 1434 } 1435