//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs loop invariant code motion on machine instructions. We
// attempt to remove as much code from the body of a loop as possible.
//
// This pass is not intended to be a replacement or a complete alternative
// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

#define DEBUG_TYPE "machine-licm"

static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
                 cl::desc("MachineLICM should avoid speculation"),
                 cl::init(true), cl::Hidden);

static cl::opt<bool>
HoistCheapInsts("hoist-cheap-insts",
                cl::desc("MachineLICM should hoist even cheap instructions"),
                cl::init(false), cl::Hidden);

static cl::opt<bool>
SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
                       cl::desc("MachineLICM should sink instructions into "
                                "loops to avoid register spills"),
                       cl::init(false), cl::Hidden);

STATISTIC(NumHoisted,
          "Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
          "Number of instructions hoisted in low reg pressure situation");
STATISTIC(NumHighLatency,
          "Number of high latency instructions hoisted");
STATISTIC(NumCSEed,
          "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
          "Number of machine instructions hoisted out of loops post regalloc");

namespace {
  /// Machine-level LICM pass. Runs both before register allocation (on SSA
  /// form, with register-pressure estimation and CSE of hoisted instructions)
  /// and after register allocation (hoisting reloads of unmodified spill
  /// slots). The mode is detected from MRI->isSSA() in runOnMachineFunction.
  class MachineLICM : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    const TargetLoweringBase *TLI;
    const TargetRegisterInfo *TRI;
    const MachineFrameInfo *MFI;
    MachineRegisterInfo *MRI;
    TargetSchedModel SchedModel;
    bool PreRegAlloc;

    // Various analyses that we use...
    AliasAnalysis        *AA;      // Alias analysis info.
    MachineLoopInfo      *MLI;     // Current MachineLoopInfo
    MachineDominatorTree *DT;      // Machine dominator tree for the cur loop

    // State that is updated as we process loops
    bool         Changed;          // True if a loop is changed.
    bool         FirstInLoop;      // True if it's the first LICM in the loop.
    MachineLoop *CurLoop;          // The current loop we are working on.
    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.

    // Exit blocks for CurLoop.
    SmallVector<MachineBasicBlock*, 8> ExitBlocks;

    // Return true if MBB is one of CurLoop's exit blocks (linear scan of the
    // cached ExitBlocks list).
    bool isExitBlock(const MachineBasicBlock *MBB) const {
      return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
        ExitBlocks.end();
    }

    // Track 'estimated' register pressure.
    SmallSet<unsigned, 32> RegSeen;
    SmallVector<unsigned, 8> RegPressure;

    // Register pressure "limit" per register pressure set. If the pressure
    // is higher than the limit, then it's considered high.
    SmallVector<unsigned, 8> RegLimit;

    // Register pressure on path leading from loop preheader to current BB.
    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;

    // For each opcode, keep a list of potential CSE instructions.
    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;

    enum {
      SpeculateFalse   = 0,
      SpeculateTrue    = 1,
      SpeculateUnknown = 2
    };

    // If a MBB does not dominate loop exiting blocks then it may not be safe
    // to hoist loads from this block.
    // Tri-state: 0 - false, 1 - true, 2 - unknown
    unsigned SpeculationState;

  public:
    static char ID; // Pass identification, replacement for typeid
    MachineLICM() :
      MachineFunctionPass(ID), PreRegAlloc(true) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    explicit MachineLICM(bool PreRA) :
      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineLoopInfo>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreserved<MachineLoopInfo>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    void releaseMemory() override {
      RegSeen.clear();
      RegPressure.clear();
      RegLimit.clear();
      BackTrace.clear();
      CSEMap.clear();
    }

  private:
    /// Keep track of information about hoisting candidates.
    struct CandidateInfo {
      MachineInstr *MI;
      unsigned      Def;  // Physreg defined by MI (post-RA candidates only).
      int           FI;   // Stack slot loaded, or INT_MIN if not a reload.
      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
        : MI(mi), Def(def), FI(fi) {}
    };

    void HoistRegionPostRA();

    void HoistPostRA(MachineInstr *MI, unsigned Def);

    void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
                   BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
                   SmallVectorImpl<CandidateInfo> &Candidates);

    void AddToLiveIns(unsigned Reg);

    bool IsLICMCandidate(MachineInstr &I);

    bool IsLoopInvariantInst(MachineInstr &I);

    bool HasLoopPHIUse(const MachineInstr *MI) const;

    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
                               unsigned Reg) const;

    bool IsCheapInstruction(MachineInstr &MI) const;

    bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
                                 bool Cheap);

    void UpdateBackTraceRegPressure(const MachineInstr *MI);

    bool IsProfitableToHoist(MachineInstr &MI);

    bool IsGuaranteedToExecute(MachineBasicBlock *BB);

    void EnterScope(MachineBasicBlock *MBB);

    void ExitScope(MachineBasicBlock *MBB);

    void ExitScopeIfDone(
        MachineDomTreeNode *Node,
        DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
        DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);

    void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);

    void HoistRegion(MachineDomTreeNode *N, bool IsHeader);

    void SinkIntoLoop();

    void InitRegPressure(MachineBasicBlock *BB);

    DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
                                             bool ConsiderSeen,
                                             bool ConsiderUnseenAsDef);

    void UpdateRegPressure(const MachineInstr *MI,
                           bool ConsiderUnseenAsDef = false);

    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);

    const MachineInstr *
    LookForDuplicate(const MachineInstr *MI,
                     std::vector<const MachineInstr *> &PrevMIs);

    bool EliminateCSE(
        MachineInstr *MI,
        DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);

    bool MayCSE(MachineInstr *MI);

    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);

    void InitCSEMap(MachineBasicBlock *BB);

    MachineBasicBlock *getCurPreheader();
  };
} // end anonymous namespace

char MachineLICM::ID = 0;
char &llvm::MachineLICMID = MachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
                      "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
                    "Machine Loop Invariant Code Motion", false, false)

/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
  // Check whether this loop even has a unique predecessor.
  if (!CurLoop->getLoopPredecessor())
    return false;
  // Ok, now check to see if any of its outer loops do.
  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
    if (L->getLoopPredecessor())
      return false;
  // None of them did, so this is the outermost with a unique predecessor.
  return true;
}

/// Pass entry point. Detects pre- vs post-regalloc mode from SSA-ness of the
/// machine register info and walks every loop in the function, visiting only
/// outermost preheader-sporting loops in the pre-RA case.
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  Changed = FirstInLoop = false;
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  TII = ST.getInstrInfo();
  TLI = ST.getTargetLowering();
  TRI = ST.getRegisterInfo();
  MFI = MF.getFrameInfo();
  MRI = &MF.getRegInfo();
  SchedModel.init(ST.getSchedModel(), &ST, TII);

  PreRegAlloc = MRI->isSSA();

  if (PreRegAlloc)
    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
  else
    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
  DEBUG(dbgs() << MF.getName() << " ********\n");

  if (PreRegAlloc) {
    // Estimate register pressure during pre-regalloc pass.
    unsigned NumRPS = TRI->getNumRegPressureSets();
    RegPressure.resize(NumRPS);
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
    RegLimit.resize(NumRPS);
    for (unsigned i = 0, e = NumRPS; i != e; ++i)
      RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
  }

  // Get our Loop information...
  MLI = &getAnalysis<MachineLoopInfo>();
  DT  = &getAnalysis<MachineDominatorTree>();
  AA  = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  while (!Worklist.empty()) {
    CurLoop = Worklist.pop_back_val();
    CurPreheader = nullptr;
    ExitBlocks.clear();

    // If this is done before regalloc, only visit outer-most preheader-sporting
    // loops.
    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
      // Not a candidate at this nesting level; descend into the child loops.
      Worklist.append(CurLoop->begin(), CurLoop->end());
      continue;
    }

    CurLoop->getExitBlocks(ExitBlocks);

    if (!PreRegAlloc)
      HoistRegionPostRA();
    else {
      // CSEMap is initialized for loop header when the first instruction is
      // being hoisted.
      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
      FirstInLoop = true;
      HoistOutOfLoop(N);
      CSEMap.clear();

      if (SinkInstsToAvoidSpills)
        SinkIntoLoop();
    }
  }

  return Changed;
}

/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
  // If we lost memory operands, conservatively assume that the instruction
  // writes to all slots.
  if (MI->memoperands_empty())
    return true;
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end(); o != oe; ++o) {
    if (!(*o)->isStore() || !(*o)->getPseudoValue())
      continue;
    if (const FixedStackPseudoSourceValue *Value =
        dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
      if (Value->getFrameIndex() == FI)
        return true;
    }
  }
  return false;
}

/// Examine the instruction for potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
                            BitVector &PhysRegDefs,
                            BitVector &PhysRegClobbers,
                            SmallSet<int, 32> &StoredFIs,
                            SmallVectorImpl<CandidateInfo> &Candidates) {
  bool RuledOut = false;
  bool HasNonInvariantUse = false;
  unsigned Def = 0;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isFI()) {
      // Remember if the instruction stores to the frame index.
      int FI = MO.getIndex();
      if (!StoredFIs.count(FI) &&
          MFI->isSpillSlotObjectIndex(FI) &&
          InstructionStoresToFI(MI, FI))
        StoredFIs.insert(FI);
      HasNonInvariantUse = true;
      continue;
    }

    // We can't hoist an instruction defining a physreg that is clobbered in
    // the loop.
    if (MO.isRegMask()) {
      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
      continue;
    }

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
           "Not expecting virtual register!");

    if (!MO.isDef()) {
      if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
        // If it's using a non-loop-invariant register, then it's obviously not
        // safe to hoist.
        HasNonInvariantUse = true;
      continue;
    }

    if (MO.isImplicit()) {
      // Conservatively treat an implicit def as clobbering the register and
      // all of its aliases.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRegClobbers.set(*AI);
      if (!MO.isDead())
        // Non-dead implicit def? This cannot be hoisted.
        RuledOut = true;
      // No need to check if a dead implicit def is also defined by
      // another instruction.
      continue;
    }

    // FIXME: For now, avoid instructions with multiple defs, unless
    // it's a dead implicit def.
    if (Def)
      RuledOut = true;
    else
      Def = Reg;

    // If we have already seen another instruction that defines the same
    // register, then this is not safe.  Two defs is indicated by setting a
    // PhysRegClobbers bit.
    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
      if (PhysRegDefs.test(*AS))
        PhysRegClobbers.set(*AS);
      PhysRegDefs.set(*AS);
    }
    if (PhysRegClobbers.test(Reg))
      // MI defined register is seen defined by another instruction in
      // the loop, it cannot be a LICM candidate.
      RuledOut = true;
  }

  // Only consider reloads for now and remats which do not have register
  // operands. FIXME: Consider unfold load folding instructions.
  if (Def && !RuledOut) {
    int FI = INT_MIN;
    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
      Candidates.push_back(CandidateInfo(MI, Def, FI));
  }
}

/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICM::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that is reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
    }
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = TI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Candidates[i].FI != INT_MIN &&
        StoredFIs.count(Candidates[i].FI))
      continue;

    unsigned Def = Candidates[i].Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidates[i].MI;
      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidates[i].Def);
    }
  }
}

/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
void MachineLICM::AddToLiveIns(unsigned Reg) {
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];
    if (!BB->isLiveIn(Reg))
      BB->addLiveIn(Reg);
    for (MachineBasicBlock::iterator
           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
        // Clear kill flags on Reg and any register it is a sub-register of,
        // since the value must now stay live through the whole loop.
        if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
          MO.setIsKill(false);
      }
    }
  }
}

/// When an instruction is found to only use loop invariant operands that is
/// safe to hoist, this instruction is called to do the dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
  MachineBasicBlock *Preheader = getCurPreheader();

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
               << MI->getParent()->getNumber() << ": " << *MI);

  // Splice the instruction to the preheader.
  MachineBasicBlock *MBB = MI->getParent();
  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);

  // Add register to livein list to all the BBs in the current loop since a
  // loop invariant must be kept live throughout the whole loop. This is
  // important to ensure later passes do not scavenge the def register.
  AddToLiveIns(Def);

  ++NumPostRAHoisted;
  Changed = true;
}

/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
/// may not be safe to hoist.
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
  // The answer is cached per-block (the state is reset to SpeculateUnknown
  // before each block is scanned). SpeculateFalse means "no speculation
  // needed", i.e. the block is guaranteed to execute.
  if (SpeculationState != SpeculateUnknown)
    return SpeculationState == SpeculateFalse;

  if (BB != CurLoop->getHeader()) {
    // Check loop exiting blocks: BB is guaranteed to execute on every
    // iteration only if it dominates all of them.
    SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
    CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
    for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
      if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
        SpeculationState = SpeculateTrue;
        return false;
      }
  }

  SpeculationState = SpeculateFalse;
  return true;
}

/// Open a new scope for MBB in the backtrace: snapshot the current register
/// pressure so it can be restored when the scope is exited.
void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');

  // Remember livein register pressure.
  BackTrace.push_back(RegPressure);
}

/// Close the scope for MBB, discarding its register pressure snapshot.
void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  BackTrace.pop_back();
}

/// Destroy scope for the MBB that corresponds to the given dominator tree node
/// if it's a leaf or all of its children are done. Walk up the dominator tree
/// to destroy ancestors which are now done.
void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
    DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
    DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offspring are all done.
  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

/// Walk the specified loop in the CFG (defined by all blocks dominated by the
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree.
/// This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(HeaderN);
  while (!WorkList.empty()) {
    MachineDomTreeNode *Node = WorkList.pop_back_val();
    assert(Node && "Null dominator tree node?");
    MachineBasicBlock *BB = Node->getBlock();

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad())
      continue;

    // If this subregion is not in the top level loop at all, exit.
    if (!CurLoop->contains(BB))
      continue;

    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    unsigned NumChildren = Children.size();

    // Don't hoist things out of a large switch statement.  This often causes
    // code to be hoisted that wasn't going to be executed, and increases
    // register pressure in a situation where it's likely to matter.
    if (BB->succ_size() >= 25)
      NumChildren = 0;

    OpenChildren[Node] = NumChildren;
    // Add children in reverse order as then the next popped worklist node is
    // the first child of this node.  This means we ultimately traverse the
    // DOM tree in exactly the same order as if we'd recursed.
    for (int i = (int)NumChildren-1; i >= 0; --i) {
      MachineDomTreeNode *Child = Children[i];
      ParentMap[Child] = Node;
      WorkList.push_back(Child);
    }
  }

  if (Scopes.size() == 0)
    return;

  // Compute registers which are livein into the loop headers.
  RegSeen.clear();
  BackTrace.clear();
  InitRegPressure(Preheader);

  // Now perform LICM.
  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
    MachineDomTreeNode *Node = Scopes[i];
    MachineBasicBlock *MBB = Node->getBlock();

    EnterScope(MBB);

    // Process the block
    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
           MII = MBB->begin(), E = MBB->end(); MII != E; ) {
      // Save the next iterator first: hoisting removes MI from this block.
      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
      MachineInstr *MI = &*MII;
      if (!Hoist(MI, Preheader))
        UpdateRegPressure(MI);
      MII = NextMII;
    }

    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
  }
}

/// Sink instructions into loops if profitable. This especially tries to prevent
/// register spills caused by register pressure if there is little to no
/// overhead moving instructions into loops.
void MachineLICM::SinkIntoLoop() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineInstr *, 8> Candidates;
  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
       I != Preheader->instr_end(); ++I) {
    // We need to ensure that we can safely move this instruction into the loop.
    // As such, it must not have side-effects, e.g. such as a call has.
    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
      Candidates.push_back(&*I);
  }

  for (MachineInstr *I : Candidates) {
    const MachineOperand &MO = I->getOperand(0);
    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;
    bool CanSink = true;
    // B is the common dominator of all users; the sink destination.
    MachineBasicBlock *B = nullptr;
    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
      // FIXME: Come up with a proper cost model that estimates whether sinking
      // the instruction (and thus possibly executing it on every loop
      // iteration) is more expensive than a register.
      // For now assumes that copies are cheap and thus almost always worth it.
      if (!MI.isCopy()) {
        CanSink = false;
        break;
      }
      if (!B) {
        B = MI.getParent();
        continue;
      }
      B = DT->findNearestCommonDominator(B, MI.getParent());
      if (!B) {
        CanSink = false;
        break;
      }
    }
    if (!CanSink || !B || B == Preheader)
      continue;
    B->splice(B->getFirstNonPHI(), Preheader, I);
  }
}

// Return true if this operand kills its register: either it carries a kill
// flag already, or it is the register's only non-debug use.
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}

/// Find all virtual register references that are liveout of the preheader to
/// initialize the starting "register pressure". Note this does not count live
/// through (livein but not used) registers.
void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
  std::fill(RegPressure.begin(), RegPressure.end(), 0);

  // If the preheader has only a single predecessor and it ends with a
  // fallthrough or an unconditional branch, then scan its predecessor for live
  // defs as well. This happens whenever the preheader is created by splitting
  // the critical edge from the loop predecessor to the loop header.
  if (BB->pred_size() == 1) {
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 4> Cond;
    if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
      InitRegPressure(*BB->pred_begin());
  }

  for (const MachineInstr &MI : *BB)
    UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
}

/// Update estimate of register pressure after the specified instruction.
void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
                                    bool ConsiderUnseenAsDef) {
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
  for (const auto &RPIdAndCost : Cost) {
    unsigned Class = RPIdAndCost.first;
    // Clamp at zero: a negative cost (last use) can never make the estimated
    // pressure go below zero.
    if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
      RegPressure[Class] = 0;
    else
      RegPressure[Class] += RPIdAndCost.second;
  }
}

/// Calculate the additional register pressure that the registers used in MI
/// cause.
///
/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
/// figure out which usages are live-ins.
/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int>
MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
                              bool ConsiderUnseenAsDef) {
  DenseMap<unsigned, int> Cost;
  if (MI->isImplicitDef())
    return Cost;
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    // FIXME: It seems bad to use RegSeen only for some of these calculations.
    bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);

    RegClassWeight W = TRI->getRegClassWeight(RC);
    int RCCost = 0;
    if (MO.isDef())
      RCCost = W.RegWeight;
    else {
      bool isKill = isOperandKill(MO, MRI);
      if (isNew && !isKill && ConsiderUnseenAsDef)
        // Haven't seen this, it must be a livein.
        RCCost = W.RegWeight;
      else if (!isNew && isKill)
        // Last use of a previously seen register: releases pressure.
        RCCost = -W.RegWeight;
    }
    if (RCCost == 0)
      continue;
    // Apply the cost to every pressure set this register class belongs to.
    const int *PS = TRI->getRegClassPressureSets(RC);
    for (; *PS != -1; ++PS) {
      if (Cost.find(*PS) == Cost.end())
        Cost[*PS] = RCCost;
      else
        Cost[*PS] += RCCost;
    }
  }
  return Cost;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert (MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
         E = MI.memoperands_end(); I != E; ++I) {
    if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;
    }
  }
  return false;
}

/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
  // Check if it's safe to move the instruction.
  bool DontMoveAcrossStore = true;
  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
    return false;

  // If it is load then check if it is guaranteed to execute by making sure that
  // it dominates all exiting blocks. If it doesn't, then there is a path out of
  // the loop which does not execute this load, so we can't hoist it. Loads
  // from constant memory are not safe to speculate all the time, for example
  // indexed load from a jump table.
  // Stores and side effects are already checked by isSafeToMove.
  if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
      !IsGuaranteedToExecute(I.getParent()))
    return false;

  return true;
}

/// Returns true if the instruction is loop invariant.
/// I.e., all virtual register operands are defined outside of the loop,
/// physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
  if (!IsLICMCandidate(I))
    return false;

  // The instruction is loop invariant if all of its operands are.
  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = I.getOperand(i);

    if (!MO.isReg())
      continue;

    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;

    // Don't hoist an instruction that uses or defines a physical register.
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
          return false;
        // Otherwise it's safe to move.
        continue;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return false;
      } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
        // If the reg is live into the loop, we can't hoist an instruction
        // which would clobber it.
        return false;
      }
    }

    if (!MO.isUse())
      continue;

    assert(MRI->getVRegDef(Reg) &&
           "Machine instr not mapped for this vreg?!");

    // If the loop contains the definition of an operand, then the instruction
    // isn't loop invariant.
    if (CurLoop->contains(MRI->getVRegDef(Reg)))
      return false;
  }

  // If we got this far, the instruction is loop invariant!
  return true;
}


/// Return true if the specified instruction is used by a phi node and hoisting
/// it could cause a copy to be inserted.
bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
  // Worklist of instructions whose defs to inspect; seeded with MI and grown
  // through in-loop copies.
  SmallVector<const MachineInstr*, 8> Work(1, MI);
  do {
    MI = Work.pop_back_val();
    for (const MachineOperand &MO : MI->operands()) {
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        continue;
      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
        // A PHI may cause a copy to be inserted.
        if (UseMI.isPHI()) {
          // A PHI inside the loop causes a copy because the live range of Reg is
          // extended across the PHI.
          if (CurLoop->contains(&UseMI))
            return true;
          // A PHI in an exit block can cause a copy to be inserted if the PHI
          // has multiple predecessors in the loop with different values.
          // For now, approximate by rejecting all exit blocks.
          if (isExitBlock(UseMI.getParent()))
            return true;
          continue;
        }
        // Look past copies as well.
        if (UseMI.isCopy() && CurLoop->contains(&UseMI))
          Work.push_back(&UseMI);
      }
    }
  } while (!Work.empty());
  return false;
}

/// Compute operand latency between a def of 'Reg' and a use in the current
/// loop, return true if the target considered it high.
985 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 986 unsigned DefIdx, unsigned Reg) const { 987 if (MRI->use_nodbg_empty(Reg)) 988 return false; 989 990 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { 991 if (UseMI.isCopyLike()) 992 continue; 993 if (!CurLoop->contains(UseMI.getParent())) 994 continue; 995 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { 996 const MachineOperand &MO = UseMI.getOperand(i); 997 if (!MO.isReg() || !MO.isUse()) 998 continue; 999 unsigned MOReg = MO.getReg(); 1000 if (MOReg != Reg) 1001 continue; 1002 1003 if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i)) 1004 return true; 1005 } 1006 1007 // Only look at the first in loop use. 1008 break; 1009 } 1010 1011 return false; 1012 } 1013 1014 /// Return true if the instruction is marked "cheap" or the operand latency 1015 /// between its def and a use is one or less. 1016 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1017 if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) 1018 return true; 1019 1020 bool isCheap = false; 1021 unsigned NumDefs = MI.getDesc().getNumDefs(); 1022 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1023 MachineOperand &DefMO = MI.getOperand(i); 1024 if (!DefMO.isReg() || !DefMO.isDef()) 1025 continue; 1026 --NumDefs; 1027 unsigned Reg = DefMO.getReg(); 1028 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1029 continue; 1030 1031 if (!TII->hasLowDefLatency(SchedModel, &MI, i)) 1032 return false; 1033 isCheap = true; 1034 } 1035 1036 return isCheap; 1037 } 1038 1039 /// Visit BBs from header to current BB, check if hoisting an instruction of the 1040 /// given cost matrix can cause high register pressure. 
bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
                                          bool CheapInstr) {
  for (const auto &RPIdAndCost : Cost) {
    // A non-positive cost means hoisting does not add pressure to this set.
    if (RPIdAndCost.second <= 0)
      continue;

    unsigned Class = RPIdAndCost.first;
    int Limit = RegLimit[Class];

    // Don't hoist cheap instructions if they would increase register pressure,
    // even if we're under the limit.
    if (CheapInstr && !HoistCheapInsts)
      return true;

    // Check every block on the path from the header down to the current
    // block; exceeding the limit anywhere counts as high pressure.
    for (const auto &RP : BackTrace)
      if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
        return true;
  }

  return false;
}

/// Traverse the back trace from header to the current block and update their
/// register pressures to reflect the effect of hoisting MI from the current
/// block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
  // First compute the 'cost' of the instruction, i.e. its contribution
  // to register pressure.
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Update register pressure of blocks from loop header to current block.
  for (auto &RP : BackTrace)
    for (const auto &RPIdAndCost : Cost)
      RP[RPIdAndCost.first] += RPIdAndCost.second;
}

/// Return true if it is potentially profitable to hoist the given loop
/// invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  // IMPLICIT_DEF carries no computation; hoisting it is always fine.
  if (MI.isImplicitDef())
    return true;

  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(&MI);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }

  // Rematerializable instructions should always be hoisted since the register
  // allocator can just pull them down again when needed.
  if (TII->isTriviallyReMaterializable(&MI, AA))
    return true;

  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // High-latency defs are worth hoisting regardless of pressure.
    if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
      DEBUG(dbgs() << "Hoist High Latency: " << MI);
      ++NumHighLatency;
      return true;
    }
  }

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }

  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }

  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
    DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  }

  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!TII->isTriviallyReMaterializable(&MI, AA) &&
      !MI.isInvariantLoad(AA)) {
    DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    return false;
  }

  return true;
}

/// Unfold a load from the given machineinstr if the load itself could be
/// hoisted. Return the unfolded and hoistable load, or null if the load
/// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return nullptr;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isInvariantLoad(AA))
    return nullptr;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  if (NewOpc == 0) return nullptr;
  const MCInstrDesc &MID = TII->get(NewOpc);
  // Only handle single-def unfolded instructions.
  if (MID.getNumDefs() != 1) return nullptr;
  MachineFunction &MF = *MI->getParent()->getParent();
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  unsigned Reg = MRI->createVirtualRegister(RC);

  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success =
    TII->unfoldMemoryOperand(MF, MI, Reg,
                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
                             NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  // Insert the unfolded pair (NewMIs[0] = load, NewMIs[1] = remainder)
  // where the original instruction was.
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return nullptr;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.
  MI->eraseFromParent();
  return NewMIs[0];
}

/// Initialize the CSE map with instructions that are in the current loop
/// preheader that may become duplicates of instructions that are hoisted
/// out of the loop.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
  // Bucket every preheader instruction by opcode for fast duplicate lookup.
  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
    const MachineInstr *MI = &*I;
    unsigned Opcode = MI->getOpcode();
    CSEMap[Opcode].push_back(MI);
  }
}

/// Find an instruction among PrevMIs that is a duplicate of MI.
/// Return this instruction if it's found.
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
                              std::vector<const MachineInstr*> &PrevMIs) {
  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
    const MachineInstr *PrevMI = PrevMIs[i];
    // Pre-regalloc, let produceSameValue use MRI to compare operands.
    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
      return PrevMI;
  }
  return nullptr;
}

/// Given a LICM'ed instruction, look for an instruction in the preheader that
/// computes the same value. If it's found, replace all uses of MI's
/// definitions with the definitions of the existing instruction rather than
/// hoisting the instruction to the preheader.
bool MachineLICM::EliminateCSE(MachineInstr *MI,
          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 ||
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() &&
          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        Defs.push_back(i);
    }

    // Constrain Dup's def register classes to MI's. Remember the original
    // classes so we can roll back if any constraint fails.
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
      }
    }

    // All constraints succeeded: rewrite MI's defs to Dup's and drop MI.
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      // DupReg's live range now extends to MI's uses; stale kill flags would
      // be wrong.
      MRI->clearKillFlags(DupReg);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}

/// Return true if the given instruction will be CSE'd if it's hoisted out of
/// the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  return LookForDuplicate(MI, CI->second) != nullptr;
}

/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
  // First check whether we should hoist this instruction.
  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
    // If not, try unfolding a hoistable load.
    MI = ExtractHoistableLoad(MI);
    if (!MI) return false;
  }

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG({
      dbgs() << "Hoisting " << *MI;
      if (Preheader->getBasicBlock())
        dbgs() << " to MachineBasicBlock "
               << Preheader->getName();
      if (MI->getParent()->getBasicBlock())
        dbgs() << " from MachineBasicBlock "
               << MI->getParent()->getName();
      dbgs() << "\n";
    });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction.
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  if (!EliminateCSE(MI, CI)) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);

    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef() && !MO.isDead())
        MRI->clearKillFlags(MO.getReg());
    }

    // Add to the CSE map.
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else
      CSEMap[Opcode].push_back(MI);
  }

  ++NumHoisted;
  Changed = true;

  return true;
}

/// Get the preheader for the current loop, splitting a critical edge if needed.
MachineBasicBlock *MachineLICM::getCurPreheader() {
  // Determine the block to which to hoist instructions. If we can't find a
  // suitable loop predecessor, we can't do any hoisting.

  // If we've tried to get a preheader and failed, don't try again.
  // (-1 is used as a tombstone value meaning "no preheader obtainable".)
  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
    return nullptr;

  if (!CurPreheader) {
    CurPreheader = CurLoop->getLoopPreheader();
    if (!CurPreheader) {
      // No preheader; fall back to splitting the edge from the unique
      // loop predecessor to the header, if there is one.
      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
      if (!Pred) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return nullptr;
      }

      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
      if (!CurPreheader) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return nullptr;
      }
    }
  }
  return CurPreheader;
}