1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion on machine instructions. We 11 // attempt to remove as much code from the body of a loop as possible. 12 // 13 // This pass is not intended to be a replacement or a complete alternative 14 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 15 // constructs that are not exposed before lowering and instruction selection. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/CodeGen/Passes.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/CodeGen/MachineDominators.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/MachineLoopInfo.h" 27 #include "llvm/CodeGen/MachineMemOperand.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/PseudoSourceValue.h" 30 #include "llvm/MC/MCInstrItineraries.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include "llvm/Target/TargetInstrInfo.h" 35 #include "llvm/Target/TargetLowering.h" 36 #include "llvm/Target/TargetMachine.h" 37 #include "llvm/Target/TargetRegisterInfo.h" 38 #include "llvm/Target/TargetSubtargetInfo.h" 39 using namespace llvm; 40 41 #define DEBUG_TYPE "machine-licm" 42 43 static cl::opt<bool> 44 AvoidSpeculation("avoid-speculation", 45 cl::desc("MachineLICM should avoid speculation"), 46 cl::init(true), cl::Hidden); 47 48 static cl::opt<bool> 49 HoistCheapInsts("hoist-cheap-insts", 50 cl::desc("MachineLICM should hoist even cheap instructions"), 51 cl::init(false), cl::Hidden); 52 53 static cl::opt<bool> 54 SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", 55 cl::desc("MachineLICM should sink instructions into " 56 "loops to avoid register spills"), 57 cl::init(false), cl::Hidden); 58 59 STATISTIC(NumHoisted, 60 "Number of machine instructions hoisted out of loops"); 61 STATISTIC(NumLowRP, 62 "Number of instructions hoisted in low reg pressure situation"); 63 STATISTIC(NumHighLatency, 64 "Number of high latency instructions hoisted"); 65 STATISTIC(NumCSEed, 66 "Number of hoisted machine instructions CSEed"); 67 STATISTIC(NumPostRAHoisted, 68 "Number of machine instructions hoisted out of loops post regalloc"); 69 70 namespace { 71 class MachineLICM : public MachineFunctionPass { 72 const TargetInstrInfo *TII; 73 const TargetLoweringBase *TLI; 74 const TargetRegisterInfo *TRI; 75 const MachineFrameInfo *MFI; 76 MachineRegisterInfo *MRI; 77 const InstrItineraryData *InstrItins; 78 bool PreRegAlloc; 79 80 // Various analyses that we use... 81 AliasAnalysis *AA; // Alias analysis info. 82 MachineLoopInfo *MLI; // Current MachineLoopInfo 83 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 84 85 // State that is updated as we process loops 86 bool Changed; // True if a loop is changed. 87 bool FirstInLoop; // True if it's the first LICM in the loop. 88 MachineLoop *CurLoop; // The current loop we are working on. 89 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 90 91 // Exit blocks for CurLoop. 92 SmallVector<MachineBasicBlock*, 8> ExitBlocks; 93 94 bool isExitBlock(const MachineBasicBlock *MBB) const { 95 return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != 96 ExitBlocks.end(); 97 } 98 99 // Track 'estimated' register pressure. 100 SmallSet<unsigned, 32> RegSeen; 101 SmallVector<unsigned, 8> RegPressure; 102 103 // Register pressure "limit" per register class. If the pressure 104 // is higher than the limit, then it's considered high. 105 SmallVector<unsigned, 8> RegLimit; 106 107 // Register pressure on path leading from loop preheader to current BB. 108 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 109 110 // For each opcode, keep a list of potential CSE instructions. 111 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 112 113 enum { 114 SpeculateFalse = 0, 115 SpeculateTrue = 1, 116 SpeculateUnknown = 2 117 }; 118 119 // If a MBB does not dominate loop exiting blocks then it may not safe 120 // to hoist loads from this block. 121 // Tri-state: 0 - false, 1 - true, 2 - unknown 122 unsigned SpeculationState; 123 124 public: 125 static char ID; // Pass identification, replacement for typeid 126 MachineLICM() : 127 MachineFunctionPass(ID), PreRegAlloc(true) { 128 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 129 } 130 131 explicit MachineLICM(bool PreRA) : 132 MachineFunctionPass(ID), PreRegAlloc(PreRA) { 133 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 134 } 135 136 bool runOnMachineFunction(MachineFunction &MF) override; 137 138 void getAnalysisUsage(AnalysisUsage &AU) const override { 139 AU.addRequired<MachineLoopInfo>(); 140 AU.addRequired<MachineDominatorTree>(); 141 AU.addRequired<AliasAnalysis>(); 142 AU.addPreserved<MachineLoopInfo>(); 143 AU.addPreserved<MachineDominatorTree>(); 144 MachineFunctionPass::getAnalysisUsage(AU); 145 } 146 147 void releaseMemory() override { 148 RegSeen.clear(); 149 RegPressure.clear(); 150 RegLimit.clear(); 151 BackTrace.clear(); 152 CSEMap.clear(); 153 } 154 155 private: 156 /// CandidateInfo - Keep track of information about hoisting candidates. 157 struct CandidateInfo { 158 MachineInstr *MI; 159 unsigned Def; 160 int FI; 161 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 162 : MI(mi), Def(def), FI(fi) {} 163 }; 164 165 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 166 /// invariants out to the preheader. 167 void HoistRegionPostRA(); 168 169 /// HoistPostRA - When an instruction is found to only use loop invariant 170 /// operands that is safe to hoist, this instruction is called to do the 171 /// dirty work. 172 void HoistPostRA(MachineInstr *MI, unsigned Def); 173 174 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 175 /// gather register def and frame object update information. 176 void ProcessMI(MachineInstr *MI, 177 BitVector &PhysRegDefs, 178 BitVector &PhysRegClobbers, 179 SmallSet<int, 32> &StoredFIs, 180 SmallVectorImpl<CandidateInfo> &Candidates); 181 182 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 183 /// current loop. 184 void AddToLiveIns(unsigned Reg); 185 186 /// IsLICMCandidate - Returns true if the instruction may be a suitable 187 /// candidate for LICM. e.g. If the instruction is a call, then it's 188 /// obviously not safe to hoist it. 189 bool IsLICMCandidate(MachineInstr &I); 190 191 /// IsLoopInvariantInst - Returns true if the instruction is loop 192 /// invariant. I.e., all virtual register operands are defined outside of 193 /// the loop, physical registers aren't accessed (explicitly or implicitly), 194 /// and the instruction is hoistable. 195 /// 196 bool IsLoopInvariantInst(MachineInstr &I); 197 198 /// HasLoopPHIUse - Return true if the specified instruction is used by any 199 /// phi node in the current loop. 200 bool HasLoopPHIUse(const MachineInstr *MI) const; 201 202 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 203 /// and an use in the current loop, return true if the target considered 204 /// it 'high'. 205 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 206 unsigned Reg) const; 207 208 bool IsCheapInstruction(MachineInstr &MI) const; 209 210 /// CanCauseHighRegPressure - Visit BBs from header to current BB, 211 /// check if hoisting an instruction of the given cost matrix can cause high 212 /// register pressure. 213 bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, 214 bool Cheap); 215 216 /// UpdateBackTraceRegPressure - Traverse the back trace from header to 217 /// the current block and update their register pressures to reflect the 218 /// effect of hoisting MI from the current block to the preheader. 219 void UpdateBackTraceRegPressure(const MachineInstr *MI); 220 221 /// IsProfitableToHoist - Return true if it is potentially profitable to 222 /// hoist the given loop invariant. 223 bool IsProfitableToHoist(MachineInstr &MI); 224 225 /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 226 /// If not then a load from this mbb may not be safe to hoist. 227 bool IsGuaranteedToExecute(MachineBasicBlock *BB); 228 229 void EnterScope(MachineBasicBlock *MBB); 230 231 void ExitScope(MachineBasicBlock *MBB); 232 233 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given 234 /// dominator tree node if its a leaf or all of its children are done. Walk 235 /// up the dominator tree to destroy ancestors which are now done. 236 void ExitScopeIfDone(MachineDomTreeNode *Node, 237 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 238 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 239 240 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 241 /// blocks dominated by the specified header block, and that are in the 242 /// current loop) in depth first order w.r.t the DominatorTree. This allows 243 /// us to visit definitions before uses, allowing us to hoist a loop body in 244 /// one pass without iteration. 245 /// 246 void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); 247 void HoistRegion(MachineDomTreeNode *N, bool IsHeader); 248 249 /// SinkIntoLoop - Sink instructions into loops if profitable. This 250 /// especially tries to prevent register spills caused by register pressure 251 /// if there is little to no overhead moving instructions into loops. 252 void SinkIntoLoop(); 253 254 /// getRegisterClassIDAndCost - For a given register return the ID and cost 255 /// of its representative register class by reference. 256 void getRegisterClassIDAndCost(unsigned Reg, unsigned &RCId, 257 unsigned &RCCost) const; 258 259 /// InitRegPressure - Find all virtual register references that are liveout 260 /// of the preheader to initialize the starting "register pressure". Note 261 /// this does not count live through (livein but not used) registers. 262 void InitRegPressure(MachineBasicBlock *BB); 263 264 /// calcRegisterCost - Calculate the additional register pressure that the 265 /// registers used in MI cause. 266 /// 267 /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to 268 /// figure out which usages are live-ins. 269 /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. 270 DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, 271 bool ConsiderSeen, 272 bool ConsiderUnseenAsDef); 273 274 /// UpdateRegPressure - Update estimate of register pressure after the 275 /// specified instruction. 276 void UpdateRegPressure(const MachineInstr *MI, 277 bool ConsiderUnseenAsDef = false); 278 279 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 280 /// the load itself could be hoisted. Return the unfolded and hoistable 281 /// load, or null if the load couldn't be unfolded or if it wouldn't 282 /// be hoistable. 283 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 284 285 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 286 /// duplicate of MI. Return this instruction if it's found. 287 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 288 std::vector<const MachineInstr*> &PrevMIs); 289 290 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 291 /// the preheader that compute the same value. If it's found, do a RAU on 292 /// with the definition of the existing instruction rather than hoisting 293 /// the instruction to the preheader. 294 bool EliminateCSE(MachineInstr *MI, 295 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 296 297 /// MayCSE - Return true if the given instruction will be CSE'd if it's 298 /// hoisted out of the loop. 299 bool MayCSE(MachineInstr *MI); 300 301 /// Hoist - When an instruction is found to only use loop invariant operands 302 /// that is safe to hoist, this instruction is called to do the dirty work. 303 /// It returns true if the instruction is hoisted. 304 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 305 306 /// InitCSEMap - Initialize the CSE map with instructions that are in the 307 /// current loop preheader that may become duplicates of instructions that 308 /// are hoisted out of the loop. 309 void InitCSEMap(MachineBasicBlock *BB); 310 311 /// getCurPreheader - Get the preheader for the current loop, splitting 312 /// a critical edge if needed. 313 MachineBasicBlock *getCurPreheader(); 314 }; 315 } // end anonymous namespace 316 317 char MachineLICM::ID = 0; 318 char &llvm::MachineLICMID = MachineLICM::ID; 319 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 320 "Machine Loop Invariant Code Motion", false, false) 321 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 322 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 323 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 324 INITIALIZE_PASS_END(MachineLICM, "machinelicm", 325 "Machine Loop Invariant Code Motion", false, false) 326 327 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 328 /// loop that has a unique predecessor. 329 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 330 // Check whether this loop even has a unique predecessor. 331 if (!CurLoop->getLoopPredecessor()) 332 return false; 333 // Ok, now check to see if any of its outer loops do. 334 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 335 if (L->getLoopPredecessor()) 336 return false; 337 // None of them did, so this is the outermost with a unique predecessor. 338 return true; 339 } 340 341 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 342 if (skipOptnoneFunction(*MF.getFunction())) 343 return false; 344 345 Changed = FirstInLoop = false; 346 TII = MF.getSubtarget().getInstrInfo(); 347 TLI = MF.getSubtarget().getTargetLowering(); 348 TRI = MF.getSubtarget().getRegisterInfo(); 349 MFI = MF.getFrameInfo(); 350 MRI = &MF.getRegInfo(); 351 InstrItins = MF.getSubtarget().getInstrItineraryData(); 352 353 PreRegAlloc = MRI->isSSA(); 354 355 if (PreRegAlloc) 356 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 357 else 358 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 359 DEBUG(dbgs() << MF.getName() << " ********\n"); 360 361 if (PreRegAlloc) { 362 // Estimate register pressure during pre-regalloc pass. 363 unsigned NumRC = TRI->getNumRegClasses(); 364 RegPressure.resize(NumRC); 365 std::fill(RegPressure.begin(), RegPressure.end(), 0); 366 RegLimit.resize(NumRC); 367 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 368 E = TRI->regclass_end(); I != E; ++I) 369 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); 370 } 371 372 // Get our Loop information... 373 MLI = &getAnalysis<MachineLoopInfo>(); 374 DT = &getAnalysis<MachineDominatorTree>(); 375 AA = &getAnalysis<AliasAnalysis>(); 376 377 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 378 while (!Worklist.empty()) { 379 CurLoop = Worklist.pop_back_val(); 380 CurPreheader = nullptr; 381 ExitBlocks.clear(); 382 383 // If this is done before regalloc, only visit outer-most preheader-sporting 384 // loops. 385 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 386 Worklist.append(CurLoop->begin(), CurLoop->end()); 387 continue; 388 } 389 390 CurLoop->getExitBlocks(ExitBlocks); 391 392 if (!PreRegAlloc) 393 HoistRegionPostRA(); 394 else { 395 // CSEMap is initialized for loop header when the first instruction is 396 // being hoisted. 397 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 398 FirstInLoop = true; 399 HoistOutOfLoop(N); 400 CSEMap.clear(); 401 402 if (SinkInstsToAvoidSpills) 403 SinkIntoLoop(); 404 } 405 } 406 407 return Changed; 408 } 409 410 /// InstructionStoresToFI - Return true if instruction stores to the 411 /// specified frame. 412 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 413 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 414 oe = MI->memoperands_end(); o != oe; ++o) { 415 if (!(*o)->isStore() || !(*o)->getPseudoValue()) 416 continue; 417 if (const FixedStackPseudoSourceValue *Value = 418 dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) { 419 if (Value->getFrameIndex() == FI) 420 return true; 421 } 422 } 423 return false; 424 } 425 426 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 427 /// gather register def and frame object update information. 428 void MachineLICM::ProcessMI(MachineInstr *MI, 429 BitVector &PhysRegDefs, 430 BitVector &PhysRegClobbers, 431 SmallSet<int, 32> &StoredFIs, 432 SmallVectorImpl<CandidateInfo> &Candidates) { 433 bool RuledOut = false; 434 bool HasNonInvariantUse = false; 435 unsigned Def = 0; 436 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 437 const MachineOperand &MO = MI->getOperand(i); 438 if (MO.isFI()) { 439 // Remember if the instruction stores to the frame index. 440 int FI = MO.getIndex(); 441 if (!StoredFIs.count(FI) && 442 MFI->isSpillSlotObjectIndex(FI) && 443 InstructionStoresToFI(MI, FI)) 444 StoredFIs.insert(FI); 445 HasNonInvariantUse = true; 446 continue; 447 } 448 449 // We can't hoist an instruction defining a physreg that is clobbered in 450 // the loop. 451 if (MO.isRegMask()) { 452 PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 453 continue; 454 } 455 456 if (!MO.isReg()) 457 continue; 458 unsigned Reg = MO.getReg(); 459 if (!Reg) 460 continue; 461 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 462 "Not expecting virtual register!"); 463 464 if (!MO.isDef()) { 465 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 466 // If it's using a non-loop-invariant register, then it's obviously not 467 // safe to hoist. 468 HasNonInvariantUse = true; 469 continue; 470 } 471 472 if (MO.isImplicit()) { 473 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 474 PhysRegClobbers.set(*AI); 475 if (!MO.isDead()) 476 // Non-dead implicit def? This cannot be hoisted. 477 RuledOut = true; 478 // No need to check if a dead implicit def is also defined by 479 // another instruction. 480 continue; 481 } 482 483 // FIXME: For now, avoid instructions with multiple defs, unless 484 // it's a dead implicit def. 485 if (Def) 486 RuledOut = true; 487 else 488 Def = Reg; 489 490 // If we have already seen another instruction that defines the same 491 // register, then this is not safe. Two defs is indicated by setting a 492 // PhysRegClobbers bit. 493 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { 494 if (PhysRegDefs.test(*AS)) 495 PhysRegClobbers.set(*AS); 496 PhysRegDefs.set(*AS); 497 } 498 if (PhysRegClobbers.test(Reg)) 499 // MI defined register is seen defined by another instruction in 500 // the loop, it cannot be a LICM candidate. 501 RuledOut = true; 502 } 503 504 // Only consider reloads for now and remats which do not have register 505 // operands. FIXME: Consider unfold load folding instructions. 506 if (Def && !RuledOut) { 507 int FI = INT_MIN; 508 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 509 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 510 Candidates.push_back(CandidateInfo(MI, Def, FI)); 511 } 512 } 513 514 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 515 /// invariants out to the preheader. 516 void MachineLICM::HoistRegionPostRA() { 517 MachineBasicBlock *Preheader = getCurPreheader(); 518 if (!Preheader) 519 return; 520 521 unsigned NumRegs = TRI->getNumRegs(); 522 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 523 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 524 525 SmallVector<CandidateInfo, 32> Candidates; 526 SmallSet<int, 32> StoredFIs; 527 528 // Walk the entire region, count number of defs for each register, and 529 // collect potential LICM candidates. 530 const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 531 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 532 MachineBasicBlock *BB = Blocks[i]; 533 534 // If the header of the loop containing this basic block is a landing pad, 535 // then don't try to hoist instructions out of this loop. 536 const MachineLoop *ML = MLI->getLoopFor(BB); 537 if (ML && ML->getHeader()->isLandingPad()) continue; 538 539 // Conservatively treat live-in's as an external def. 540 // FIXME: That means a reload that're reused in successor block(s) will not 541 // be LICM'ed. 542 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 543 E = BB->livein_end(); I != E; ++I) { 544 unsigned Reg = *I; 545 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 546 PhysRegDefs.set(*AI); 547 } 548 549 SpeculationState = SpeculateUnknown; 550 for (MachineBasicBlock::iterator 551 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 552 MachineInstr *MI = &*MII; 553 ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 554 } 555 } 556 557 // Gather the registers read / clobbered by the terminator. 558 BitVector TermRegs(NumRegs); 559 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); 560 if (TI != Preheader->end()) { 561 for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { 562 const MachineOperand &MO = TI->getOperand(i); 563 if (!MO.isReg()) 564 continue; 565 unsigned Reg = MO.getReg(); 566 if (!Reg) 567 continue; 568 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 569 TermRegs.set(*AI); 570 } 571 } 572 573 // Now evaluate whether the potential candidates qualify. 574 // 1. Check if the candidate defined register is defined by another 575 // instruction in the loop. 576 // 2. If the candidate is a load from stack slot (always true for now), 577 // check if the slot is stored anywhere in the loop. 578 // 3. Make sure candidate def should not clobber 579 // registers read by the terminator. Similarly its def should not be 580 // clobbered by the terminator. 581 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 582 if (Candidates[i].FI != INT_MIN && 583 StoredFIs.count(Candidates[i].FI)) 584 continue; 585 586 unsigned Def = Candidates[i].Def; 587 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { 588 bool Safe = true; 589 MachineInstr *MI = Candidates[i].MI; 590 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 591 const MachineOperand &MO = MI->getOperand(j); 592 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 593 continue; 594 unsigned Reg = MO.getReg(); 595 if (PhysRegDefs.test(Reg) || 596 PhysRegClobbers.test(Reg)) { 597 // If it's using a non-loop-invariant register, then it's obviously 598 // not safe to hoist. 599 Safe = false; 600 break; 601 } 602 } 603 if (Safe) 604 HoistPostRA(MI, Candidates[i].Def); 605 } 606 } 607 } 608 609 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 610 /// loop, and make sure it is not killed by any instructions in the loop. 611 void MachineLICM::AddToLiveIns(unsigned Reg) { 612 const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 613 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 614 MachineBasicBlock *BB = Blocks[i]; 615 if (!BB->isLiveIn(Reg)) 616 BB->addLiveIn(Reg); 617 for (MachineBasicBlock::iterator 618 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 619 MachineInstr *MI = &*MII; 620 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 621 MachineOperand &MO = MI->getOperand(i); 622 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 623 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 624 MO.setIsKill(false); 625 } 626 } 627 } 628 } 629 630 /// HoistPostRA - When an instruction is found to only use loop invariant 631 /// operands that is safe to hoist, this instruction is called to do the 632 /// dirty work. 633 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 634 MachineBasicBlock *Preheader = getCurPreheader(); 635 636 // Now move the instructions to the predecessor, inserting it before any 637 // terminator instructions. 638 DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 639 << MI->getParent()->getNumber() << ": " << *MI); 640 641 // Splice the instruction to the preheader. 642 MachineBasicBlock *MBB = MI->getParent(); 643 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 644 645 // Add register to livein list to all the BBs in the current loop since a 646 // loop invariant must be kept live throughout the whole loop. This is 647 // important to ensure later passes do not scavenge the def register. 648 AddToLiveIns(Def); 649 650 ++NumPostRAHoisted; 651 Changed = true; 652 } 653 654 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 655 // If not then a load from this mbb may not be safe to hoist. 656 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { 657 if (SpeculationState != SpeculateUnknown) 658 return SpeculationState == SpeculateFalse; 659 660 if (BB != CurLoop->getHeader()) { 661 // Check loop exiting blocks. 662 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 663 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 664 for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) 665 if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { 666 SpeculationState = SpeculateTrue; 667 return false; 668 } 669 } 670 671 SpeculationState = SpeculateFalse; 672 return true; 673 } 674 675 void MachineLICM::EnterScope(MachineBasicBlock *MBB) { 676 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 677 678 // Remember livein register pressure. 679 BackTrace.push_back(RegPressure); 680 } 681 682 void MachineLICM::ExitScope(MachineBasicBlock *MBB) { 683 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 684 BackTrace.pop_back(); 685 } 686 687 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 688 /// dominator tree node if its a leaf or all of its children are done. Walk 689 /// up the dominator tree to destroy ancestors which are now done. 690 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, 691 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 692 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 693 if (OpenChildren[Node]) 694 return; 695 696 // Pop scope. 697 ExitScope(Node->getBlock()); 698 699 // Now traverse upwards to pop ancestors whose offsprings are all done. 700 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 701 unsigned Left = --OpenChildren[Parent]; 702 if (Left != 0) 703 break; 704 ExitScope(Parent->getBlock()); 705 Node = Parent; 706 } 707 } 708 709 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 710 /// blocks dominated by the specified header block, and that are in the 711 /// current loop) in depth first order w.r.t the DominatorTree. This allows 712 /// us to visit definitions before uses, allowing us to hoist a loop body in 713 /// one pass without iteration. 714 /// 715 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 716 MachineBasicBlock *Preheader = getCurPreheader(); 717 if (!Preheader) 718 return; 719 720 SmallVector<MachineDomTreeNode*, 32> Scopes; 721 SmallVector<MachineDomTreeNode*, 8> WorkList; 722 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 723 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 724 725 // Perform a DFS walk to determine the order of visit. 726 WorkList.push_back(HeaderN); 727 while (!WorkList.empty()) { 728 MachineDomTreeNode *Node = WorkList.pop_back_val(); 729 assert(Node && "Null dominator tree node?"); 730 MachineBasicBlock *BB = Node->getBlock(); 731 732 // If the header of the loop containing this basic block is a landing pad, 733 // then don't try to hoist instructions out of this loop. 734 const MachineLoop *ML = MLI->getLoopFor(BB); 735 if (ML && ML->getHeader()->isLandingPad()) 736 continue; 737 738 // If this subregion is not in the top level loop at all, exit. 739 if (!CurLoop->contains(BB)) 740 continue; 741 742 Scopes.push_back(Node); 743 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 744 unsigned NumChildren = Children.size(); 745 746 // Don't hoist things out of a large switch statement. This often causes 747 // code to be hoisted that wasn't going to be executed, and increases 748 // register pressure in a situation where it's likely to matter. 749 if (BB->succ_size() >= 25) 750 NumChildren = 0; 751 752 OpenChildren[Node] = NumChildren; 753 // Add children in reverse order as then the next popped worklist node is 754 // the first child of this node. This means we ultimately traverse the 755 // DOM tree in exactly the same order as if we'd recursed. 756 for (int i = (int)NumChildren-1; i >= 0; --i) { 757 MachineDomTreeNode *Child = Children[i]; 758 ParentMap[Child] = Node; 759 WorkList.push_back(Child); 760 } 761 } 762 763 if (Scopes.size() == 0) 764 return; 765 766 // Compute registers which are livein into the loop headers. 767 RegSeen.clear(); 768 BackTrace.clear(); 769 InitRegPressure(Preheader); 770 771 // Now perform LICM. 772 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 773 MachineDomTreeNode *Node = Scopes[i]; 774 MachineBasicBlock *MBB = Node->getBlock(); 775 776 EnterScope(MBB); 777 778 // Process the block 779 SpeculationState = SpeculateUnknown; 780 for (MachineBasicBlock::iterator 781 MII = MBB->begin(), E = MBB->end(); MII != E; ) { 782 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 783 MachineInstr *MI = &*MII; 784 if (!Hoist(MI, Preheader)) 785 UpdateRegPressure(MI); 786 MII = NextMII; 787 } 788 789 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 790 ExitScopeIfDone(Node, OpenChildren, ParentMap); 791 } 792 } 793 794 void MachineLICM::SinkIntoLoop() { 795 MachineBasicBlock *Preheader = getCurPreheader(); 796 if (!Preheader) 797 return; 798 799 SmallVector<MachineInstr *, 8> Candidates; 800 for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); 801 I != Preheader->instr_end(); ++I) { 802 // We need to ensure that we can safely move this instruction into the loop. 803 // As such, it must not have side-effects, e.g. such as a call has. 804 if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I)) 805 Candidates.push_back(I); 806 } 807 808 for (MachineInstr *I : Candidates) { 809 const MachineOperand &MO = I->getOperand(0); 810 if (!MO.isDef() || !MO.isReg() || !MO.getReg()) 811 continue; 812 if (!MRI->hasOneDef(MO.getReg())) 813 continue; 814 bool CanSink = true; 815 MachineBasicBlock *B = nullptr; 816 for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { 817 // FIXME: Come up with a proper cost model that estimates whether sinking 818 // the instruction (and thus possibly executing it on every loop 819 // iteration) is more expensive than a register. 820 // For now assumes that copies are cheap and thus almost always worth it. 821 if (!MI.isCopy()) { 822 CanSink = false; 823 break; 824 } 825 if (!B) { 826 B = MI.getParent(); 827 continue; 828 } 829 B = DT->findNearestCommonDominator(B, MI.getParent()); 830 if (!B) { 831 CanSink = false; 832 break; 833 } 834 } 835 if (!CanSink || !B || B == Preheader) 836 continue; 837 B->splice(B->getFirstNonPHI(), Preheader, I); 838 } 839 } 840 841 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 842 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 843 } 844 845 void MachineLICM::getRegisterClassIDAndCost(unsigned Reg, unsigned &RCId, 846 unsigned &RCCost) const { 847 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 848 MVT VT = *RC->vt_begin(); 849 if (VT == MVT::Untyped) { 850 RCId = RC->getID(); 851 RCCost = 1; 852 } else { 853 RCId = TLI->getRepRegClassFor(VT)->getID(); 854 RCCost = TLI->getRepRegClassCostFor(VT); 855 } 856 } 857 858 /// InitRegPressure - Find all virtual register references that are liveout of 859 /// the preheader to initialize the starting "register pressure". Note this 860 /// does not count live through (livein but not used) registers. 861 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 862 std::fill(RegPressure.begin(), RegPressure.end(), 0); 863 864 // If the preheader has only a single predecessor and it ends with a 865 // fallthrough or an unconditional branch, then scan its predecessor for live 866 // defs as well. This happens whenever the preheader is created by splitting 867 // the critical edge from the loop predecessor to the loop header. 868 if (BB->pred_size() == 1) { 869 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 870 SmallVector<MachineOperand, 4> Cond; 871 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 872 InitRegPressure(*BB->pred_begin()); 873 } 874 875 for (const MachineInstr &MI : *BB) 876 UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); 877 } 878 879 /// UpdateRegPressure - Update estimate of register pressure after the 880 /// specified instruction. 881 void MachineLICM::UpdateRegPressure(const MachineInstr *MI, 882 bool ConsiderUnseenAsDef) { 883 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); 884 for (const auto &ClassAndCost : Cost) { 885 unsigned Class = ClassAndCost.first; 886 if (static_cast<int>(RegPressure[Class]) < -ClassAndCost.second) 887 RegPressure[Class] = 0; 888 else 889 RegPressure[Class] += ClassAndCost.second; 890 } 891 } 892 893 DenseMap<unsigned, int> 894 MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, 895 bool ConsiderUnseenAsDef) { 896 DenseMap<unsigned, int> Cost; 897 if (MI->isImplicitDef()) 898 return Cost; 899 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 900 const MachineOperand &MO = MI->getOperand(i); 901 if (!MO.isReg() || MO.isImplicit()) 902 continue; 903 unsigned Reg = MO.getReg(); 904 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 905 continue; 906 907 // FIXME: It seems bad to use RegSeen only for some of these calculations. 908 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false; 909 unsigned RCId, RCCost; 910 getRegisterClassIDAndCost(Reg, RCId, RCCost); 911 int PriorCost = 0; 912 if (Cost.find(RCId) != Cost.end()) 913 PriorCost = Cost[RCId]; 914 if (MO.isDef()) 915 Cost[RCId] = PriorCost + RCCost; 916 else { 917 bool isKill = isOperandKill(MO, MRI); 918 if (isNew && !isKill && ConsiderUnseenAsDef) 919 // Haven't seen this, it must be a livein. 920 Cost[RCId] = PriorCost + RCCost; 921 else if (!isNew && isKill) 922 Cost[RCId] = PriorCost - RCCost; 923 } 924 } 925 return Cost; 926 } 927 928 /// isLoadFromGOTOrConstantPool - Return true if this machine instruction 929 /// loads from global offset table or constant pool. 930 static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { 931 assert (MI.mayLoad() && "Expected MI that loads!"); 932 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 933 E = MI.memoperands_end(); I != E; ++I) { 934 if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { 935 if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) 936 return true; 937 } 938 } 939 return false; 940 } 941 942 /// IsLICMCandidate - Returns true if the instruction may be a suitable 943 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 944 /// not safe to hoist it. 945 bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 946 // Check if it's safe to move the instruction. 947 bool DontMoveAcrossStore = true; 948 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 949 return false; 950 951 // If it is load then check if it is guaranteed to execute by making sure that 952 // it dominates all exiting blocks. If it doesn't, then there is a path out of 953 // the loop which does not execute this load, so we can't hoist it. Loads 954 // from constant memory are not safe to speculate all the time, for example 955 // indexed load from a jump table. 956 // Stores and side effects are already checked by isSafeToMove. 957 if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && 958 !IsGuaranteedToExecute(I.getParent())) 959 return false; 960 961 return true; 962 } 963 964 /// IsLoopInvariantInst - Returns true if the instruction is loop 965 /// invariant. I.e., all virtual register operands are defined outside of the 966 /// loop, physical registers aren't accessed explicitly, and there are no side 967 /// effects that aren't captured by the operands or other flags. 968 /// 969 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 970 if (!IsLICMCandidate(I)) 971 return false; 972 973 // The instruction is loop invariant if all of its operands are. 974 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 975 const MachineOperand &MO = I.getOperand(i); 976 977 if (!MO.isReg()) 978 continue; 979 980 unsigned Reg = MO.getReg(); 981 if (Reg == 0) continue; 982 983 // Don't hoist an instruction that uses or defines a physical register. 984 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 985 if (MO.isUse()) { 986 // If the physreg has no defs anywhere, it's just an ambient register 987 // and we can freely move its uses. Alternatively, if it's allocatable, 988 // it could get allocated to something with a def during allocation. 989 if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) 990 return false; 991 // Otherwise it's safe to move. 992 continue; 993 } else if (!MO.isDead()) { 994 // A def that isn't dead. We can't move it. 995 return false; 996 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 997 // If the reg is live into the loop, we can't hoist an instruction 998 // which would clobber it. 999 return false; 1000 } 1001 } 1002 1003 if (!MO.isUse()) 1004 continue; 1005 1006 assert(MRI->getVRegDef(Reg) && 1007 "Machine instr not mapped for this vreg?!"); 1008 1009 // If the loop contains the definition of an operand, then the instruction 1010 // isn't loop invariant. 1011 if (CurLoop->contains(MRI->getVRegDef(Reg))) 1012 return false; 1013 } 1014 1015 // If we got this far, the instruction is loop invariant! 1016 return true; 1017 } 1018 1019 1020 /// HasLoopPHIUse - Return true if the specified instruction is used by a 1021 /// phi node and hoisting it could cause a copy to be inserted. 1022 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { 1023 SmallVector<const MachineInstr*, 8> Work(1, MI); 1024 do { 1025 MI = Work.pop_back_val(); 1026 for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 1027 if (!MO->isReg() || !MO->isDef()) 1028 continue; 1029 unsigned Reg = MO->getReg(); 1030 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1031 continue; 1032 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 1033 // A PHI may cause a copy to be inserted. 1034 if (UseMI.isPHI()) { 1035 // A PHI inside the loop causes a copy because the live range of Reg is 1036 // extended across the PHI. 1037 if (CurLoop->contains(&UseMI)) 1038 return true; 1039 // A PHI in an exit block can cause a copy to be inserted if the PHI 1040 // has multiple predecessors in the loop with different values. 1041 // For now, approximate by rejecting all exit blocks. 1042 if (isExitBlock(UseMI.getParent())) 1043 return true; 1044 continue; 1045 } 1046 // Look past copies as well. 1047 if (UseMI.isCopy() && CurLoop->contains(&UseMI)) 1048 Work.push_back(&UseMI); 1049 } 1050 } 1051 } while (!Work.empty()); 1052 return false; 1053 } 1054 1055 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 1056 /// and an use in the current loop, return true if the target considered 1057 /// it 'high'. 1058 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 1059 unsigned DefIdx, unsigned Reg) const { 1060 if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) 1061 return false; 1062 1063 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { 1064 if (UseMI.isCopyLike()) 1065 continue; 1066 if (!CurLoop->contains(UseMI.getParent())) 1067 continue; 1068 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { 1069 const MachineOperand &MO = UseMI.getOperand(i); 1070 if (!MO.isReg() || !MO.isUse()) 1071 continue; 1072 unsigned MOReg = MO.getReg(); 1073 if (MOReg != Reg) 1074 continue; 1075 1076 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i)) 1077 return true; 1078 } 1079 1080 // Only look at the first in loop use. 1081 break; 1082 } 1083 1084 return false; 1085 } 1086 1087 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or 1088 /// the operand latency between its def and a use is one or less. 1089 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1090 if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) 1091 return true; 1092 if (!InstrItins || InstrItins->isEmpty()) 1093 return false; 1094 1095 bool isCheap = false; 1096 unsigned NumDefs = MI.getDesc().getNumDefs(); 1097 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1098 MachineOperand &DefMO = MI.getOperand(i); 1099 if (!DefMO.isReg() || !DefMO.isDef()) 1100 continue; 1101 --NumDefs; 1102 unsigned Reg = DefMO.getReg(); 1103 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1104 continue; 1105 1106 if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 1107 return false; 1108 isCheap = true; 1109 } 1110 1111 return isCheap; 1112 } 1113 1114 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check 1115 /// if hoisting an instruction of the given cost matrix can cause high 1116 /// register pressure. 1117 bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, 1118 bool CheapInstr) { 1119 for (const auto &ClassAndCost : Cost) { 1120 if (ClassAndCost.second <= 0) 1121 continue; 1122 1123 unsigned Class = ClassAndCost.first; 1124 int Limit = RegLimit[Class]; 1125 1126 // Don't hoist cheap instructions if they would increase register pressure, 1127 // even if we're under the limit. 1128 if (CheapInstr && !HoistCheapInsts) 1129 return true; 1130 1131 for (const auto &RP : BackTrace) 1132 if (static_cast<int>(RP[Class]) + ClassAndCost.second >= Limit) 1133 return true; 1134 } 1135 1136 return false; 1137 } 1138 1139 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the 1140 /// current block and update their register pressures to reflect the effect 1141 /// of hoisting MI from the current block to the preheader. 1142 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { 1143 // First compute the 'cost' of the instruction, i.e. its contribution 1144 // to register pressure. 1145 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, 1146 /*ConsiderUnseenAsDef=*/false); 1147 1148 // Update register pressure of blocks from loop header to current block. 1149 for (auto &RP : BackTrace) 1150 for (const auto &ClassAndCost : Cost) 1151 RP[ClassAndCost.first] += ClassAndCost.second; 1152 } 1153 1154 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist 1155 /// the given loop invariant. 1156 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 1157 if (MI.isImplicitDef()) 1158 return true; 1159 1160 // Besides removing computation from the loop, hoisting an instruction has 1161 // these effects: 1162 // 1163 // - The value defined by the instruction becomes live across the entire 1164 // loop. This increases register pressure in the loop. 1165 // 1166 // - If the value is used by a PHI in the loop, a copy will be required for 1167 // lowering the PHI after extending the live range. 1168 // 1169 // - When hoisting the last use of a value in the loop, that value no longer 1170 // needs to be live in the loop. This lowers register pressure in the loop. 1171 1172 bool CheapInstr = IsCheapInstruction(MI); 1173 bool CreatesCopy = HasLoopPHIUse(&MI); 1174 1175 // Don't hoist a cheap instruction if it would create a copy in the loop. 1176 if (CheapInstr && CreatesCopy) { 1177 DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); 1178 return false; 1179 } 1180 1181 // Rematerializable instructions should always be hoisted since the register 1182 // allocator can just pull them down again when needed. 1183 if (TII->isTriviallyReMaterializable(&MI, AA)) 1184 return true; 1185 1186 // FIXME: If there are long latency loop-invariant instructions inside the 1187 // loop at this point, why didn't the optimizer's LICM hoist them? 1188 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 1189 const MachineOperand &MO = MI.getOperand(i); 1190 if (!MO.isReg() || MO.isImplicit()) 1191 continue; 1192 unsigned Reg = MO.getReg(); 1193 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1194 continue; 1195 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { 1196 DEBUG(dbgs() << "Hoist High Latency: " << MI); 1197 ++NumHighLatency; 1198 return true; 1199 } 1200 } 1201 1202 // Estimate register pressure to determine whether to LICM the instruction. 1203 // In low register pressure situation, we can be more aggressive about 1204 // hoisting. Also, favors hoisting long latency instructions even in 1205 // moderately high pressure situation. 1206 // Cheap instructions will only be hoisted if they don't increase register 1207 // pressure at all. 1208 auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false, 1209 /*ConsiderUnseenAsDef=*/false); 1210 1211 // Visit BBs from header to current BB, if hoisting this doesn't cause 1212 // high register pressure, then it's safe to proceed. 1213 if (!CanCauseHighRegPressure(Cost, CheapInstr)) { 1214 DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); 1215 ++NumLowRP; 1216 return true; 1217 } 1218 1219 // Don't risk increasing register pressure if it would create copies. 1220 if (CreatesCopy) { 1221 DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); 1222 return false; 1223 } 1224 1225 // Do not "speculate" in high register pressure situation. If an 1226 // instruction is not guaranteed to be executed in the loop, it's best to be 1227 // conservative. 1228 if (AvoidSpeculation && 1229 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { 1230 DEBUG(dbgs() << "Won't speculate: " << MI); 1231 return false; 1232 } 1233 1234 // High register pressure situation, only hoist if the instruction is going 1235 // to be remat'ed. 1236 if (!TII->isTriviallyReMaterializable(&MI, AA) && 1237 !MI.isInvariantLoad(AA)) { 1238 DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); 1239 return false; 1240 } 1241 1242 return true; 1243 } 1244 1245 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 1246 // Don't unfold simple loads. 1247 if (MI->canFoldAsLoad()) 1248 return nullptr; 1249 1250 // If not, we may be able to unfold a load and hoist that. 1251 // First test whether the instruction is loading from an amenable 1252 // memory location. 1253 if (!MI->isInvariantLoad(AA)) 1254 return nullptr; 1255 1256 // Next determine the register class for a temporary register. 1257 unsigned LoadRegIndex; 1258 unsigned NewOpc = 1259 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 1260 /*UnfoldLoad=*/true, 1261 /*UnfoldStore=*/false, 1262 &LoadRegIndex); 1263 if (NewOpc == 0) return nullptr; 1264 const MCInstrDesc &MID = TII->get(NewOpc); 1265 if (MID.getNumDefs() != 1) return nullptr; 1266 MachineFunction &MF = *MI->getParent()->getParent(); 1267 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); 1268 // Ok, we're unfolding. Create a temporary register and do the unfold. 1269 unsigned Reg = MRI->createVirtualRegister(RC); 1270 1271 SmallVector<MachineInstr *, 2> NewMIs; 1272 bool Success = 1273 TII->unfoldMemoryOperand(MF, MI, Reg, 1274 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 1275 NewMIs); 1276 (void)Success; 1277 assert(Success && 1278 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 1279 "succeeded!"); 1280 assert(NewMIs.size() == 2 && 1281 "Unfolded a load into multiple instructions!"); 1282 MachineBasicBlock *MBB = MI->getParent(); 1283 MachineBasicBlock::iterator Pos = MI; 1284 MBB->insert(Pos, NewMIs[0]); 1285 MBB->insert(Pos, NewMIs[1]); 1286 // If unfolding produced a load that wasn't loop-invariant or profitable to 1287 // hoist, discard the new instructions and bail. 1288 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 1289 NewMIs[0]->eraseFromParent(); 1290 NewMIs[1]->eraseFromParent(); 1291 return nullptr; 1292 } 1293 1294 // Update register pressure for the unfolded instruction. 1295 UpdateRegPressure(NewMIs[1]); 1296 1297 // Otherwise we successfully unfolded a load that we can hoist. 1298 MI->eraseFromParent(); 1299 return NewMIs[0]; 1300 } 1301 1302 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 1303 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 1304 const MachineInstr *MI = &*I; 1305 unsigned Opcode = MI->getOpcode(); 1306 CSEMap[Opcode].push_back(MI); 1307 } 1308 } 1309 1310 const MachineInstr* 1311 MachineLICM::LookForDuplicate(const MachineInstr *MI, 1312 std::vector<const MachineInstr*> &PrevMIs) { 1313 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 1314 const MachineInstr *PrevMI = PrevMIs[i]; 1315 if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr))) 1316 return PrevMI; 1317 } 1318 return nullptr; 1319 } 1320 1321 bool MachineLICM::EliminateCSE(MachineInstr *MI, 1322 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 1323 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1324 // the undef property onto uses. 1325 if (CI == CSEMap.end() || MI->isImplicitDef()) 1326 return false; 1327 1328 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 1329 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 1330 1331 // Replace virtual registers defined by MI by their counterparts defined 1332 // by Dup. 1333 SmallVector<unsigned, 2> Defs; 1334 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1335 const MachineOperand &MO = MI->getOperand(i); 1336 1337 // Physical registers may not differ here. 1338 assert((!MO.isReg() || MO.getReg() == 0 || 1339 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 1340 MO.getReg() == Dup->getOperand(i).getReg()) && 1341 "Instructions with different phys regs are not identical!"); 1342 1343 if (MO.isReg() && MO.isDef() && 1344 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 1345 Defs.push_back(i); 1346 } 1347 1348 SmallVector<const TargetRegisterClass*, 2> OrigRCs; 1349 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1350 unsigned Idx = Defs[i]; 1351 unsigned Reg = MI->getOperand(Idx).getReg(); 1352 unsigned DupReg = Dup->getOperand(Idx).getReg(); 1353 OrigRCs.push_back(MRI->getRegClass(DupReg)); 1354 1355 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { 1356 // Restore old RCs if more than one defs. 1357 for (unsigned j = 0; j != i; ++j) 1358 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); 1359 return false; 1360 } 1361 } 1362 1363 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1364 unsigned Idx = Defs[i]; 1365 unsigned Reg = MI->getOperand(Idx).getReg(); 1366 unsigned DupReg = Dup->getOperand(Idx).getReg(); 1367 MRI->replaceRegWith(Reg, DupReg); 1368 MRI->clearKillFlags(DupReg); 1369 } 1370 1371 MI->eraseFromParent(); 1372 ++NumCSEed; 1373 return true; 1374 } 1375 return false; 1376 } 1377 1378 /// MayCSE - Return true if the given instruction will be CSE'd if it's 1379 /// hoisted out of the loop. 1380 bool MachineLICM::MayCSE(MachineInstr *MI) { 1381 unsigned Opcode = MI->getOpcode(); 1382 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1383 CI = CSEMap.find(Opcode); 1384 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1385 // the undef property onto uses. 1386 if (CI == CSEMap.end() || MI->isImplicitDef()) 1387 return false; 1388 1389 return LookForDuplicate(MI, CI->second) != nullptr; 1390 } 1391 1392 /// Hoist - When an instruction is found to use only loop invariant operands 1393 /// that are safe to hoist, this instruction is called to do the dirty work. 1394 /// 1395 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1396 // First check whether we should hoist this instruction. 1397 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1398 // If not, try unfolding a hoistable load. 1399 MI = ExtractHoistableLoad(MI); 1400 if (!MI) return false; 1401 } 1402 1403 // Now move the instructions to the predecessor, inserting it before any 1404 // terminator instructions. 1405 DEBUG({ 1406 dbgs() << "Hoisting " << *MI; 1407 if (Preheader->getBasicBlock()) 1408 dbgs() << " to MachineBasicBlock " 1409 << Preheader->getName(); 1410 if (MI->getParent()->getBasicBlock()) 1411 dbgs() << " from MachineBasicBlock " 1412 << MI->getParent()->getName(); 1413 dbgs() << "\n"; 1414 }); 1415 1416 // If this is the first instruction being hoisted to the preheader, 1417 // initialize the CSE map with potential common expressions. 1418 if (FirstInLoop) { 1419 InitCSEMap(Preheader); 1420 FirstInLoop = false; 1421 } 1422 1423 // Look for opportunity to CSE the hoisted instruction. 1424 unsigned Opcode = MI->getOpcode(); 1425 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1426 CI = CSEMap.find(Opcode); 1427 if (!EliminateCSE(MI, CI)) { 1428 // Otherwise, splice the instruction to the preheader. 1429 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1430 1431 // Update register pressure for BBs from header to this block. 1432 UpdateBackTraceRegPressure(MI); 1433 1434 // Clear the kill flags of any register this instruction defines, 1435 // since they may need to be live throughout the entire loop 1436 // rather than just live for part of it. 1437 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1438 MachineOperand &MO = MI->getOperand(i); 1439 if (MO.isReg() && MO.isDef() && !MO.isDead()) 1440 MRI->clearKillFlags(MO.getReg()); 1441 } 1442 1443 // Add to the CSE map. 1444 if (CI != CSEMap.end()) 1445 CI->second.push_back(MI); 1446 else 1447 CSEMap[Opcode].push_back(MI); 1448 } 1449 1450 ++NumHoisted; 1451 Changed = true; 1452 1453 return true; 1454 } 1455 1456 MachineBasicBlock *MachineLICM::getCurPreheader() { 1457 // Determine the block to which to hoist instructions. If we can't find a 1458 // suitable loop predecessor, we can't do any hoisting. 1459 1460 // If we've tried to get a preheader and failed, don't try again. 1461 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1462 return nullptr; 1463 1464 if (!CurPreheader) { 1465 CurPreheader = CurLoop->getLoopPreheader(); 1466 if (!CurPreheader) { 1467 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1468 if (!Pred) { 1469 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1470 return nullptr; 1471 } 1472 1473 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 1474 if (!CurPreheader) { 1475 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1476 return nullptr; 1477 } 1478 } 1479 } 1480 return CurPreheader; 1481 } 1482