1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion on machine instructions. We 11 // attempt to remove as much code from the body of a loop as possible. 12 // 13 // This pass does not attempt to throttle itself to limit register pressure. 14 // The register allocation phases are expected to perform rematerialization 15 // to recover when register pressure is high. 16 // 17 // This pass is not intended to be a replacement or a complete alternative 18 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19 // constructs that are not exposed before lowering and instruction selection. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #include "llvm/CodeGen/Passes.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/SmallSet.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/CodeGen/MachineDominators.h" 29 #include "llvm/CodeGen/MachineFrameInfo.h" 30 #include "llvm/CodeGen/MachineLoopInfo.h" 31 #include "llvm/CodeGen/MachineMemOperand.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/PseudoSourceValue.h" 34 #include "llvm/MC/MCInstrItineraries.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/Target/TargetInstrInfo.h" 39 #include "llvm/Target/TargetLowering.h" 40 #include "llvm/Target/TargetMachine.h" 41 #include "llvm/Target/TargetRegisterInfo.h" 42 #include "llvm/Target/TargetSubtargetInfo.h" 43 using namespace llvm; 44 45 #define DEBUG_TYPE "machine-licm" 46 47 static cl::opt<bool> 48 
AvoidSpeculation("avoid-speculation",
                 cl::desc("MachineLICM should avoid speculation"),
                 cl::init(true), cl::Hidden);

STATISTIC(NumHoisted,
          "Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
          "Number of instructions hoisted in low reg pressure situation");
STATISTIC(NumHighLatency,
          "Number of high latency instructions hoisted");
STATISTIC(NumCSEed,
          "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
          "Number of machine instructions hoisted out of loops post regalloc");

namespace {
  class MachineLICM : public MachineFunctionPass {
    const TargetMachine *TM;
    const TargetInstrInfo *TII;
    const TargetLoweringBase *TLI;
    const TargetRegisterInfo *TRI;
    const MachineFrameInfo *MFI;
    MachineRegisterInfo *MRI;
    const InstrItineraryData *InstrItins;
    bool PreRegAlloc;                // True when running before regalloc (SSA).

    // Various analyses that we use...
    AliasAnalysis *AA;               // Alias analysis info.
    MachineLoopInfo *MLI;            // Current MachineLoopInfo
    MachineDominatorTree *DT;        // Machine dominator tree for the cur loop

    // State that is updated as we process loops
    bool Changed;                    // True if a loop is changed.
    bool FirstInLoop;                // True if it's the first LICM in the loop.
    MachineLoop *CurLoop;            // The current loop we are working on.
    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.

    // Exit blocks for CurLoop.
    SmallVector<MachineBasicBlock*, 8> ExitBlocks;

    /// Return true if MBB is one of CurLoop's exit blocks.
    bool isExitBlock(const MachineBasicBlock *MBB) const {
      return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
             ExitBlocks.end();
    }

    // Track 'estimated' register pressure.
    SmallSet<unsigned, 32> RegSeen;       // Virtual registers seen so far.
    SmallVector<unsigned, 8> RegPressure; // Pressure estimate per class ID.

    // Register pressure "limit" per register class. If the pressure
    // is higher than the limit, then it's considered high.
    SmallVector<unsigned, 8> RegLimit;

    // Register pressure on path leading from loop preheader to current BB.
    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;

    // For each opcode, keep a list of potential CSE instructions.
    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;

    enum {
      SpeculateFalse   = 0,
      SpeculateTrue    = 1,
      SpeculateUnknown = 2
    };

    // If a MBB does not dominate loop exiting blocks then it may not be safe
    // to hoist loads from this block.
    // Tri-state: 0 - false, 1 - true, 2 - unknown. Cached per block by
    // IsGuaranteedToExecute; reset to SpeculateUnknown before each block.
    unsigned SpeculationState;

  public:
    static char ID; // Pass identification, replacement for typeid
    MachineLICM() :
      MachineFunctionPass(ID), PreRegAlloc(true) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    explicit MachineLICM(bool PreRA) :
      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineLoopInfo>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<AliasAnalysis>();
      AU.addPreserved<MachineLoopInfo>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    void releaseMemory() override {
      RegSeen.clear();
      RegPressure.clear();
      RegLimit.clear();
      BackTrace.clear();
      CSEMap.clear();
    }

  private:
    /// CandidateInfo - Keep track of information about hoisting candidates.
    struct CandidateInfo {
      MachineInstr *MI;
      unsigned Def; // The physical register the candidate defines.
      int FI;       // Frame index of a spill-slot reload, or INT_MIN if none.
      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
        : MI(mi), Def(def), FI(fi) {}
    };

    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
    /// invariants out to the preheader.
    void HoistRegionPostRA();

    /// HoistPostRA - When an instruction is found to only use loop invariant
    /// operands that is safe to hoist, this instruction is called to do the
    /// dirty work.
    void HoistPostRA(MachineInstr *MI, unsigned Def);

    /// ProcessMI - Examine the instruction for potential LICM candidate. Also
    /// gather register def and frame object update information.
    void ProcessMI(MachineInstr *MI,
                   BitVector &PhysRegDefs,
                   BitVector &PhysRegClobbers,
                   SmallSet<int, 32> &StoredFIs,
                   SmallVectorImpl<CandidateInfo> &Candidates);

    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
    /// current loop.
    void AddToLiveIns(unsigned Reg);

    /// IsLICMCandidate - Returns true if the instruction may be a suitable
    /// candidate for LICM. e.g. If the instruction is a call, then it's
    /// obviously not safe to hoist it.
    bool IsLICMCandidate(MachineInstr &I);

    /// IsLoopInvariantInst - Returns true if the instruction is loop
    /// invariant. I.e., all virtual register operands are defined outside of
    /// the loop, physical registers aren't accessed (explicitly or implicitly),
    /// and the instruction is hoistable.
    ///
    bool IsLoopInvariantInst(MachineInstr &I);

    /// HasLoopPHIUse - Return true if the specified instruction is used by any
    /// phi node in the current loop.
    bool HasLoopPHIUse(const MachineInstr *MI) const;

    /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
    /// and an use in the current loop, return true if the target considered
    /// it 'high'.
    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
                               unsigned Reg) const;

    bool IsCheapInstruction(MachineInstr &MI) const;

    /// CanCauseHighRegPressure - Visit BBs from header to current BB,
    /// check if hoisting an instruction of the given cost matrix can cause high
    /// register pressure.
    bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);

    /// UpdateBackTraceRegPressure - Traverse the back trace from header to
    /// the current block and update their register pressures to reflect the
    /// effect of hoisting MI from the current block to the preheader.
    void UpdateBackTraceRegPressure(const MachineInstr *MI);

    /// IsProfitableToHoist - Return true if it is potentially profitable to
    /// hoist the given loop invariant.
    bool IsProfitableToHoist(MachineInstr &MI);

    /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
    /// If not then a load from this mbb may not be safe to hoist.
    bool IsGuaranteedToExecute(MachineBasicBlock *BB);

    void EnterScope(MachineBasicBlock *MBB);

    void ExitScope(MachineBasicBlock *MBB);

    /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
    /// dominator tree node if it's a leaf or all of its children are done. Walk
    /// up the dominator tree to destroy ancestors which are now done.
    void ExitScopeIfDone(MachineDomTreeNode *Node,
                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);

    /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
    /// blocks dominated by the specified header block, and that are in the
    /// current loop) in depth first order w.r.t the DominatorTree. This allows
    /// us to visit definitions before uses, allowing us to hoist a loop body in
    /// one pass without iteration.
    ///
    void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
    void HoistRegion(MachineDomTreeNode *N, bool IsHeader);

    /// getRegisterClassIDAndCost - For a given MI, register, and the operand
    /// index, return the ID and cost of its representative register class by
    /// reference.
    void getRegisterClassIDAndCost(const MachineInstr *MI,
                                   unsigned Reg, unsigned OpIdx,
                                   unsigned &RCId, unsigned &RCCost) const;

    /// InitRegPressure - Find all virtual register references that are liveout
    /// of the preheader to initialize the starting "register pressure". Note
    /// this does not count live through (livein but not used) registers.
    void InitRegPressure(MachineBasicBlock *BB);

    /// UpdateRegPressure - Update estimate of register pressure after the
    /// specified instruction.
    void UpdateRegPressure(const MachineInstr *MI);

    /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
    /// the load itself could be hoisted. Return the unfolded and hoistable
    /// load, or null if the load couldn't be unfolded or if it wouldn't
    /// be hoistable.
    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);

    /// LookForDuplicate - Find an instruction among PrevMIs that is a
    /// duplicate of MI. Return this instruction if it's found.
    const MachineInstr *LookForDuplicate(const MachineInstr *MI,
                                     std::vector<const MachineInstr*> &PrevMIs);

    /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
    /// the preheader that computes the same value. If it's found, replace all
    /// uses of MI with the definition of the existing instruction rather than
    /// hoisting the instruction to the preheader.
    bool EliminateCSE(MachineInstr *MI,
           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);

    /// MayCSE - Return true if the given instruction will be CSE'd if it's
    /// hoisted out of the loop.
    bool MayCSE(MachineInstr *MI);

    /// Hoist - When an instruction is found to only use loop invariant operands
    /// that is safe to hoist, this instruction is called to do the dirty work.
    /// It returns true if the instruction is hoisted.
    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);

    /// InitCSEMap - Initialize the CSE map with instructions that are in the
    /// current loop preheader that may become duplicates of instructions that
    /// are hoisted out of the loop.
    void InitCSEMap(MachineBasicBlock *BB);

    /// getCurPreheader - Get the preheader for the current loop, splitting
    /// a critical edge if needed.
    MachineBasicBlock *getCurPreheader();
  };
} // end anonymous namespace

char MachineLICM::ID = 0;
char &llvm::MachineLICMID = MachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
                "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
                "Machine Loop Invariant Code Motion", false, false)

/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
/// loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
  // Check whether this loop even has a unique predecessor.
  if (!CurLoop->getLoopPredecessor())
    return false;
  // Ok, now check to see if any of its outer loops do.
  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
    if (L->getLoopPredecessor())
      return false;
  // None of them did, so this is the outermost with a unique predecessor.
  return true;
}

bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  Changed = FirstInLoop = false;
  TM = &MF.getTarget();
  TII = TM->getSubtargetImpl()->getInstrInfo();
  TLI = TM->getSubtargetImpl()->getTargetLowering();
  TRI = TM->getSubtargetImpl()->getRegisterInfo();
  MFI = MF.getFrameInfo();
  MRI = &MF.getRegInfo();
  InstrItins = TM->getSubtargetImpl()->getInstrItineraryData();

  // Machine code is still in SSA form before register allocation; use that
  // to pick which flavor of LICM actually runs.
  PreRegAlloc = MRI->isSSA();

  if (PreRegAlloc)
    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
  else
    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
  DEBUG(dbgs() << MF.getName() << " ********\n");

  if (PreRegAlloc) {
    // Estimate register pressure during pre-regalloc pass.
    unsigned NumRC = TRI->getNumRegClasses();
    RegPressure.resize(NumRC);
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
    RegLimit.resize(NumRC);
    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
           E = TRI->regclass_end(); I != E; ++I)
      RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
  }

  // Get our Loop information...
  MLI = &getAnalysis<MachineLoopInfo>();
  DT = &getAnalysis<MachineDominatorTree>();
  AA = &getAnalysis<AliasAnalysis>();

  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  while (!Worklist.empty()) {
    CurLoop = Worklist.pop_back_val();
    CurPreheader = nullptr;
    ExitBlocks.clear();

    // If this is done before regalloc, only visit outer-most preheader-sporting
    // loops.
    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
      // Not an outermost candidate: queue its subloops instead.
      Worklist.append(CurLoop->begin(), CurLoop->end());
      continue;
    }

    CurLoop->getExitBlocks(ExitBlocks);

    if (!PreRegAlloc)
      HoistRegionPostRA();
    else {
      // CSEMap is initialized for loop header when the first instruction is
      // being hoisted.
      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
      FirstInLoop = true;
      HoistOutOfLoop(N);
      CSEMap.clear();
    }
  }

  return Changed;
}

/// InstructionStoresToFI - Return true if instruction stores to the
/// specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end(); o != oe; ++o) {
    // Only store memoperands with a pseudo source value can name a frame slot.
    if (!(*o)->isStore() || !(*o)->getPseudoValue())
      continue;
    if (const FixedStackPseudoSourceValue *Value =
        dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
      if (Value->getFrameIndex() == FI)
        return true;
    }
  }
  return false;
}

/// ProcessMI - Examine the instruction for potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
                            BitVector &PhysRegDefs,
                            BitVector &PhysRegClobbers,
                            SmallSet<int, 32> &StoredFIs,
                            SmallVectorImpl<CandidateInfo> &Candidates) {
  bool RuledOut = false;
  bool HasNonInvariantUse = false;
  unsigned Def = 0;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isFI()) {
      // Remember if the instruction stores to the frame index.
      int FI = MO.getIndex();
      if (!StoredFIs.count(FI) &&
          MFI->isSpillSlotObjectIndex(FI) &&
          InstructionStoresToFI(MI, FI))
        StoredFIs.insert(FI);
      HasNonInvariantUse = true;
      continue;
    }

    // We can't hoist an instruction defining a physreg that is clobbered in
    // the loop.
    if (MO.isRegMask()) {
      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
      continue;
    }

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
           "Not expecting virtual register!");

    if (!MO.isDef()) {
      if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
        // If it's using a non-loop-invariant register, then it's obviously not
        // safe to hoist.
        HasNonInvariantUse = true;
      continue;
    }

    if (MO.isImplicit()) {
      // Implicit defs (and all their aliases) count as clobbers.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRegClobbers.set(*AI);
      if (!MO.isDead())
        // Non-dead implicit def? This cannot be hoisted.
        RuledOut = true;
      // No need to check if a dead implicit def is also defined by
      // another instruction.
      continue;
    }

    // FIXME: For now, avoid instructions with multiple defs, unless
    // it's a dead implicit def.
    if (Def)
      RuledOut = true;
    else
      Def = Reg;

    // If we have already seen another instruction that defines the same
    // register, then this is not safe. Two defs is indicated by setting a
    // PhysRegClobbers bit.
    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
      if (PhysRegDefs.test(*AS))
        PhysRegClobbers.set(*AS);
      PhysRegDefs.set(*AS);
    }
    if (PhysRegClobbers.test(Reg))
      // MI defined register is seen defined by another instruction in
      // the loop, it cannot be a LICM candidate.
      RuledOut = true;
  }

  // Only consider reloads for now and remats which do not have register
  // operands. FIXME: Consider unfold load folding instructions.
  if (Def && !RuledOut) {
    int FI = INT_MIN;
    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
      Candidates.push_back(CandidateInfo(MI, Def, FI));
  }
}

/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
/// invariants out to the preheader.
void MachineLICM::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isLandingPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that're reused in successor block(s) will not
    // be LICM'ed.
    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
           E = BB->livein_end(); I != E; ++I) {
      unsigned Reg = *I;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
    }
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = TI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Candidates[i].FI != INT_MIN &&
        StoredFIs.count(Candidates[i].FI))
      continue;

    unsigned Def = Candidates[i].Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidates[i].MI;
      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidates[i].Def);
    }
  }
}

/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
/// loop, and make sure it is not killed by any instructions in the loop.
void MachineLICM::AddToLiveIns(unsigned Reg) {
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];
    if (!BB->isLiveIn(Reg))
      BB->addLiveIn(Reg);
    // Clear kill flags on uses of Reg (or a super-register of Reg) so the
    // hoisted value is treated as live across the whole loop body.
    for (MachineBasicBlock::iterator
           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
        if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
          MO.setIsKill(false);
      }
    }
  }
}

/// HoistPostRA - When an instruction is found to only use loop invariant
/// operands that is safe to hoist, this instruction is called to do the
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
  // The caller (HoistRegionPostRA) has already bailed out when no preheader
  // exists, so Preheader is expected to be non-null here.
  MachineBasicBlock *Preheader = getCurPreheader();

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
               << MI->getParent()->getNumber() << ": " << *MI);

  // Splice the instruction to the preheader.
  MachineBasicBlock *MBB = MI->getParent();
  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);

  // Add register to livein list to all the BBs in the current loop since a
  // loop invariant must be kept live throughout the whole loop. This is
  // important to ensure later passes do not scavenge the def register.
  AddToLiveIns(Def);

  ++NumPostRAHoisted;
  Changed = true;
}

// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
// If not then a load from this mbb may not be safe to hoist.
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
  // Return the cached answer if we already decided for the current block.
  // SpeculateFalse means no speculation is involved, i.e. the block is
  // guaranteed to execute on every loop iteration.
  if (SpeculationState != SpeculateUnknown)
    return SpeculationState == SpeculateFalse;

  if (BB != CurLoop->getHeader()) {
    // Check loop exiting blocks: BB executes on every iteration only if it
    // dominates all of them.
    SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
    CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
    for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
      if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
        SpeculationState = SpeculateTrue;
        return false;
      }
  }

  SpeculationState = SpeculateFalse;
  return true;
}

void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');

  // Remember livein register pressure.
  BackTrace.push_back(RegPressure);
}

void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  BackTrace.pop_back();
}

/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offsprings are all done.
  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
/// blocks dominated by the specified header block, and that are in the
/// current loop) in depth first order w.r.t the DominatorTree. This allows
/// us to visit definitions before uses, allowing us to hoist a loop body in
/// one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(HeaderN);
  do {
    MachineDomTreeNode *Node = WorkList.pop_back_val();
    assert(Node && "Null dominator tree node?");
    MachineBasicBlock *BB = Node->getBlock();

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isLandingPad())
      continue;

    // If this subregion is not in the top level loop at all, exit.
    if (!CurLoop->contains(BB))
      continue;

    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    unsigned NumChildren = Children.size();

    // Don't hoist things out of a large switch statement. This often causes
    // code to be hoisted that wasn't going to be executed, and increases
    // register pressure in a situation where it's likely to matter.
    if (BB->succ_size() >= 25)
      NumChildren = 0;

    OpenChildren[Node] = NumChildren;
    // Add children in reverse order as then the next popped worklist node is
    // the first child of this node. This means we ultimately traverse the
    // DOM tree in exactly the same order as if we'd recursed.
    for (int i = (int)NumChildren-1; i >= 0; --i) {
      MachineDomTreeNode *Child = Children[i];
      ParentMap[Child] = Node;
      WorkList.push_back(Child);
    }
  } while (!WorkList.empty());

  if (Scopes.size() != 0) {
    MachineBasicBlock *Preheader = getCurPreheader();
    if (!Preheader)
      return;

    // Compute registers which are livein into the loop headers.
    RegSeen.clear();
    BackTrace.clear();
    InitRegPressure(Preheader);
  }

  // Now perform LICM.
  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
    MachineDomTreeNode *Node = Scopes[i];
    MachineBasicBlock *MBB = Node->getBlock();

    MachineBasicBlock *Preheader = getCurPreheader();
    if (!Preheader)
      continue;

    EnterScope(MBB);

    // Process the block. NextMII is remembered up front because a successful
    // Hoist() splices MI out of this block.
    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
           MII = MBB->begin(), E = MBB->end(); MII != E; ) {
      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
      MachineInstr *MI = &*MII;
      if (!Hoist(MI, Preheader))
        UpdateRegPressure(MI);
      MII = NextMII;
    }

    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
  }
}

/// isOperandKill - Treat an operand as a kill if it is flagged as a kill or
/// if it is the register's only remaining non-debug use.
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}

/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class.
void
MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
                                       unsigned Reg, unsigned OpIdx,
                                       unsigned &RCId, unsigned &RCCost) const {
  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  MVT VT = *RC->vt_begin();
  if (VT == MVT::Untyped) {
    // No value type to map through TLI; fall back to the class itself.
    RCId = RC->getID();
    RCCost = 1;
  } else {
    RCId = TLI->getRepRegClassFor(VT)->getID();
    RCCost = TLI->getRepRegClassCostFor(VT);
  }
}

/// InitRegPressure - Find all virtual register references that are liveout of
/// the preheader to initialize the starting "register pressure". Note this
/// does not count live through (livein but not used) registers.
798 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 799 std::fill(RegPressure.begin(), RegPressure.end(), 0); 800 801 // If the preheader has only a single predecessor and it ends with a 802 // fallthrough or an unconditional branch, then scan its predecessor for live 803 // defs as well. This happens whenever the preheader is created by splitting 804 // the critical edge from the loop predecessor to the loop header. 805 if (BB->pred_size() == 1) { 806 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 807 SmallVector<MachineOperand, 4> Cond; 808 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 809 InitRegPressure(*BB->pred_begin()); 810 } 811 812 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 813 MII != E; ++MII) { 814 MachineInstr *MI = &*MII; 815 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 816 const MachineOperand &MO = MI->getOperand(i); 817 if (!MO.isReg() || MO.isImplicit()) 818 continue; 819 unsigned Reg = MO.getReg(); 820 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 821 continue; 822 823 bool isNew = RegSeen.insert(Reg); 824 unsigned RCId, RCCost; 825 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 826 if (MO.isDef()) 827 RegPressure[RCId] += RCCost; 828 else { 829 bool isKill = isOperandKill(MO, MRI); 830 if (isNew && !isKill) 831 // Haven't seen this, it must be a livein. 832 RegPressure[RCId] += RCCost; 833 else if (!isNew && isKill) 834 RegPressure[RCId] -= RCCost; 835 } 836 } 837 } 838 } 839 840 /// UpdateRegPressure - Update estimate of register pressure after the 841 /// specified instruction. 
void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
  // IMPLICIT_DEF produces no real value, so it does not affect the
  // pressure estimate.
  if (MI->isImplicitDef())
    return;

  // Process uses first, deferring defs, so that a register killed by this
  // instruction is released before the new defs are accounted for.
  SmallVector<unsigned, 4> Defs;
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    bool isNew = RegSeen.insert(Reg);
    if (MO.isDef())
      Defs.push_back(Reg);
    else if (!isNew && isOperandKill(MO, MRI)) {
      // A kill of a previously-seen register ends its live range and lowers
      // pressure. Clamp at zero: this is only an estimate and may underflow.
      unsigned RCId, RCCost;
      getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
      if (RCCost > RegPressure[RCId])
        RegPressure[RCId] = 0;
      else
        RegPressure[RCId] -= RCCost;
    }
  }

  // Now add the cost of each def.
  // NOTE(review): Idx counts 0,1,2,... while Defs is popped from the back,
  // so the operand index handed to getRegisterClassIDAndCost may not be the
  // actual operand position of Reg in MI. Harmless for single-def
  // instructions; verify intent for multi-def cases.
  unsigned Idx = 0;
  while (!Defs.empty()) {
    unsigned Reg = Defs.pop_back_val();
    unsigned RCId, RCCost;
    getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
    RegPressure[RCId] += RCCost;
    ++Idx;
  }
}

/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
/// loads from global offset table or constant pool.
static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert (MI.mayLoad() && "Expected MI that loads!");
  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
       E = MI.memoperands_end(); I != E; ++I) {
    // Only memory operands backed by a pseudo source value can identify the
    // GOT or the constant pool; ordinary IR values never do.
    if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
      if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
        return true;
    }
  }
  return false;
}

/// IsLICMCandidate - Returns true if the instruction may be a suitable
/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
/// not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
  // Check if it's safe to move the instruction.
  bool DontMoveAcrossStore = true;
  if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
    return false;

  // If it is a load then check if it is guaranteed to execute by making sure
  // that it dominates all exiting blocks. If it doesn't, then there is a path
  // out of the loop which does not execute this load, so we can't hoist it.
  // Loads from constant memory are not safe to speculate all the time, for
  // example indexed load from a jump table.
  // Stores and side effects are already checked by isSafeToMove.
  if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
      !IsGuaranteedToExecute(I.getParent()))
    return false;

  return true;
}

/// IsLoopInvariantInst - Returns true if the instruction is loop
/// invariant. I.e., all virtual register operands are defined outside of the
/// loop, physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
  if (!IsLICMCandidate(I))
    return false;

  // The instruction is loop invariant if all of its operands are.
  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = I.getOperand(i);

    if (!MO.isReg())
      continue;

    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;

    // Don't hoist an instruction that uses or defines a physical register.
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
          return false;
        // Otherwise it's safe to move.
        continue;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return false;
      } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
        // If the reg is live into the loop, we can't hoist an instruction
        // which would clobber it.
        return false;
      }
    }

    if (!MO.isUse())
      continue;

    // Every virtual-register use must have a reaching def (SSA form).
    assert(MRI->getVRegDef(Reg) &&
           "Machine instr not mapped for this vreg?!");

    // If the loop contains the definition of an operand, then the instruction
    // isn't loop invariant.
    if (CurLoop->contains(MRI->getVRegDef(Reg)))
      return false;
  }

  // If we got this far, the instruction is loop invariant!
  return true;
}


/// HasLoopPHIUse - Return true if the specified instruction is used by a
/// phi node and hoisting it could cause a copy to be inserted.
bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
  // Work list of instructions whose defs we must trace; seeded with MI and
  // extended through in-loop copies.
  SmallVector<const MachineInstr*, 8> Work(1, MI);
  do {
    MI = Work.pop_back_val();
    for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef())
        continue;
      unsigned Reg = MO->getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        continue;
      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
        // A PHI may cause a copy to be inserted.
        if (UseMI.isPHI()) {
          // A PHI inside the loop causes a copy because the live range of Reg
          // is extended across the PHI.
          if (CurLoop->contains(&UseMI))
            return true;
          // A PHI in an exit block can cause a copy to be inserted if the PHI
          // has multiple predecessors in the loop with different values.
          // For now, approximate by rejecting all exit blocks.
          if (isExitBlock(UseMI.getParent()))
            return true;
          continue;
        }
        // Look past copies as well.
        if (UseMI.isCopy() && CurLoop->contains(&UseMI))
          Work.push_back(&UseMI);
      }
    }
  } while (!Work.empty());
  return false;
}

/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
/// and a use in the current loop, return true if the target considered
/// it 'high'.
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
                                        unsigned DefIdx, unsigned Reg) const {
  // Without itinerary data (or any uses) there is nothing to measure.
  if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
    return false;

  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
    if (UseMI.isCopyLike())
      continue;
    if (!CurLoop->contains(UseMI.getParent()))
      continue;
    for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = UseMI.getOperand(i);
      if (!MO.isReg() || !MO.isUse())
        continue;
      unsigned MOReg = MO.getReg();
      if (MOReg != Reg)
        continue;

      if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i))
        return true;
    }

    // Only look at the first in loop use.
    break;
  }

  return false;
}

/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
/// the operand latency between its def and a use is one or less.
1039 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1040 if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) 1041 return true; 1042 if (!InstrItins || InstrItins->isEmpty()) 1043 return false; 1044 1045 bool isCheap = false; 1046 unsigned NumDefs = MI.getDesc().getNumDefs(); 1047 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1048 MachineOperand &DefMO = MI.getOperand(i); 1049 if (!DefMO.isReg() || !DefMO.isDef()) 1050 continue; 1051 --NumDefs; 1052 unsigned Reg = DefMO.getReg(); 1053 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1054 continue; 1055 1056 if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 1057 return false; 1058 isCheap = true; 1059 } 1060 1061 return isCheap; 1062 } 1063 1064 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check 1065 /// if hoisting an instruction of the given cost matrix can cause high 1066 /// register pressure. 1067 bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, 1068 bool CheapInstr) { 1069 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 1070 CI != CE; ++CI) { 1071 if (CI->second <= 0) 1072 continue; 1073 1074 unsigned RCId = CI->first; 1075 unsigned Limit = RegLimit[RCId]; 1076 int Cost = CI->second; 1077 1078 // Don't hoist cheap instructions if they would increase register pressure, 1079 // even if we're under the limit. 1080 if (CheapInstr) 1081 return true; 1082 1083 for (unsigned i = BackTrace.size(); i != 0; --i) { 1084 SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; 1085 if (RP[RCId] + Cost >= Limit) 1086 return true; 1087 } 1088 } 1089 1090 return false; 1091 } 1092 1093 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the 1094 /// current block and update their register pressures to reflect the effect 1095 /// of hoisting MI from the current block to the preheader. 
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
  // IMPLICIT_DEF contributes nothing to register pressure.
  if (MI->isImplicitDef())
    return;

  // First compute the 'cost' of the instruction, i.e. its contribution
  // to register pressure.
  DenseMap<unsigned, int> Cost;
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    unsigned RCId, RCCost;
    getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
    if (MO.isDef()) {
      // Hoisting a def makes the value live through the loop: pressure up.
      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
      if (CI != Cost.end())
        CI->second += RCCost;
      else
        Cost.insert(std::make_pair(RCId, RCCost));
    } else if (isOperandKill(MO, MRI)) {
      // Hoisting the last use releases the register inside the loop:
      // pressure down.
      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
      if (CI != Cost.end())
        CI->second -= RCCost;
      else
        Cost.insert(std::make_pair(RCId, -RCCost));
    }
  }

  // Update register pressure of blocks from loop header to current block.
  for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
    SmallVectorImpl<unsigned> &RP = BackTrace[i];
    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
         CI != CE; ++CI) {
      unsigned RCId = CI->first;
      RP[RCId] += CI->second;
    }
  }
}

/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  // IMPLICIT_DEF is free to hoist.
  if (MI.isImplicitDef())
    return true;

  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(&MI);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }

  // Rematerializable instructions should always be hoisted since the register
  // allocator can just pull them down again when needed.
  if (TII->isTriviallyReMaterializable(&MI, AA))
    return true;

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  DenseMap<unsigned, int> Cost;
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    unsigned RCId, RCCost;
    getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
    if (MO.isDef()) {
      if (HasHighOperandLatency(MI, i, Reg)) {
        DEBUG(dbgs() << "Hoist High Latency: " << MI);
        ++NumHighLatency;
        return true;
      }
      Cost[RCId] += RCCost;
    } else if (isOperandKill(MO, MRI)) {
      // If a virtual register use is a kill, hoisting it out of the loop
      // may actually reduce register pressure or be register pressure
      // neutral.
      Cost[RCId] -= RCCost;
    }
  }

  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }

  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }

  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
    DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  }

  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!TII->isTriviallyReMaterializable(&MI, AA) &&
      !MI.isInvariantLoad(AA)) {
    DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    return false;
  }

  return true;
}

/// ExtractHoistableLoad - Try to unfold an invariant load out of MI so the
/// load itself can be hoisted. Returns the new load instruction on success,
/// or null if unfolding is impossible or unprofitable. On success MI is
/// erased and replaced in place by the two unfolded instructions.
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return nullptr;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isInvariantLoad(AA))
    return nullptr;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  if (NewOpc == 0) return nullptr;
  const MCInstrDesc &MID = TII->get(NewOpc);
  // Only handle the single-def case.
  if (MID.getNumDefs() != 1) return nullptr;
  MachineFunction &MF = *MI->getParent()->getParent();
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  unsigned Reg = MRI->createVirtualRegister(RC);

  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success =
    TII->unfoldMemoryOperand(MF, MI, Reg,
                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
                             NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  // NewMIs[0] is the candidate load to hoist; NewMIs[1] is the remaining
  // instruction, which stays in the loop.
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return nullptr;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.
  MI->eraseFromParent();
  return NewMIs[0];
}

/// InitCSEMap - Seed the opcode -> instruction-list CSE map with the contents
/// of the given (preheader) block so newly hoisted instructions can be CSE'd
/// against what is already there.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
    const MachineInstr *MI = &*I;
    unsigned Opcode = MI->getOpcode();
    CSEMap[Opcode].push_back(MI);
  }
}

/// LookForDuplicate - Return an instruction from PrevMIs that produces the
/// same value as MI, or null if there is none.
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
                              std::vector<const MachineInstr*> &PrevMIs) {
  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
    const MachineInstr *PrevMI = PrevMIs[i];
    // Pre-regalloc, let produceSameValue consult MRI so that otherwise
    // identical instructions with different virtual registers still match.
    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
      return PrevMI;
  }
  return nullptr;
}

/// EliminateCSE - Given a hoisted instruction MI and the CSE-map entry for
/// its opcode, try to replace MI's defs with those of an equivalent
/// previously hoisted instruction. Returns true if MI was erased.
bool MachineLICM::EliminateCSE(MachineInstr *MI,
          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 ||
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() &&
          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        Defs.push_back(i);
    }

    // Constrain each of Dup's def register classes to MI's; back out all
    // changes if any constraint fails.
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
      }
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      // DupReg's live range now extends to MI's former uses; stale kill
      // flags must go.
      MRI->clearKillFlags(DupReg);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}

/// MayCSE - Return true if the given instruction will be CSE'd if it's
/// hoisted out of the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  return LookForDuplicate(MI, CI->second) != nullptr;
}

/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
  // First check whether we should hoist this instruction.
  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
    // If not, try unfolding a hoistable load.
    MI = ExtractHoistableLoad(MI);
    if (!MI) return false;
  }

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG({
      dbgs() << "Hoisting " << *MI;
      if (Preheader->getBasicBlock())
        dbgs() << " to MachineBasicBlock "
               << Preheader->getName();
      if (MI->getParent()->getBasicBlock())
        dbgs() << " from MachineBasicBlock "
               << MI->getParent()->getName();
      dbgs() << "\n";
    });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction.
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  if (!EliminateCSE(MI, CI)) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);

    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef() && !MO.isDead())
        MRI->clearKillFlags(MO.getReg());
    }

    // Add to the CSE map.
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else
      CSEMap[Opcode].push_back(MI);
  }

  ++NumHoisted;
  Changed = true;

  return true;
}

/// getCurPreheader - Return the preheader block to hoist into, creating one
/// by splitting the critical edge from the loop predecessor if necessary.
/// Returns null if no suitable block exists or can be created.
MachineBasicBlock *MachineLICM::getCurPreheader() {
  // Determine the block to which to hoist instructions. If we can't find a
  // suitable loop predecessor, we can't do any hoisting.

  // If we've tried to get a preheader and failed, don't try again.
  // (The all-ones pointer is used as a "tried and failed" sentinel.)
  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
    return nullptr;

  if (!CurPreheader) {
    CurPreheader = CurLoop->getLoopPreheader();
    if (!CurPreheader) {
      // No natural preheader; fall back to the sole loop predecessor and
      // split its edge into the header to create one.
      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
      if (!Pred) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return nullptr;
      }

      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
      if (!CurPreheader) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return nullptr;
      }
    }
  }
  return CurPreheader;
}