1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion on machine instructions. We 11 // attempt to remove as much code from the body of a loop as possible. 12 // 13 // This pass does not attempt to throttle itself to limit register pressure. 14 // The register allocation phases are expected to perform rematerialization 15 // to recover when register pressure is high. 16 // 17 // This pass is not intended to be a replacement or a complete alternative 18 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19 // constructs that are not exposed before lowering and instruction selection. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #define DEBUG_TYPE "machine-licm" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/MachineFrameInfo.h" 27 #include "llvm/CodeGen/MachineLoopInfo.h" 28 #include "llvm/CodeGen/MachineMemOperand.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/PseudoSourceValue.h" 31 #include "llvm/MC/MCInstrItineraries.h" 32 #include "llvm/Target/TargetLowering.h" 33 #include "llvm/Target/TargetRegisterInfo.h" 34 #include "llvm/Target/TargetInstrInfo.h" 35 #include "llvm/Target/TargetMachine.h" 36 #include "llvm/Analysis/AliasAnalysis.h" 37 #include "llvm/ADT/DenseMap.h" 38 #include "llvm/ADT/SmallSet.h" 39 #include "llvm/ADT/Statistic.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/raw_ostream.h" 43 using namespace llvm; 44 45 static cl::opt<bool> 46 AvoidSpeculation("avoid-speculation", 47 cl::desc("MachineLICM 
should avoid speculation"), 48 cl::init(true), cl::Hidden); 49 50 STATISTIC(NumHoisted, 51 "Number of machine instructions hoisted out of loops"); 52 STATISTIC(NumLowRP, 53 "Number of instructions hoisted in low reg pressure situation"); 54 STATISTIC(NumHighLatency, 55 "Number of high latency instructions hoisted"); 56 STATISTIC(NumCSEed, 57 "Number of hoisted machine instructions CSEed"); 58 STATISTIC(NumPostRAHoisted, 59 "Number of machine instructions hoisted out of loops post regalloc"); 60 61 namespace { 62 class MachineLICM : public MachineFunctionPass { 63 const TargetMachine *TM; 64 const TargetInstrInfo *TII; 65 const TargetLowering *TLI; 66 const TargetRegisterInfo *TRI; 67 const MachineFrameInfo *MFI; 68 MachineRegisterInfo *MRI; 69 const InstrItineraryData *InstrItins; 70 bool PreRegAlloc; 71 BitVector ReservedRegs; 72 73 // Various analyses that we use... 74 AliasAnalysis *AA; // Alias analysis info. 75 MachineLoopInfo *MLI; // Current MachineLoopInfo 76 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 77 78 // State that is updated as we process loops 79 bool Changed; // True if a loop is changed. 80 bool FirstInLoop; // True if it's the first LICM in the loop. 81 MachineLoop *CurLoop; // The current loop we are working on. 82 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 83 84 // Track 'estimated' register pressure. 85 SmallSet<unsigned, 32> RegSeen; 86 SmallVector<unsigned, 8> RegPressure; 87 88 // Register pressure "limit" per register class. If the pressure 89 // is higher than the limit, then it's considered high. 90 SmallVector<unsigned, 8> RegLimit; 91 92 // Register pressure on path leading from loop preheader to current BB. 93 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 94 95 // For each opcode, keep a list of potential CSE instructions. 
96 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 97 98 enum { 99 SpeculateFalse = 0, 100 SpeculateTrue = 1, 101 SpeculateUnknown = 2 102 }; 103 104 // If a MBB does not dominate loop exiting blocks then it may not safe 105 // to hoist loads from this block. 106 // Tri-state: 0 - false, 1 - true, 2 - unknown 107 unsigned SpeculationState; 108 109 public: 110 static char ID; // Pass identification, replacement for typeid 111 MachineLICM() : 112 MachineFunctionPass(ID), PreRegAlloc(true) { 113 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 114 } 115 116 explicit MachineLICM(bool PreRA) : 117 MachineFunctionPass(ID), PreRegAlloc(PreRA) { 118 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 119 } 120 121 virtual bool runOnMachineFunction(MachineFunction &MF); 122 123 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 124 AU.addRequired<MachineLoopInfo>(); 125 AU.addRequired<MachineDominatorTree>(); 126 AU.addRequired<AliasAnalysis>(); 127 AU.addPreserved<MachineLoopInfo>(); 128 AU.addPreserved<MachineDominatorTree>(); 129 MachineFunctionPass::getAnalysisUsage(AU); 130 } 131 132 virtual void releaseMemory() { 133 RegSeen.clear(); 134 RegPressure.clear(); 135 RegLimit.clear(); 136 BackTrace.clear(); 137 for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 138 CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 139 CI->second.clear(); 140 CSEMap.clear(); 141 } 142 143 private: 144 /// CandidateInfo - Keep track of information about hoisting candidates. 145 struct CandidateInfo { 146 MachineInstr *MI; 147 unsigned Def; 148 int FI; 149 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 150 : MI(mi), Def(def), FI(fi) {} 151 }; 152 153 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 154 /// invariants out to the preheader. 
155 void HoistRegionPostRA(); 156 157 /// HoistPostRA - When an instruction is found to only use loop invariant 158 /// operands that is safe to hoist, this instruction is called to do the 159 /// dirty work. 160 void HoistPostRA(MachineInstr *MI, unsigned Def); 161 162 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 163 /// gather register def and frame object update information. 164 void ProcessMI(MachineInstr *MI, 165 BitVector &PhysRegDefs, 166 BitVector &PhysRegClobbers, 167 SmallSet<int, 32> &StoredFIs, 168 SmallVector<CandidateInfo, 32> &Candidates); 169 170 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 171 /// current loop. 172 void AddToLiveIns(unsigned Reg); 173 174 /// IsLICMCandidate - Returns true if the instruction may be a suitable 175 /// candidate for LICM. e.g. If the instruction is a call, then it's 176 /// obviously not safe to hoist it. 177 bool IsLICMCandidate(MachineInstr &I); 178 179 /// IsLoopInvariantInst - Returns true if the instruction is loop 180 /// invariant. I.e., all virtual register operands are defined outside of 181 /// the loop, physical registers aren't accessed (explicitly or implicitly), 182 /// and the instruction is hoistable. 183 /// 184 bool IsLoopInvariantInst(MachineInstr &I); 185 186 /// HasAnyPHIUse - Return true if the specified register is used by any 187 /// phi node. 188 bool HasAnyPHIUse(unsigned Reg) const; 189 190 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 191 /// and an use in the current loop, return true if the target considered 192 /// it 'high'. 193 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 194 unsigned Reg) const; 195 196 bool IsCheapInstruction(MachineInstr &MI) const; 197 198 /// CanCauseHighRegPressure - Visit BBs from header to current BB, 199 /// check if hoisting an instruction of the given cost matrix can cause high 200 /// register pressure. 
201 bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); 202 203 /// UpdateBackTraceRegPressure - Traverse the back trace from header to 204 /// the current block and update their register pressures to reflect the 205 /// effect of hoisting MI from the current block to the preheader. 206 void UpdateBackTraceRegPressure(const MachineInstr *MI); 207 208 /// IsProfitableToHoist - Return true if it is potentially profitable to 209 /// hoist the given loop invariant. 210 bool IsProfitableToHoist(MachineInstr &MI); 211 212 /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 213 /// If not then a load from this mbb may not be safe to hoist. 214 bool IsGuaranteedToExecute(MachineBasicBlock *BB); 215 216 void EnterScope(MachineBasicBlock *MBB); 217 218 void ExitScope(MachineBasicBlock *MBB); 219 220 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given 221 /// dominator tree node if its a leaf or all of its children are done. Walk 222 /// up the dominator tree to destroy ancestors which are now done. 223 void ExitScopeIfDone(MachineDomTreeNode *Node, 224 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 225 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 226 227 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 228 /// blocks dominated by the specified header block, and that are in the 229 /// current loop) in depth first order w.r.t the DominatorTree. This allows 230 /// us to visit definitions before uses, allowing us to hoist a loop body in 231 /// one pass without iteration. 232 /// 233 void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); 234 void HoistRegion(MachineDomTreeNode *N, bool IsHeader); 235 236 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 237 /// index, return the ID and cost of its representative register class by 238 /// reference. 
239 void getRegisterClassIDAndCost(const MachineInstr *MI, 240 unsigned Reg, unsigned OpIdx, 241 unsigned &RCId, unsigned &RCCost) const; 242 243 /// InitRegPressure - Find all virtual register references that are liveout 244 /// of the preheader to initialize the starting "register pressure". Note 245 /// this does not count live through (livein but not used) registers. 246 void InitRegPressure(MachineBasicBlock *BB); 247 248 /// UpdateRegPressure - Update estimate of register pressure after the 249 /// specified instruction. 250 void UpdateRegPressure(const MachineInstr *MI); 251 252 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 253 /// the load itself could be hoisted. Return the unfolded and hoistable 254 /// load, or null if the load couldn't be unfolded or if it wouldn't 255 /// be hoistable. 256 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 257 258 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 259 /// duplicate of MI. Return this instruction if it's found. 260 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 261 std::vector<const MachineInstr*> &PrevMIs); 262 263 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 264 /// the preheader that compute the same value. If it's found, do a RAU on 265 /// with the definition of the existing instruction rather than hoisting 266 /// the instruction to the preheader. 267 bool EliminateCSE(MachineInstr *MI, 268 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 269 270 /// MayCSE - Return true if the given instruction will be CSE'd if it's 271 /// hoisted out of the loop. 272 bool MayCSE(MachineInstr *MI); 273 274 /// Hoist - When an instruction is found to only use loop invariant operands 275 /// that is safe to hoist, this instruction is called to do the dirty work. 276 /// It returns true if the instruction is hoisted. 
277 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 278 279 /// InitCSEMap - Initialize the CSE map with instructions that are in the 280 /// current loop preheader that may become duplicates of instructions that 281 /// are hoisted out of the loop. 282 void InitCSEMap(MachineBasicBlock *BB); 283 284 /// getCurPreheader - Get the preheader for the current loop, splitting 285 /// a critical edge if needed. 286 MachineBasicBlock *getCurPreheader(); 287 }; 288 } // end anonymous namespace 289 290 char MachineLICM::ID = 0; 291 char &llvm::MachineLICMID = MachineLICM::ID; 292 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 293 "Machine Loop Invariant Code Motion", false, false) 294 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 295 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 296 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 297 INITIALIZE_PASS_END(MachineLICM, "machinelicm", 298 "Machine Loop Invariant Code Motion", false, false) 299 300 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 301 /// loop that has a unique predecessor. 302 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 303 // Check whether this loop even has a unique predecessor. 304 if (!CurLoop->getLoopPredecessor()) 305 return false; 306 // Ok, now check to see if any of its outer loops do. 307 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 308 if (L->getLoopPredecessor()) 309 return false; 310 // None of them did, so this is the outermost with a unique predecessor. 
311 return true; 312 } 313 314 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 315 Changed = FirstInLoop = false; 316 TM = &MF.getTarget(); 317 TII = TM->getInstrInfo(); 318 TLI = TM->getTargetLowering(); 319 TRI = TM->getRegisterInfo(); 320 MFI = MF.getFrameInfo(); 321 MRI = &MF.getRegInfo(); 322 InstrItins = TM->getInstrItineraryData(); 323 324 PreRegAlloc = MRI->isSSA(); 325 326 if (PreRegAlloc) 327 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 328 else 329 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 330 DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); 331 332 if (PreRegAlloc) { 333 // Estimate register pressure during pre-regalloc pass. 334 unsigned NumRC = TRI->getNumRegClasses(); 335 RegPressure.resize(NumRC); 336 std::fill(RegPressure.begin(), RegPressure.end(), 0); 337 RegLimit.resize(NumRC); 338 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 339 E = TRI->regclass_end(); I != E; ++I) 340 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); 341 } else { 342 ReservedRegs = TRI->getReservedRegs(MF); 343 } 344 345 // Get our Loop information... 346 MLI = &getAnalysis<MachineLoopInfo>(); 347 DT = &getAnalysis<MachineDominatorTree>(); 348 AA = &getAnalysis<AliasAnalysis>(); 349 350 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 351 while (!Worklist.empty()) { 352 CurLoop = Worklist.pop_back_val(); 353 CurPreheader = 0; 354 355 // If this is done before regalloc, only visit outer-most preheader-sporting 356 // loops. 357 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 358 Worklist.append(CurLoop->begin(), CurLoop->end()); 359 continue; 360 } 361 362 if (!PreRegAlloc) 363 HoistRegionPostRA(); 364 else { 365 // CSEMap is initialized for loop header when the first instruction is 366 // being hoisted. 
367 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 368 FirstInLoop = true; 369 HoistOutOfLoop(N); 370 CSEMap.clear(); 371 } 372 } 373 374 return Changed; 375 } 376 377 /// InstructionStoresToFI - Return true if instruction stores to the 378 /// specified frame. 379 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 380 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 381 oe = MI->memoperands_end(); o != oe; ++o) { 382 if (!(*o)->isStore() || !(*o)->getValue()) 383 continue; 384 if (const FixedStackPseudoSourceValue *Value = 385 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 386 if (Value->getFrameIndex() == FI) 387 return true; 388 } 389 } 390 return false; 391 } 392 393 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 394 /// gather register def and frame object update information. 395 void MachineLICM::ProcessMI(MachineInstr *MI, 396 BitVector &PhysRegDefs, 397 BitVector &PhysRegClobbers, 398 SmallSet<int, 32> &StoredFIs, 399 SmallVector<CandidateInfo, 32> &Candidates) { 400 bool RuledOut = false; 401 bool HasNonInvariantUse = false; 402 unsigned Def = 0; 403 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 404 const MachineOperand &MO = MI->getOperand(i); 405 if (MO.isFI()) { 406 // Remember if the instruction stores to the frame index. 407 int FI = MO.getIndex(); 408 if (!StoredFIs.count(FI) && 409 MFI->isSpillSlotObjectIndex(FI) && 410 InstructionStoresToFI(MI, FI)) 411 StoredFIs.insert(FI); 412 HasNonInvariantUse = true; 413 continue; 414 } 415 416 // We can't hoist an instruction defining a physreg that is clobbered in 417 // the loop. 
418 if (MO.isRegMask()) { 419 PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 420 continue; 421 } 422 423 if (!MO.isReg()) 424 continue; 425 unsigned Reg = MO.getReg(); 426 if (!Reg) 427 continue; 428 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 429 "Not expecting virtual register!"); 430 431 if (!MO.isDef()) { 432 // Allow reserved register reads to be hoisted. 433 if (ReservedRegs.test(Reg)) 434 continue; 435 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 436 // If it's using a non-loop-invariant register, then it's obviously not 437 // safe to hoist. 438 HasNonInvariantUse = true; 439 continue; 440 } 441 442 if (MO.isImplicit()) { 443 for (const unsigned *AS = TRI->getOverlaps(Reg); *AS; ++AS) 444 PhysRegClobbers.set(*AS); 445 if (!MO.isDead()) 446 // Non-dead implicit def? This cannot be hoisted. 447 RuledOut = true; 448 // No need to check if a dead implicit def is also defined by 449 // another instruction. 450 continue; 451 } 452 453 // FIXME: For now, avoid instructions with multiple defs, unless 454 // it's a dead implicit def. 455 if (Def) 456 RuledOut = true; 457 else 458 Def = Reg; 459 460 // If we have already seen another instruction that defines the same 461 // register, then this is not safe. Two defs is indicated by setting a 462 // PhysRegClobbers bit. 463 for (const unsigned *AS = TRI->getOverlaps(Reg); *AS; ++AS) { 464 if (PhysRegDefs.test(*AS)) 465 PhysRegClobbers.set(*AS); 466 if (PhysRegClobbers.test(*AS)) 467 // MI defined register is seen defined by another instruction in 468 // the loop, it cannot be a LICM candidate. 469 RuledOut = true; 470 PhysRegDefs.set(*AS); 471 } 472 } 473 474 // Only consider reloads for now and remats which do not have register 475 // operands. FIXME: Consider unfold load folding instructions. 
476 if (Def && !RuledOut) { 477 int FI = INT_MIN; 478 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 479 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 480 Candidates.push_back(CandidateInfo(MI, Def, FI)); 481 } 482 } 483 484 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 485 /// invariants out to the preheader. 486 void MachineLICM::HoistRegionPostRA() { 487 unsigned NumRegs = TRI->getNumRegs(); 488 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 489 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 490 491 SmallVector<CandidateInfo, 32> Candidates; 492 SmallSet<int, 32> StoredFIs; 493 494 // Walk the entire region, count number of defs for each register, and 495 // collect potential LICM candidates. 496 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 497 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 498 MachineBasicBlock *BB = Blocks[i]; 499 500 // If the header of the loop containing this basic block is a landing pad, 501 // then don't try to hoist instructions out of this loop. 502 const MachineLoop *ML = MLI->getLoopFor(BB); 503 if (ML && ML->getHeader()->isLandingPad()) continue; 504 505 // Conservatively treat live-in's as an external def. 506 // FIXME: That means a reload that're reused in successor block(s) will not 507 // be LICM'ed. 508 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 509 E = BB->livein_end(); I != E; ++I) { 510 unsigned Reg = *I; 511 for (const unsigned *AS = TRI->getOverlaps(Reg); *AS; ++AS) 512 PhysRegDefs.set(*AS); 513 } 514 515 SpeculationState = SpeculateUnknown; 516 for (MachineBasicBlock::iterator 517 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 518 MachineInstr *MI = &*MII; 519 ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 520 } 521 } 522 523 // Now evaluate whether the potential candidates qualify. 524 // 1. 
Check if the candidate defined register is defined by another 525 // instruction in the loop. 526 // 2. If the candidate is a load from stack slot (always true for now), 527 // check if the slot is stored anywhere in the loop. 528 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 529 if (Candidates[i].FI != INT_MIN && 530 StoredFIs.count(Candidates[i].FI)) 531 continue; 532 533 if (!PhysRegClobbers.test(Candidates[i].Def)) { 534 bool Safe = true; 535 MachineInstr *MI = Candidates[i].MI; 536 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 537 const MachineOperand &MO = MI->getOperand(j); 538 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 539 continue; 540 // Allow hoisting of reserved register reads that aren't call preserved. 541 // For example %rip. 542 // IsLoopInvariantInst() already checks MRI->isConstantPhysReg(). 543 if (ReservedRegs.test(MO.getReg())) 544 continue; 545 if (PhysRegDefs.test(MO.getReg()) || 546 PhysRegClobbers.test(MO.getReg())) { 547 // If it's using a non-loop-invariant register, then it's obviously 548 // not safe to hoist. 549 Safe = false; 550 break; 551 } 552 } 553 if (Safe) 554 HoistPostRA(MI, Candidates[i].Def); 555 } 556 } 557 } 558 559 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 560 /// loop, and make sure it is not killed by any instructions in the loop. 
561 void MachineLICM::AddToLiveIns(unsigned Reg) { 562 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 563 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 564 MachineBasicBlock *BB = Blocks[i]; 565 if (!BB->isLiveIn(Reg)) 566 BB->addLiveIn(Reg); 567 for (MachineBasicBlock::iterator 568 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 569 MachineInstr *MI = &*MII; 570 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 571 MachineOperand &MO = MI->getOperand(i); 572 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 573 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 574 MO.setIsKill(false); 575 } 576 } 577 } 578 } 579 580 /// HoistPostRA - When an instruction is found to only use loop invariant 581 /// operands that is safe to hoist, this instruction is called to do the 582 /// dirty work. 583 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 584 MachineBasicBlock *Preheader = getCurPreheader(); 585 if (!Preheader) return; 586 587 // Now move the instructions to the predecessor, inserting it before any 588 // terminator instructions. 589 DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 590 << MI->getParent()->getNumber() << ": " << *MI); 591 592 // Splice the instruction to the preheader. 593 MachineBasicBlock *MBB = MI->getParent(); 594 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 595 596 // Add register to livein list to all the BBs in the current loop since a 597 // loop invariant must be kept live throughout the whole loop. This is 598 // important to ensure later passes do not scavenge the def register. 599 AddToLiveIns(Def); 600 601 ++NumPostRAHoisted; 602 Changed = true; 603 } 604 605 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 606 // If not then a load from this mbb may not be safe to hoist. 
607 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { 608 if (SpeculationState != SpeculateUnknown) 609 return SpeculationState == SpeculateFalse; 610 611 if (BB != CurLoop->getHeader()) { 612 // Check loop exiting blocks. 613 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 614 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 615 for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) 616 if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { 617 SpeculationState = SpeculateTrue; 618 return false; 619 } 620 } 621 622 SpeculationState = SpeculateFalse; 623 return true; 624 } 625 626 void MachineLICM::EnterScope(MachineBasicBlock *MBB) { 627 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 628 629 // Remember livein register pressure. 630 BackTrace.push_back(RegPressure); 631 } 632 633 void MachineLICM::ExitScope(MachineBasicBlock *MBB) { 634 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 635 BackTrace.pop_back(); 636 } 637 638 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 639 /// dominator tree node if its a leaf or all of its children are done. Walk 640 /// up the dominator tree to destroy ancestors which are now done. 641 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, 642 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 643 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 644 if (OpenChildren[Node]) 645 return; 646 647 // Pop scope. 648 ExitScope(Node->getBlock()); 649 650 // Now traverse upwards to pop ancestors whose offsprings are all done. 
651 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 652 unsigned Left = --OpenChildren[Parent]; 653 if (Left != 0) 654 break; 655 ExitScope(Parent->getBlock()); 656 Node = Parent; 657 } 658 } 659 660 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 661 /// blocks dominated by the specified header block, and that are in the 662 /// current loop) in depth first order w.r.t the DominatorTree. This allows 663 /// us to visit definitions before uses, allowing us to hoist a loop body in 664 /// one pass without iteration. 665 /// 666 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 667 SmallVector<MachineDomTreeNode*, 32> Scopes; 668 SmallVector<MachineDomTreeNode*, 8> WorkList; 669 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 670 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 671 672 // Perform a DFS walk to determine the order of visit. 673 WorkList.push_back(HeaderN); 674 do { 675 MachineDomTreeNode *Node = WorkList.pop_back_val(); 676 assert(Node != 0 && "Null dominator tree node?"); 677 MachineBasicBlock *BB = Node->getBlock(); 678 679 // If the header of the loop containing this basic block is a landing pad, 680 // then don't try to hoist instructions out of this loop. 681 const MachineLoop *ML = MLI->getLoopFor(BB); 682 if (ML && ML->getHeader()->isLandingPad()) 683 continue; 684 685 // If this subregion is not in the top level loop at all, exit. 686 if (!CurLoop->contains(BB)) 687 continue; 688 689 Scopes.push_back(Node); 690 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 691 unsigned NumChildren = Children.size(); 692 693 // Don't hoist things out of a large switch statement. This often causes 694 // code to be hoisted that wasn't going to be executed, and increases 695 // register pressure in a situation where it's likely to matter. 
696 if (BB->succ_size() >= 25) 697 NumChildren = 0; 698 699 OpenChildren[Node] = NumChildren; 700 // Add children in reverse order as then the next popped worklist node is 701 // the first child of this node. This means we ultimately traverse the 702 // DOM tree in exactly the same order as if we'd recursed. 703 for (int i = (int)NumChildren-1; i >= 0; --i) { 704 MachineDomTreeNode *Child = Children[i]; 705 ParentMap[Child] = Node; 706 WorkList.push_back(Child); 707 } 708 } while (!WorkList.empty()); 709 710 if (Scopes.size() != 0) { 711 MachineBasicBlock *Preheader = getCurPreheader(); 712 if (!Preheader) 713 return; 714 715 // Compute registers which are livein into the loop headers. 716 RegSeen.clear(); 717 BackTrace.clear(); 718 InitRegPressure(Preheader); 719 } 720 721 // Now perform LICM. 722 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 723 MachineDomTreeNode *Node = Scopes[i]; 724 MachineBasicBlock *MBB = Node->getBlock(); 725 726 MachineBasicBlock *Preheader = getCurPreheader(); 727 if (!Preheader) 728 continue; 729 730 EnterScope(MBB); 731 732 // Process the block 733 SpeculationState = SpeculateUnknown; 734 for (MachineBasicBlock::iterator 735 MII = MBB->begin(), E = MBB->end(); MII != E; ) { 736 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 737 MachineInstr *MI = &*MII; 738 if (!Hoist(MI, Preheader)) 739 UpdateRegPressure(MI); 740 MII = NextMII; 741 } 742 743 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 744 ExitScopeIfDone(Node, OpenChildren, ParentMap); 745 } 746 } 747 748 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 749 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 750 } 751 752 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 753 /// index, return the ID and cost of its representative register class. 
754 void 755 MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, 756 unsigned Reg, unsigned OpIdx, 757 unsigned &RCId, unsigned &RCCost) const { 758 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 759 EVT VT = *RC->vt_begin(); 760 if (VT == MVT::Untyped) { 761 RCId = RC->getID(); 762 RCCost = 1; 763 } else { 764 RCId = TLI->getRepRegClassFor(VT)->getID(); 765 RCCost = TLI->getRepRegClassCostFor(VT); 766 } 767 } 768 769 /// InitRegPressure - Find all virtual register references that are liveout of 770 /// the preheader to initialize the starting "register pressure". Note this 771 /// does not count live through (livein but not used) registers. 772 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 773 std::fill(RegPressure.begin(), RegPressure.end(), 0); 774 775 // If the preheader has only a single predecessor and it ends with a 776 // fallthrough or an unconditional branch, then scan its predecessor for live 777 // defs as well. This happens whenever the preheader is created by splitting 778 // the critical edge from the loop predecessor to the loop header. 
779 if (BB->pred_size() == 1) { 780 MachineBasicBlock *TBB = 0, *FBB = 0; 781 SmallVector<MachineOperand, 4> Cond; 782 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 783 InitRegPressure(*BB->pred_begin()); 784 } 785 786 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 787 MII != E; ++MII) { 788 MachineInstr *MI = &*MII; 789 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 790 const MachineOperand &MO = MI->getOperand(i); 791 if (!MO.isReg() || MO.isImplicit()) 792 continue; 793 unsigned Reg = MO.getReg(); 794 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 795 continue; 796 797 bool isNew = RegSeen.insert(Reg); 798 unsigned RCId, RCCost; 799 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 800 if (MO.isDef()) 801 RegPressure[RCId] += RCCost; 802 else { 803 bool isKill = isOperandKill(MO, MRI); 804 if (isNew && !isKill) 805 // Haven't seen this, it must be a livein. 806 RegPressure[RCId] += RCCost; 807 else if (!isNew && isKill) 808 RegPressure[RCId] -= RCCost; 809 } 810 } 811 } 812 } 813 814 /// UpdateRegPressure - Update estimate of register pressure after the 815 /// specified instruction. 
816 void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { 817 if (MI->isImplicitDef()) 818 return; 819 820 SmallVector<unsigned, 4> Defs; 821 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 822 const MachineOperand &MO = MI->getOperand(i); 823 if (!MO.isReg() || MO.isImplicit()) 824 continue; 825 unsigned Reg = MO.getReg(); 826 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 827 continue; 828 829 bool isNew = RegSeen.insert(Reg); 830 if (MO.isDef()) 831 Defs.push_back(Reg); 832 else if (!isNew && isOperandKill(MO, MRI)) { 833 unsigned RCId, RCCost; 834 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 835 if (RCCost > RegPressure[RCId]) 836 RegPressure[RCId] = 0; 837 else 838 RegPressure[RCId] -= RCCost; 839 } 840 } 841 842 unsigned Idx = 0; 843 while (!Defs.empty()) { 844 unsigned Reg = Defs.pop_back_val(); 845 unsigned RCId, RCCost; 846 getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); 847 RegPressure[RCId] += RCCost; 848 ++Idx; 849 } 850 } 851 852 /// isLoadFromGOTOrConstantPool - Return true if this machine instruction 853 /// loads from global offset table or constant pool. 854 static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { 855 assert (MI.mayLoad() && "Expected MI that loads!"); 856 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 857 E = MI.memoperands_end(); I != E; ++I) { 858 if (const Value *V = (*I)->getValue()) { 859 if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) 860 if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) 861 return true; 862 } 863 } 864 return false; 865 } 866 867 /// IsLICMCandidate - Returns true if the instruction may be a suitable 868 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 869 /// not safe to hoist it. 870 bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 871 // Check if it's safe to move the instruction. 
872 bool DontMoveAcrossStore = true; 873 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 874 return false; 875 876 // If it is load then check if it is guaranteed to execute by making sure that 877 // it dominates all exiting blocks. If it doesn't, then there is a path out of 878 // the loop which does not execute this load, so we can't hoist it. Loads 879 // from constant memory are not safe to speculate all the time, for example 880 // indexed load from a jump table. 881 // Stores and side effects are already checked by isSafeToMove. 882 if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && 883 !IsGuaranteedToExecute(I.getParent())) 884 return false; 885 886 return true; 887 } 888 889 /// IsLoopInvariantInst - Returns true if the instruction is loop 890 /// invariant. I.e., all virtual register operands are defined outside of the 891 /// loop, physical registers aren't accessed explicitly, and there are no side 892 /// effects that aren't captured by the operands or other flags. 893 /// 894 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 895 if (!IsLICMCandidate(I)) 896 return false; 897 898 // The instruction is loop invariant if all of its operands are. 899 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 900 const MachineOperand &MO = I.getOperand(i); 901 902 if (!MO.isReg()) 903 continue; 904 905 unsigned Reg = MO.getReg(); 906 if (Reg == 0) continue; 907 908 // Don't hoist an instruction that uses or defines a physical register. 909 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 910 if (MO.isUse()) { 911 // If the physreg has no defs anywhere, it's just an ambient register 912 // and we can freely move its uses. Alternatively, if it's allocatable, 913 // it could get allocated to something with a def during allocation. 914 if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) 915 return false; 916 // Otherwise it's safe to move. 917 continue; 918 } else if (!MO.isDead()) { 919 // A def that isn't dead. We can't move it. 
920 return false; 921 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 922 // If the reg is live into the loop, we can't hoist an instruction 923 // which would clobber it. 924 return false; 925 } 926 } 927 928 if (!MO.isUse()) 929 continue; 930 931 assert(MRI->getVRegDef(Reg) && 932 "Machine instr not mapped for this vreg?!"); 933 934 // If the loop contains the definition of an operand, then the instruction 935 // isn't loop invariant. 936 if (CurLoop->contains(MRI->getVRegDef(Reg))) 937 return false; 938 } 939 940 // If we got this far, the instruction is loop invariant! 941 return true; 942 } 943 944 945 /// HasAnyPHIUse - Return true if the specified register is used by any 946 /// phi node. 947 bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { 948 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 949 UE = MRI->use_end(); UI != UE; ++UI) { 950 MachineInstr *UseMI = &*UI; 951 if (UseMI->isPHI()) 952 return true; 953 // Look pass copies as well. 954 if (UseMI->isCopy()) { 955 unsigned Def = UseMI->getOperand(0).getReg(); 956 if (TargetRegisterInfo::isVirtualRegister(Def) && 957 HasAnyPHIUse(Def)) 958 return true; 959 } 960 } 961 return false; 962 } 963 964 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 965 /// and an use in the current loop, return true if the target considered 966 /// it 'high'. 
967 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 968 unsigned DefIdx, unsigned Reg) const { 969 if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) 970 return false; 971 972 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 973 E = MRI->use_nodbg_end(); I != E; ++I) { 974 MachineInstr *UseMI = &*I; 975 if (UseMI->isCopyLike()) 976 continue; 977 if (!CurLoop->contains(UseMI->getParent())) 978 continue; 979 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 980 const MachineOperand &MO = UseMI->getOperand(i); 981 if (!MO.isReg() || !MO.isUse()) 982 continue; 983 unsigned MOReg = MO.getReg(); 984 if (MOReg != Reg) 985 continue; 986 987 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) 988 return true; 989 } 990 991 // Only look at the first in loop use. 992 break; 993 } 994 995 return false; 996 } 997 998 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or 999 /// the operand latency between its def and a use is one or less. 1000 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1001 if (MI.isAsCheapAsAMove() || MI.isCopyLike()) 1002 return true; 1003 if (!InstrItins || InstrItins->isEmpty()) 1004 return false; 1005 1006 bool isCheap = false; 1007 unsigned NumDefs = MI.getDesc().getNumDefs(); 1008 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1009 MachineOperand &DefMO = MI.getOperand(i); 1010 if (!DefMO.isReg() || !DefMO.isDef()) 1011 continue; 1012 --NumDefs; 1013 unsigned Reg = DefMO.getReg(); 1014 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1015 continue; 1016 1017 if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 1018 return false; 1019 isCheap = true; 1020 } 1021 1022 return isCheap; 1023 } 1024 1025 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check 1026 /// if hoisting an instruction of the given cost matrix can cause high 1027 /// register pressure. 
bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
  for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
       CI != CE; ++CI) {
    // Only register classes whose net pressure would go up can cause trouble.
    if (CI->second <= 0)
      continue;

    unsigned RCId = CI->first;
    unsigned Limit = RegLimit[RCId];
    int Cost = CI->second;  // NOTE: intentionally shadows the parameter.
    // Check every block on the path from the loop header to the current
    // block; if any of them is already at or over the limit once this cost
    // is added, hoisting is considered to cause high pressure.
    for (unsigned i = BackTrace.size(); i != 0; --i) {
      SmallVector<unsigned, 8> &RP = BackTrace[i-1];
      if (RP[RCId] + Cost >= Limit)
        return true;
    }
  }

  return false;
}

/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
/// current block and update their register pressures to reflect the effect
/// of hoisting MI from the current block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
  // IMPLICIT_DEF contributes no real pressure.
  if (MI->isImplicitDef())
    return;

  // First compute the 'cost' of the instruction, i.e. its contribution
  // to register pressure.
  DenseMap<unsigned, int> Cost;
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    unsigned RCId, RCCost;
    getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
    if (MO.isDef()) {
      // A def raises pressure in its register class.
      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
      if (CI != Cost.end())
        CI->second += RCCost;
      else
        Cost.insert(std::make_pair(RCId, RCCost));
    } else if (isOperandKill(MO, MRI)) {
      // A killed use lowers pressure: hoisting ends the value's live range
      // earlier on this path.
      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
      if (CI != Cost.end())
        CI->second -= RCCost;
      else
        Cost.insert(std::make_pair(RCId, -RCCost));
    }
  }

  // Update register pressure of blocks from loop header to current block.
  for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
    SmallVector<unsigned, 8> &RP = BackTrace[i];
    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
         CI != CE; ++CI) {
      unsigned RCId = CI->first;
      RP[RCId] += CI->second;
    }
  }
}

/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  // IMPLICIT_DEFs are free, so always profitable.
  if (MI.isImplicitDef())
    return true;

  // If the instruction is cheap, only hoist if it is re-materilizable. LICM
  // will increase register pressure. It's probably not worth it if the
  // instruction is cheap.
  // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
  // these tend to help performance in low register pressure situation. The
  // trade off is it may cause spill in high pressure situation. It will end up
  // adding a store in the loop preheader. But the reload is no more expensive.
  // The side benefit is these loads are frequently CSE'ed.
  if (IsCheapInstruction(MI)) {
    if (!TII->isTriviallyReMaterializable(&MI, AA))
      return false;
  } else {
    // Estimate register pressure to determine whether to LICM the instruction.
    // In low register pressure situation, we can be more aggressive about
    // hoisting. Also, favors hoisting long latency instructions even in
    // moderately high pressure situation.
    // FIXME: If there are long latency loop-invariant instructions inside the
    // loop at this point, why didn't the optimizer's LICM hoist them?
    DenseMap<unsigned, int> Cost;
    for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = MO.getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      unsigned RCId, RCCost;
      getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
      if (MO.isDef()) {
        // A def feeding a high-latency in-loop use is worth hoisting
        // regardless of register pressure.
        if (HasHighOperandLatency(MI, i, Reg)) {
          ++NumHighLatency;
          return true;
        }

        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
        if (CI != Cost.end())
          CI->second += RCCost;
        else
          Cost.insert(std::make_pair(RCId, RCCost));
      } else if (isOperandKill(MO, MRI)) {
        // If a virtual register use is a kill, hoisting it out of the loop
        // may actually reduce register pressure or be register pressure
        // neutral.
        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
        if (CI != Cost.end())
          CI->second -= RCCost;
        else
          Cost.insert(std::make_pair(RCId, -RCCost));
      }
    }

    // Visit BBs from header to current BB, if hoisting this doesn't cause
    // high register pressure, then it's safe to proceed.
    if (!CanCauseHighRegPressure(Cost)) {
      ++NumLowRP;
      return true;
    }

    // Do not "speculate" in high register pressure situation. If an
    // instruction is not guaranteed to be executed in the loop, it's best to be
    // conservative.
    if (AvoidSpeculation &&
        (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
      return false;

    // High register pressure situation, only hoist if the instruction is going
    // to be remat'ed.
    if (!TII->isTriviallyReMaterializable(&MI, AA) &&
        !MI.isInvariantLoad(AA))
      return false;
  }

  // If result(s) of this instruction is used by PHIs outside of the loop, then
  // don't hoist it because it will introduce an extra copy.
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || !MO.isDef())
      continue;
    if (HasAnyPHIUse(MO.getReg()))
      return false;
  }

  return true;
}

/// ExtractHoistableLoad - Try to unfold a load from the given instruction so
/// the load itself may be hoisted. Returns the new (unfolded) load that is
/// invariant and profitable to hoist, or null if unfolding is not possible
/// or not worthwhile. On success the original instruction has been erased
/// and replaced in place by the two unfolded instructions.
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return 0;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isInvariantLoad(AA))
    return 0;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  if (NewOpc == 0) return 0;
  const MCInstrDesc &MID = TII->get(NewOpc);
  // Only handle single-def results; multiple defs complicate replacement.
  if (MID.getNumDefs() != 1) return 0;
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  unsigned Reg = MRI->createVirtualRegister(RC);

  MachineFunction &MF = *MI->getParent()->getParent();
  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success =
    TII->unfoldMemoryOperand(MF, MI, Reg,
                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
                             NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  // Insert the load (NewMIs[0]) and the remainder (NewMIs[1]) where the
  // folded instruction was.
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return 0;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.
  MI->eraseFromParent();
  return NewMIs[0];
}

/// InitCSEMap - Record every instruction already in BB (the preheader),
/// keyed by opcode, as a potential CSE candidate for later hoists.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
    const MachineInstr *MI = &*I;
    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
      CI = CSEMap.find(Opcode);
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else {
      std::vector<const MachineInstr*> CSEMIs;
      CSEMIs.push_back(MI);
      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
    }
  }
}

/// LookForDuplicate - Return an instruction from PrevMIs that produces the
/// same value as MI, or null if there is none.
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
                              std::vector<const MachineInstr*> &PrevMIs) {
  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
    const MachineInstr *PrevMI = PrevMIs[i];
    // MRI is only passed pre-regalloc; post-regalloc comparison is stricter.
    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
      return PrevMI;
  }
  return 0;
}

/// EliminateCSE - If an already-hoisted instruction in the preheader computes
/// the same value as MI, replace MI's defs with the existing instruction's
/// defs and erase MI. Returns true if MI was eliminated.
bool MachineLICM::EliminateCSE(MachineInstr *MI,
          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 ||
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() &&
          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        Defs.push_back(i);
    }

    // First pass: constrain Dup's defs to MI's register classes, remembering
    // the originals so we can roll back on failure.
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
      }
    }

    // Second pass (all constraints succeeded): rewrite uses and clear kill
    // flags, since DupReg is now live through additional code.
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      MRI->clearKillFlags(DupReg);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}

/// MayCSE - Return true if the given instruction will be CSE'd if it's
/// hoisted out of the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  return LookForDuplicate(MI, CI->second) != 0;
}

/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
  // First check whether we should hoist this instruction.
  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
    // If not, try unfolding a hoistable load.
    MI = ExtractHoistableLoad(MI);
    if (!MI) return false;
  }

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG({
      dbgs() << "Hoisting " << *MI;
      if (Preheader->getBasicBlock())
        dbgs() << " to MachineBasicBlock "
               << Preheader->getName();
      if (MI->getParent()->getBasicBlock())
        dbgs() << " from MachineBasicBlock "
               << MI->getParent()->getName();
      dbgs() << "\n";
    });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction.
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  if (!EliminateCSE(MI, CI)) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);

    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef() && !MO.isDead())
        MRI->clearKillFlags(MO.getReg());
    }

    // Add to the CSE map so later hoists can CSE against this instruction.
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else {
      std::vector<const MachineInstr*> CSEMIs;
      CSEMIs.push_back(MI);
      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
    }
  }

  ++NumHoisted;
  Changed = true;

  return true;
}

/// getCurPreheader - Return the preheader block for CurLoop, creating one by
/// splitting the critical edge from the loop predecessor if necessary.
/// Returns null (and caches the failure) if no preheader can be obtained.
MachineBasicBlock *MachineLICM::getCurPreheader() {
  // Determine the block to which to hoist instructions. If we can't find a
  // suitable loop predecessor, we can't do any hoisting.

  // If we've tried to get a preheader and failed, don't try again.
  // (-1 is used as a sentinel distinct from the null "not yet tried" state.)
  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
    return 0;

  if (!CurPreheader) {
    CurPreheader = CurLoop->getLoopPreheader();
    if (!CurPreheader) {
      // No preheader; fall back to splitting the edge from the unique loop
      // predecessor, if there is one.
      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
      if (!Pred) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return 0;
      }

      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
      if (!CurPreheader) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return 0;
      }
    }
  }
  return CurPreheader;
}