1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion on machine instructions. We 11 // attempt to remove as much code from the body of a loop as possible. 12 // 13 // This pass does not attempt to throttle itself to limit register pressure. 14 // The register allocation phases are expected to perform rematerialization 15 // to recover when register pressure is high. 16 // 17 // This pass is not intended to be a replacement or a complete alternative 18 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19 // constructs that are not exposed before lowering and instruction selection. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #define DEBUG_TYPE "machine-licm" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/MachineFrameInfo.h" 27 #include "llvm/CodeGen/MachineLoopInfo.h" 28 #include "llvm/CodeGen/MachineMemOperand.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/PseudoSourceValue.h" 31 #include "llvm/MC/MCInstrItineraries.h" 32 #include "llvm/Target/TargetLowering.h" 33 #include "llvm/Target/TargetRegisterInfo.h" 34 #include "llvm/Target/TargetInstrInfo.h" 35 #include "llvm/Target/TargetMachine.h" 36 #include "llvm/Analysis/AliasAnalysis.h" 37 #include "llvm/ADT/DenseMap.h" 38 #include "llvm/ADT/SmallSet.h" 39 #include "llvm/ADT/Statistic.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/raw_ostream.h" 43 using namespace llvm; 44 45 static cl::opt<bool> 46 AvoidSpeculation("avoid-speculation", 47 cl::desc("MachineLICM should avoid speculation"), 48 cl::init(true), cl::Hidden); 49 50 STATISTIC(NumHoisted, 51 "Number of machine instructions hoisted out of loops"); 52 STATISTIC(NumLowRP, 53 "Number of instructions hoisted in low reg pressure situation"); 54 STATISTIC(NumHighLatency, 55 "Number of high latency instructions hoisted"); 56 STATISTIC(NumCSEed, 57 "Number of hoisted machine instructions CSEed"); 58 STATISTIC(NumPostRAHoisted, 59 "Number of machine instructions hoisted out of loops post regalloc"); 60 61 namespace { 62 class MachineLICM : public MachineFunctionPass { 63 const TargetMachine *TM; 64 const TargetInstrInfo *TII; 65 const TargetLowering *TLI; 66 const TargetRegisterInfo *TRI; 67 const MachineFrameInfo *MFI; 68 MachineRegisterInfo *MRI; 69 const InstrItineraryData *InstrItins; 70 bool PreRegAlloc; 71 72 // Various analyses that we use... 73 AliasAnalysis *AA; // Alias analysis info. 74 MachineLoopInfo *MLI; // Current MachineLoopInfo 75 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 76 77 // State that is updated as we process loops 78 bool Changed; // True if a loop is changed. 79 bool FirstInLoop; // True if it's the first LICM in the loop. 80 MachineLoop *CurLoop; // The current loop we are working on. 81 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 82 83 // Track 'estimated' register pressure. 84 SmallSet<unsigned, 32> RegSeen; 85 SmallVector<unsigned, 8> RegPressure; 86 87 // Register pressure "limit" per register class. If the pressure 88 // is higher than the limit, then it's considered high. 89 SmallVector<unsigned, 8> RegLimit; 90 91 // Register pressure on path leading from loop preheader to current BB. 92 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 93 94 // For each opcode, keep a list of potential CSE instructions. 95 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 96 97 enum { 98 SpeculateFalse = 0, 99 SpeculateTrue = 1, 100 SpeculateUnknown = 2 101 }; 102 103 // If a MBB does not dominate loop exiting blocks then it may not safe 104 // to hoist loads from this block. 105 // Tri-state: 0 - false, 1 - true, 2 - unknown 106 unsigned SpeculationState; 107 108 public: 109 static char ID; // Pass identification, replacement for typeid 110 MachineLICM() : 111 MachineFunctionPass(ID), PreRegAlloc(true) { 112 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 113 } 114 115 explicit MachineLICM(bool PreRA) : 116 MachineFunctionPass(ID), PreRegAlloc(PreRA) { 117 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 118 } 119 120 virtual bool runOnMachineFunction(MachineFunction &MF); 121 122 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 123 AU.addRequired<MachineLoopInfo>(); 124 AU.addRequired<MachineDominatorTree>(); 125 AU.addRequired<AliasAnalysis>(); 126 AU.addPreserved<MachineLoopInfo>(); 127 AU.addPreserved<MachineDominatorTree>(); 128 MachineFunctionPass::getAnalysisUsage(AU); 129 } 130 131 virtual void releaseMemory() { 132 RegSeen.clear(); 133 RegPressure.clear(); 134 RegLimit.clear(); 135 BackTrace.clear(); 136 for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 137 CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 138 CI->second.clear(); 139 CSEMap.clear(); 140 } 141 142 private: 143 /// CandidateInfo - Keep track of information about hoisting candidates. 144 struct CandidateInfo { 145 MachineInstr *MI; 146 unsigned Def; 147 int FI; 148 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 149 : MI(mi), Def(def), FI(fi) {} 150 }; 151 152 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 153 /// invariants out to the preheader. 154 void HoistRegionPostRA(); 155 156 /// HoistPostRA - When an instruction is found to only use loop invariant 157 /// operands that is safe to hoist, this instruction is called to do the 158 /// dirty work. 159 void HoistPostRA(MachineInstr *MI, unsigned Def); 160 161 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 162 /// gather register def and frame object update information. 163 void ProcessMI(MachineInstr *MI, 164 BitVector &PhysRegDefs, 165 BitVector &PhysRegClobbers, 166 SmallSet<int, 32> &StoredFIs, 167 SmallVector<CandidateInfo, 32> &Candidates); 168 169 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 170 /// current loop. 171 void AddToLiveIns(unsigned Reg); 172 173 /// IsLICMCandidate - Returns true if the instruction may be a suitable 174 /// candidate for LICM. e.g. If the instruction is a call, then it's 175 /// obviously not safe to hoist it. 176 bool IsLICMCandidate(MachineInstr &I); 177 178 /// IsLoopInvariantInst - Returns true if the instruction is loop 179 /// invariant. I.e., all virtual register operands are defined outside of 180 /// the loop, physical registers aren't accessed (explicitly or implicitly), 181 /// and the instruction is hoistable. 182 /// 183 bool IsLoopInvariantInst(MachineInstr &I); 184 185 /// HasAnyPHIUse - Return true if the specified register is used by any 186 /// phi node. 187 bool HasAnyPHIUse(unsigned Reg) const; 188 189 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 190 /// and an use in the current loop, return true if the target considered 191 /// it 'high'. 192 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 193 unsigned Reg) const; 194 195 bool IsCheapInstruction(MachineInstr &MI) const; 196 197 /// CanCauseHighRegPressure - Visit BBs from header to current BB, 198 /// check if hoisting an instruction of the given cost matrix can cause high 199 /// register pressure. 200 bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); 201 202 /// UpdateBackTraceRegPressure - Traverse the back trace from header to 203 /// the current block and update their register pressures to reflect the 204 /// effect of hoisting MI from the current block to the preheader. 205 void UpdateBackTraceRegPressure(const MachineInstr *MI); 206 207 /// IsProfitableToHoist - Return true if it is potentially profitable to 208 /// hoist the given loop invariant. 209 bool IsProfitableToHoist(MachineInstr &MI); 210 211 /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 212 /// If not then a load from this mbb may not be safe to hoist. 213 bool IsGuaranteedToExecute(MachineBasicBlock *BB); 214 215 void EnterScope(MachineBasicBlock *MBB); 216 217 void ExitScope(MachineBasicBlock *MBB); 218 219 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given 220 /// dominator tree node if its a leaf or all of its children are done. Walk 221 /// up the dominator tree to destroy ancestors which are now done. 222 void ExitScopeIfDone(MachineDomTreeNode *Node, 223 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 224 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 225 226 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 227 /// blocks dominated by the specified header block, and that are in the 228 /// current loop) in depth first order w.r.t the DominatorTree. This allows 229 /// us to visit definitions before uses, allowing us to hoist a loop body in 230 /// one pass without iteration. 231 /// 232 void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); 233 void HoistRegion(MachineDomTreeNode *N, bool IsHeader); 234 235 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 236 /// index, return the ID and cost of its representative register class by 237 /// reference. 238 void getRegisterClassIDAndCost(const MachineInstr *MI, 239 unsigned Reg, unsigned OpIdx, 240 unsigned &RCId, unsigned &RCCost) const; 241 242 /// InitRegPressure - Find all virtual register references that are liveout 243 /// of the preheader to initialize the starting "register pressure". Note 244 /// this does not count live through (livein but not used) registers. 245 void InitRegPressure(MachineBasicBlock *BB); 246 247 /// UpdateRegPressure - Update estimate of register pressure after the 248 /// specified instruction. 249 void UpdateRegPressure(const MachineInstr *MI); 250 251 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 252 /// the load itself could be hoisted. Return the unfolded and hoistable 253 /// load, or null if the load couldn't be unfolded or if it wouldn't 254 /// be hoistable. 255 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 256 257 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 258 /// duplicate of MI. Return this instruction if it's found. 259 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 260 std::vector<const MachineInstr*> &PrevMIs); 261 262 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 263 /// the preheader that compute the same value. If it's found, do a RAU on 264 /// with the definition of the existing instruction rather than hoisting 265 /// the instruction to the preheader. 266 bool EliminateCSE(MachineInstr *MI, 267 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 268 269 /// MayCSE - Return true if the given instruction will be CSE'd if it's 270 /// hoisted out of the loop. 271 bool MayCSE(MachineInstr *MI); 272 273 /// Hoist - When an instruction is found to only use loop invariant operands 274 /// that is safe to hoist, this instruction is called to do the dirty work. 275 /// It returns true if the instruction is hoisted. 276 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 277 278 /// InitCSEMap - Initialize the CSE map with instructions that are in the 279 /// current loop preheader that may become duplicates of instructions that 280 /// are hoisted out of the loop. 281 void InitCSEMap(MachineBasicBlock *BB); 282 283 /// getCurPreheader - Get the preheader for the current loop, splitting 284 /// a critical edge if needed. 285 MachineBasicBlock *getCurPreheader(); 286 }; 287 } // end anonymous namespace 288 289 char MachineLICM::ID = 0; 290 char &llvm::MachineLICMID = MachineLICM::ID; 291 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 292 "Machine Loop Invariant Code Motion", false, false) 293 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 294 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 295 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 296 INITIALIZE_PASS_END(MachineLICM, "machinelicm", 297 "Machine Loop Invariant Code Motion", false, false) 298 299 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 300 /// loop that has a unique predecessor. 301 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 302 // Check whether this loop even has a unique predecessor. 303 if (!CurLoop->getLoopPredecessor()) 304 return false; 305 // Ok, now check to see if any of its outer loops do. 306 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 307 if (L->getLoopPredecessor()) 308 return false; 309 // None of them did, so this is the outermost with a unique predecessor. 310 return true; 311 } 312 313 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 314 Changed = FirstInLoop = false; 315 TM = &MF.getTarget(); 316 TII = TM->getInstrInfo(); 317 TLI = TM->getTargetLowering(); 318 TRI = TM->getRegisterInfo(); 319 MFI = MF.getFrameInfo(); 320 MRI = &MF.getRegInfo(); 321 InstrItins = TM->getInstrItineraryData(); 322 323 PreRegAlloc = MRI->isSSA(); 324 325 if (PreRegAlloc) 326 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 327 else 328 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 329 DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); 330 331 if (PreRegAlloc) { 332 // Estimate register pressure during pre-regalloc pass. 333 unsigned NumRC = TRI->getNumRegClasses(); 334 RegPressure.resize(NumRC); 335 std::fill(RegPressure.begin(), RegPressure.end(), 0); 336 RegLimit.resize(NumRC); 337 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 338 E = TRI->regclass_end(); I != E; ++I) 339 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); 340 } 341 342 // Get our Loop information... 343 MLI = &getAnalysis<MachineLoopInfo>(); 344 DT = &getAnalysis<MachineDominatorTree>(); 345 AA = &getAnalysis<AliasAnalysis>(); 346 347 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 348 while (!Worklist.empty()) { 349 CurLoop = Worklist.pop_back_val(); 350 CurPreheader = 0; 351 352 // If this is done before regalloc, only visit outer-most preheader-sporting 353 // loops. 354 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 355 Worklist.append(CurLoop->begin(), CurLoop->end()); 356 continue; 357 } 358 359 if (!PreRegAlloc) 360 HoistRegionPostRA(); 361 else { 362 // CSEMap is initialized for loop header when the first instruction is 363 // being hoisted. 364 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 365 FirstInLoop = true; 366 HoistOutOfLoop(N); 367 CSEMap.clear(); 368 } 369 } 370 371 return Changed; 372 } 373 374 /// InstructionStoresToFI - Return true if instruction stores to the 375 /// specified frame. 376 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 377 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 378 oe = MI->memoperands_end(); o != oe; ++o) { 379 if (!(*o)->isStore() || !(*o)->getValue()) 380 continue; 381 if (const FixedStackPseudoSourceValue *Value = 382 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 383 if (Value->getFrameIndex() == FI) 384 return true; 385 } 386 } 387 return false; 388 } 389 390 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 391 /// gather register def and frame object update information. 392 void MachineLICM::ProcessMI(MachineInstr *MI, 393 BitVector &PhysRegDefs, 394 BitVector &PhysRegClobbers, 395 SmallSet<int, 32> &StoredFIs, 396 SmallVector<CandidateInfo, 32> &Candidates) { 397 bool RuledOut = false; 398 bool HasNonInvariantUse = false; 399 unsigned Def = 0; 400 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 401 const MachineOperand &MO = MI->getOperand(i); 402 if (MO.isFI()) { 403 // Remember if the instruction stores to the frame index. 404 int FI = MO.getIndex(); 405 if (!StoredFIs.count(FI) && 406 MFI->isSpillSlotObjectIndex(FI) && 407 InstructionStoresToFI(MI, FI)) 408 StoredFIs.insert(FI); 409 HasNonInvariantUse = true; 410 continue; 411 } 412 413 // We can't hoist an instruction defining a physreg that is clobbered in 414 // the loop. 415 if (MO.isRegMask()) { 416 PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 417 continue; 418 } 419 420 if (!MO.isReg()) 421 continue; 422 unsigned Reg = MO.getReg(); 423 if (!Reg) 424 continue; 425 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 426 "Not expecting virtual register!"); 427 428 if (!MO.isDef()) { 429 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 430 // If it's using a non-loop-invariant register, then it's obviously not 431 // safe to hoist. 432 HasNonInvariantUse = true; 433 continue; 434 } 435 436 if (MO.isImplicit()) { 437 for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) 438 PhysRegClobbers.set(*AS); 439 if (!MO.isDead()) 440 // Non-dead implicit def? This cannot be hoisted. 441 RuledOut = true; 442 // No need to check if a dead implicit def is also defined by 443 // another instruction. 444 continue; 445 } 446 447 // FIXME: For now, avoid instructions with multiple defs, unless 448 // it's a dead implicit def. 449 if (Def) 450 RuledOut = true; 451 else 452 Def = Reg; 453 454 // If we have already seen another instruction that defines the same 455 // register, then this is not safe. Two defs is indicated by setting a 456 // PhysRegClobbers bit. 457 for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) { 458 if (PhysRegDefs.test(*AS)) 459 PhysRegClobbers.set(*AS); 460 if (PhysRegClobbers.test(*AS)) 461 // MI defined register is seen defined by another instruction in 462 // the loop, it cannot be a LICM candidate. 463 RuledOut = true; 464 PhysRegDefs.set(*AS); 465 } 466 } 467 468 // Only consider reloads for now and remats which do not have register 469 // operands. FIXME: Consider unfold load folding instructions. 470 if (Def && !RuledOut) { 471 int FI = INT_MIN; 472 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 473 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 474 Candidates.push_back(CandidateInfo(MI, Def, FI)); 475 } 476 } 477 478 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 479 /// invariants out to the preheader. 480 void MachineLICM::HoistRegionPostRA() { 481 MachineBasicBlock *Preheader = getCurPreheader(); 482 if (!Preheader) 483 return; 484 485 unsigned NumRegs = TRI->getNumRegs(); 486 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 487 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 488 489 SmallVector<CandidateInfo, 32> Candidates; 490 SmallSet<int, 32> StoredFIs; 491 492 // Walk the entire region, count number of defs for each register, and 493 // collect potential LICM candidates. 494 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 495 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 496 MachineBasicBlock *BB = Blocks[i]; 497 498 // If the header of the loop containing this basic block is a landing pad, 499 // then don't try to hoist instructions out of this loop. 500 const MachineLoop *ML = MLI->getLoopFor(BB); 501 if (ML && ML->getHeader()->isLandingPad()) continue; 502 503 // Conservatively treat live-in's as an external def. 504 // FIXME: That means a reload that're reused in successor block(s) will not 505 // be LICM'ed. 506 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 507 E = BB->livein_end(); I != E; ++I) { 508 unsigned Reg = *I; 509 for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) 510 PhysRegDefs.set(*AS); 511 } 512 513 SpeculationState = SpeculateUnknown; 514 for (MachineBasicBlock::iterator 515 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 516 MachineInstr *MI = &*MII; 517 ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 518 } 519 } 520 521 // Gather the registers read / clobbered by the terminator. 522 BitVector TermRegs(NumRegs); 523 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); 524 if (TI != Preheader->end()) { 525 for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { 526 const MachineOperand &MO = TI->getOperand(i); 527 if (!MO.isReg()) 528 continue; 529 unsigned Reg = MO.getReg(); 530 if (!Reg) 531 continue; 532 for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) 533 TermRegs.set(*AS); 534 } 535 } 536 537 // Now evaluate whether the potential candidates qualify. 538 // 1. Check if the candidate defined register is defined by another 539 // instruction in the loop. 540 // 2. If the candidate is a load from stack slot (always true for now), 541 // check if the slot is stored anywhere in the loop. 542 // 3. Make sure candidate def should not clobber 543 // registers read by the terminator. Similarly its def should not be 544 // clobbered by the terminator. 545 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 546 if (Candidates[i].FI != INT_MIN && 547 StoredFIs.count(Candidates[i].FI)) 548 continue; 549 550 unsigned Def = Candidates[i].Def; 551 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { 552 bool Safe = true; 553 MachineInstr *MI = Candidates[i].MI; 554 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 555 const MachineOperand &MO = MI->getOperand(j); 556 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 557 continue; 558 unsigned Reg = MO.getReg(); 559 if (PhysRegDefs.test(Reg) || 560 PhysRegClobbers.test(Reg)) { 561 // If it's using a non-loop-invariant register, then it's obviously 562 // not safe to hoist. 563 Safe = false; 564 break; 565 } 566 } 567 if (Safe) 568 HoistPostRA(MI, Candidates[i].Def); 569 } 570 } 571 } 572 573 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 574 /// loop, and make sure it is not killed by any instructions in the loop. 575 void MachineLICM::AddToLiveIns(unsigned Reg) { 576 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 577 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 578 MachineBasicBlock *BB = Blocks[i]; 579 if (!BB->isLiveIn(Reg)) 580 BB->addLiveIn(Reg); 581 for (MachineBasicBlock::iterator 582 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 583 MachineInstr *MI = &*MII; 584 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 585 MachineOperand &MO = MI->getOperand(i); 586 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 587 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 588 MO.setIsKill(false); 589 } 590 } 591 } 592 } 593 594 /// HoistPostRA - When an instruction is found to only use loop invariant 595 /// operands that is safe to hoist, this instruction is called to do the 596 /// dirty work. 597 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 598 MachineBasicBlock *Preheader = getCurPreheader(); 599 600 // Now move the instructions to the predecessor, inserting it before any 601 // terminator instructions. 602 DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 603 << MI->getParent()->getNumber() << ": " << *MI); 604 605 // Splice the instruction to the preheader. 606 MachineBasicBlock *MBB = MI->getParent(); 607 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 608 609 // Add register to livein list to all the BBs in the current loop since a 610 // loop invariant must be kept live throughout the whole loop. This is 611 // important to ensure later passes do not scavenge the def register. 612 AddToLiveIns(Def); 613 614 ++NumPostRAHoisted; 615 Changed = true; 616 } 617 618 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 619 // If not then a load from this mbb may not be safe to hoist. 620 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { 621 if (SpeculationState != SpeculateUnknown) 622 return SpeculationState == SpeculateFalse; 623 624 if (BB != CurLoop->getHeader()) { 625 // Check loop exiting blocks. 626 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 627 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 628 for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) 629 if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { 630 SpeculationState = SpeculateTrue; 631 return false; 632 } 633 } 634 635 SpeculationState = SpeculateFalse; 636 return true; 637 } 638 639 void MachineLICM::EnterScope(MachineBasicBlock *MBB) { 640 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 641 642 // Remember livein register pressure. 643 BackTrace.push_back(RegPressure); 644 } 645 646 void MachineLICM::ExitScope(MachineBasicBlock *MBB) { 647 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 648 BackTrace.pop_back(); 649 } 650 651 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 652 /// dominator tree node if its a leaf or all of its children are done. Walk 653 /// up the dominator tree to destroy ancestors which are now done. 654 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, 655 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 656 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 657 if (OpenChildren[Node]) 658 return; 659 660 // Pop scope. 661 ExitScope(Node->getBlock()); 662 663 // Now traverse upwards to pop ancestors whose offsprings are all done. 664 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 665 unsigned Left = --OpenChildren[Parent]; 666 if (Left != 0) 667 break; 668 ExitScope(Parent->getBlock()); 669 Node = Parent; 670 } 671 } 672 673 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 674 /// blocks dominated by the specified header block, and that are in the 675 /// current loop) in depth first order w.r.t the DominatorTree. This allows 676 /// us to visit definitions before uses, allowing us to hoist a loop body in 677 /// one pass without iteration. 678 /// 679 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 680 SmallVector<MachineDomTreeNode*, 32> Scopes; 681 SmallVector<MachineDomTreeNode*, 8> WorkList; 682 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 683 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 684 685 // Perform a DFS walk to determine the order of visit. 686 WorkList.push_back(HeaderN); 687 do { 688 MachineDomTreeNode *Node = WorkList.pop_back_val(); 689 assert(Node != 0 && "Null dominator tree node?"); 690 MachineBasicBlock *BB = Node->getBlock(); 691 692 // If the header of the loop containing this basic block is a landing pad, 693 // then don't try to hoist instructions out of this loop. 694 const MachineLoop *ML = MLI->getLoopFor(BB); 695 if (ML && ML->getHeader()->isLandingPad()) 696 continue; 697 698 // If this subregion is not in the top level loop at all, exit. 699 if (!CurLoop->contains(BB)) 700 continue; 701 702 Scopes.push_back(Node); 703 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 704 unsigned NumChildren = Children.size(); 705 706 // Don't hoist things out of a large switch statement. This often causes 707 // code to be hoisted that wasn't going to be executed, and increases 708 // register pressure in a situation where it's likely to matter. 709 if (BB->succ_size() >= 25) 710 NumChildren = 0; 711 712 OpenChildren[Node] = NumChildren; 713 // Add children in reverse order as then the next popped worklist node is 714 // the first child of this node. This means we ultimately traverse the 715 // DOM tree in exactly the same order as if we'd recursed. 716 for (int i = (int)NumChildren-1; i >= 0; --i) { 717 MachineDomTreeNode *Child = Children[i]; 718 ParentMap[Child] = Node; 719 WorkList.push_back(Child); 720 } 721 } while (!WorkList.empty()); 722 723 if (Scopes.size() != 0) { 724 MachineBasicBlock *Preheader = getCurPreheader(); 725 if (!Preheader) 726 return; 727 728 // Compute registers which are livein into the loop headers. 729 RegSeen.clear(); 730 BackTrace.clear(); 731 InitRegPressure(Preheader); 732 } 733 734 // Now perform LICM. 735 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 736 MachineDomTreeNode *Node = Scopes[i]; 737 MachineBasicBlock *MBB = Node->getBlock(); 738 739 MachineBasicBlock *Preheader = getCurPreheader(); 740 if (!Preheader) 741 continue; 742 743 EnterScope(MBB); 744 745 // Process the block 746 SpeculationState = SpeculateUnknown; 747 for (MachineBasicBlock::iterator 748 MII = MBB->begin(), E = MBB->end(); MII != E; ) { 749 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 750 MachineInstr *MI = &*MII; 751 if (!Hoist(MI, Preheader)) 752 UpdateRegPressure(MI); 753 MII = NextMII; 754 } 755 756 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 757 ExitScopeIfDone(Node, OpenChildren, ParentMap); 758 } 759 } 760 761 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 762 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 763 } 764 765 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 766 /// index, return the ID and cost of its representative register class. 767 void 768 MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, 769 unsigned Reg, unsigned OpIdx, 770 unsigned &RCId, unsigned &RCCost) const { 771 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 772 EVT VT = *RC->vt_begin(); 773 if (VT == MVT::Untyped) { 774 RCId = RC->getID(); 775 RCCost = 1; 776 } else { 777 RCId = TLI->getRepRegClassFor(VT)->getID(); 778 RCCost = TLI->getRepRegClassCostFor(VT); 779 } 780 } 781 782 /// InitRegPressure - Find all virtual register references that are liveout of 783 /// the preheader to initialize the starting "register pressure". Note this 784 /// does not count live through (livein but not used) registers. 785 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 786 std::fill(RegPressure.begin(), RegPressure.end(), 0); 787 788 // If the preheader has only a single predecessor and it ends with a 789 // fallthrough or an unconditional branch, then scan its predecessor for live 790 // defs as well. This happens whenever the preheader is created by splitting 791 // the critical edge from the loop predecessor to the loop header. 792 if (BB->pred_size() == 1) { 793 MachineBasicBlock *TBB = 0, *FBB = 0; 794 SmallVector<MachineOperand, 4> Cond; 795 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 796 InitRegPressure(*BB->pred_begin()); 797 } 798 799 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 800 MII != E; ++MII) { 801 MachineInstr *MI = &*MII; 802 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 803 const MachineOperand &MO = MI->getOperand(i); 804 if (!MO.isReg() || MO.isImplicit()) 805 continue; 806 unsigned Reg = MO.getReg(); 807 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 808 continue; 809 810 bool isNew = RegSeen.insert(Reg); 811 unsigned RCId, RCCost; 812 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 813 if (MO.isDef()) 814 RegPressure[RCId] += RCCost; 815 else { 816 bool isKill = isOperandKill(MO, MRI); 817 if (isNew && !isKill) 818 // Haven't seen this, it must be a livein. 819 RegPressure[RCId] += RCCost; 820 else if (!isNew && isKill) 821 RegPressure[RCId] -= RCCost; 822 } 823 } 824 } 825 } 826 827 /// UpdateRegPressure - Update estimate of register pressure after the 828 /// specified instruction. 829 void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { 830 if (MI->isImplicitDef()) 831 return; 832 833 SmallVector<unsigned, 4> Defs; 834 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 835 const MachineOperand &MO = MI->getOperand(i); 836 if (!MO.isReg() || MO.isImplicit()) 837 continue; 838 unsigned Reg = MO.getReg(); 839 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 840 continue; 841 842 bool isNew = RegSeen.insert(Reg); 843 if (MO.isDef()) 844 Defs.push_back(Reg); 845 else if (!isNew && isOperandKill(MO, MRI)) { 846 unsigned RCId, RCCost; 847 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 848 if (RCCost > RegPressure[RCId]) 849 RegPressure[RCId] = 0; 850 else 851 RegPressure[RCId] -= RCCost; 852 } 853 } 854 855 unsigned Idx = 0; 856 while (!Defs.empty()) { 857 unsigned Reg = Defs.pop_back_val(); 858 unsigned RCId, RCCost; 859 getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); 860 RegPressure[RCId] += RCCost; 861 ++Idx; 862 } 863 } 864 865 /// isLoadFromGOTOrConstantPool - Return true if this machine instruction 866 /// loads from global offset table or constant pool. 867 static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { 868 assert (MI.mayLoad() && "Expected MI that loads!"); 869 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 870 E = MI.memoperands_end(); I != E; ++I) { 871 if (const Value *V = (*I)->getValue()) { 872 if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) 873 if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) 874 return true; 875 } 876 } 877 return false; 878 } 879 880 /// IsLICMCandidate - Returns true if the instruction may be a suitable 881 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 882 /// not safe to hoist it. 883 bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 884 // Check if it's safe to move the instruction. 885 bool DontMoveAcrossStore = true; 886 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 887 return false; 888 889 // If it is load then check if it is guaranteed to execute by making sure that 890 // it dominates all exiting blocks. If it doesn't, then there is a path out of 891 // the loop which does not execute this load, so we can't hoist it. Loads 892 // from constant memory are not safe to speculate all the time, for example 893 // indexed load from a jump table. 894 // Stores and side effects are already checked by isSafeToMove. 895 if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && 896 !IsGuaranteedToExecute(I.getParent())) 897 return false; 898 899 return true; 900 } 901 902 /// IsLoopInvariantInst - Returns true if the instruction is loop 903 /// invariant. I.e., all virtual register operands are defined outside of the 904 /// loop, physical registers aren't accessed explicitly, and there are no side 905 /// effects that aren't captured by the operands or other flags. 906 /// 907 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 908 if (!IsLICMCandidate(I)) 909 return false; 910 911 // The instruction is loop invariant if all of its operands are. 912 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 913 const MachineOperand &MO = I.getOperand(i); 914 915 if (!MO.isReg()) 916 continue; 917 918 unsigned Reg = MO.getReg(); 919 if (Reg == 0) continue; 920 921 // Don't hoist an instruction that uses or defines a physical register. 922 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 923 if (MO.isUse()) { 924 // If the physreg has no defs anywhere, it's just an ambient register 925 // and we can freely move its uses. Alternatively, if it's allocatable, 926 // it could get allocated to something with a def during allocation. 927 if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) 928 return false; 929 // Otherwise it's safe to move. 930 continue; 931 } else if (!MO.isDead()) { 932 // A def that isn't dead. We can't move it. 933 return false; 934 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 935 // If the reg is live into the loop, we can't hoist an instruction 936 // which would clobber it. 937 return false; 938 } 939 } 940 941 if (!MO.isUse()) 942 continue; 943 944 assert(MRI->getVRegDef(Reg) && 945 "Machine instr not mapped for this vreg?!"); 946 947 // If the loop contains the definition of an operand, then the instruction 948 // isn't loop invariant. 949 if (CurLoop->contains(MRI->getVRegDef(Reg))) 950 return false; 951 } 952 953 // If we got this far, the instruction is loop invariant! 954 return true; 955 } 956 957 958 /// HasAnyPHIUse - Return true if the specified register is used by any 959 /// phi node. 960 bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { 961 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 962 UE = MRI->use_end(); UI != UE; ++UI) { 963 MachineInstr *UseMI = &*UI; 964 if (UseMI->isPHI()) 965 return true; 966 // Look pass copies as well. 967 if (UseMI->isCopy()) { 968 unsigned Def = UseMI->getOperand(0).getReg(); 969 if (TargetRegisterInfo::isVirtualRegister(Def) && 970 HasAnyPHIUse(Def)) 971 return true; 972 } 973 } 974 return false; 975 } 976 977 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 978 /// and an use in the current loop, return true if the target considered 979 /// it 'high'. 980 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 981 unsigned DefIdx, unsigned Reg) const { 982 if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) 983 return false; 984 985 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 986 E = MRI->use_nodbg_end(); I != E; ++I) { 987 MachineInstr *UseMI = &*I; 988 if (UseMI->isCopyLike()) 989 continue; 990 if (!CurLoop->contains(UseMI->getParent())) 991 continue; 992 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 993 const MachineOperand &MO = UseMI->getOperand(i); 994 if (!MO.isReg() || !MO.isUse()) 995 continue; 996 unsigned MOReg = MO.getReg(); 997 if (MOReg != Reg) 998 continue; 999 1000 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) 1001 return true; 1002 } 1003 1004 // Only look at the first in loop use. 1005 break; 1006 } 1007 1008 return false; 1009 } 1010 1011 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or 1012 /// the operand latency between its def and a use is one or less. 1013 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1014 if (MI.isAsCheapAsAMove() || MI.isCopyLike()) 1015 return true; 1016 if (!InstrItins || InstrItins->isEmpty()) 1017 return false; 1018 1019 bool isCheap = false; 1020 unsigned NumDefs = MI.getDesc().getNumDefs(); 1021 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1022 MachineOperand &DefMO = MI.getOperand(i); 1023 if (!DefMO.isReg() || !DefMO.isDef()) 1024 continue; 1025 --NumDefs; 1026 unsigned Reg = DefMO.getReg(); 1027 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1028 continue; 1029 1030 if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 1031 return false; 1032 isCheap = true; 1033 } 1034 1035 return isCheap; 1036 } 1037 1038 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check 1039 /// if hoisting an instruction of the given cost matrix can cause high 1040 /// register pressure. 1041 bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { 1042 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 1043 CI != CE; ++CI) { 1044 if (CI->second <= 0) 1045 continue; 1046 1047 unsigned RCId = CI->first; 1048 unsigned Limit = RegLimit[RCId]; 1049 int Cost = CI->second; 1050 for (unsigned i = BackTrace.size(); i != 0; --i) { 1051 SmallVector<unsigned, 8> &RP = BackTrace[i-1]; 1052 if (RP[RCId] + Cost >= Limit) 1053 return true; 1054 } 1055 } 1056 1057 return false; 1058 } 1059 1060 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the 1061 /// current block and update their register pressures to reflect the effect 1062 /// of hoisting MI from the current block to the preheader. 1063 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { 1064 if (MI->isImplicitDef()) 1065 return; 1066 1067 // First compute the 'cost' of the instruction, i.e. its contribution 1068 // to register pressure. 1069 DenseMap<unsigned, int> Cost; 1070 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1071 const MachineOperand &MO = MI->getOperand(i); 1072 if (!MO.isReg() || MO.isImplicit()) 1073 continue; 1074 unsigned Reg = MO.getReg(); 1075 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1076 continue; 1077 1078 unsigned RCId, RCCost; 1079 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 1080 if (MO.isDef()) { 1081 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1082 if (CI != Cost.end()) 1083 CI->second += RCCost; 1084 else 1085 Cost.insert(std::make_pair(RCId, RCCost)); 1086 } else if (isOperandKill(MO, MRI)) { 1087 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1088 if (CI != Cost.end()) 1089 CI->second -= RCCost; 1090 else 1091 Cost.insert(std::make_pair(RCId, -RCCost)); 1092 } 1093 } 1094 1095 // Update register pressure of blocks from loop header to current block. 1096 for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { 1097 SmallVector<unsigned, 8> &RP = BackTrace[i]; 1098 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 1099 CI != CE; ++CI) { 1100 unsigned RCId = CI->first; 1101 RP[RCId] += CI->second; 1102 } 1103 } 1104 } 1105 1106 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist 1107 /// the given loop invariant. 1108 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 1109 if (MI.isImplicitDef()) 1110 return true; 1111 1112 // If the instruction is cheap, only hoist if it is re-materilizable. LICM 1113 // will increase register pressure. It's probably not worth it if the 1114 // instruction is cheap. 1115 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting 1116 // these tend to help performance in low register pressure situation. The 1117 // trade off is it may cause spill in high pressure situation. It will end up 1118 // adding a store in the loop preheader. But the reload is no more expensive. 1119 // The side benefit is these loads are frequently CSE'ed. 1120 if (IsCheapInstruction(MI)) { 1121 if (!TII->isTriviallyReMaterializable(&MI, AA)) 1122 return false; 1123 } else { 1124 // Estimate register pressure to determine whether to LICM the instruction. 1125 // In low register pressure situation, we can be more aggressive about 1126 // hoisting. Also, favors hoisting long latency instructions even in 1127 // moderately high pressure situation. 1128 // FIXME: If there are long latency loop-invariant instructions inside the 1129 // loop at this point, why didn't the optimizer's LICM hoist them? 1130 DenseMap<unsigned, int> Cost; 1131 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 1132 const MachineOperand &MO = MI.getOperand(i); 1133 if (!MO.isReg() || MO.isImplicit()) 1134 continue; 1135 unsigned Reg = MO.getReg(); 1136 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1137 continue; 1138 1139 unsigned RCId, RCCost; 1140 getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); 1141 if (MO.isDef()) { 1142 if (HasHighOperandLatency(MI, i, Reg)) { 1143 ++NumHighLatency; 1144 return true; 1145 } 1146 1147 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1148 if (CI != Cost.end()) 1149 CI->second += RCCost; 1150 else 1151 Cost.insert(std::make_pair(RCId, RCCost)); 1152 } else if (isOperandKill(MO, MRI)) { 1153 // Is a virtual register use is a kill, hoisting it out of the loop 1154 // may actually reduce register pressure or be register pressure 1155 // neutral. 1156 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 1157 if (CI != Cost.end()) 1158 CI->second -= RCCost; 1159 else 1160 Cost.insert(std::make_pair(RCId, -RCCost)); 1161 } 1162 } 1163 1164 // Visit BBs from header to current BB, if hoisting this doesn't cause 1165 // high register pressure, then it's safe to proceed. 1166 if (!CanCauseHighRegPressure(Cost)) { 1167 ++NumLowRP; 1168 return true; 1169 } 1170 1171 // Do not "speculate" in high register pressure situation. If an 1172 // instruction is not guaranteed to be executed in the loop, it's best to be 1173 // conservative. 1174 if (AvoidSpeculation && 1175 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) 1176 return false; 1177 1178 // High register pressure situation, only hoist if the instruction is going 1179 // to be remat'ed. 1180 if (!TII->isTriviallyReMaterializable(&MI, AA) && 1181 !MI.isInvariantLoad(AA)) 1182 return false; 1183 } 1184 1185 // If result(s) of this instruction is used by PHIs outside of the loop, then 1186 // don't hoist it if the instruction because it will introduce an extra copy. 1187 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 1188 const MachineOperand &MO = MI.getOperand(i); 1189 if (!MO.isReg() || !MO.isDef()) 1190 continue; 1191 if (HasAnyPHIUse(MO.getReg())) 1192 return false; 1193 } 1194 1195 return true; 1196 } 1197 1198 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 1199 // Don't unfold simple loads. 1200 if (MI->canFoldAsLoad()) 1201 return 0; 1202 1203 // If not, we may be able to unfold a load and hoist that. 1204 // First test whether the instruction is loading from an amenable 1205 // memory location. 1206 if (!MI->isInvariantLoad(AA)) 1207 return 0; 1208 1209 // Next determine the register class for a temporary register. 1210 unsigned LoadRegIndex; 1211 unsigned NewOpc = 1212 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 1213 /*UnfoldLoad=*/true, 1214 /*UnfoldStore=*/false, 1215 &LoadRegIndex); 1216 if (NewOpc == 0) return 0; 1217 const MCInstrDesc &MID = TII->get(NewOpc); 1218 if (MID.getNumDefs() != 1) return 0; 1219 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); 1220 // Ok, we're unfolding. Create a temporary register and do the unfold. 1221 unsigned Reg = MRI->createVirtualRegister(RC); 1222 1223 MachineFunction &MF = *MI->getParent()->getParent(); 1224 SmallVector<MachineInstr *, 2> NewMIs; 1225 bool Success = 1226 TII->unfoldMemoryOperand(MF, MI, Reg, 1227 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 1228 NewMIs); 1229 (void)Success; 1230 assert(Success && 1231 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 1232 "succeeded!"); 1233 assert(NewMIs.size() == 2 && 1234 "Unfolded a load into multiple instructions!"); 1235 MachineBasicBlock *MBB = MI->getParent(); 1236 MachineBasicBlock::iterator Pos = MI; 1237 MBB->insert(Pos, NewMIs[0]); 1238 MBB->insert(Pos, NewMIs[1]); 1239 // If unfolding produced a load that wasn't loop-invariant or profitable to 1240 // hoist, discard the new instructions and bail. 1241 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 1242 NewMIs[0]->eraseFromParent(); 1243 NewMIs[1]->eraseFromParent(); 1244 return 0; 1245 } 1246 1247 // Update register pressure for the unfolded instruction. 1248 UpdateRegPressure(NewMIs[1]); 1249 1250 // Otherwise we successfully unfolded a load that we can hoist. 1251 MI->eraseFromParent(); 1252 return NewMIs[0]; 1253 } 1254 1255 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 1256 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 1257 const MachineInstr *MI = &*I; 1258 unsigned Opcode = MI->getOpcode(); 1259 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1260 CI = CSEMap.find(Opcode); 1261 if (CI != CSEMap.end()) 1262 CI->second.push_back(MI); 1263 else { 1264 std::vector<const MachineInstr*> CSEMIs; 1265 CSEMIs.push_back(MI); 1266 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1267 } 1268 } 1269 } 1270 1271 const MachineInstr* 1272 MachineLICM::LookForDuplicate(const MachineInstr *MI, 1273 std::vector<const MachineInstr*> &PrevMIs) { 1274 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 1275 const MachineInstr *PrevMI = PrevMIs[i]; 1276 if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) 1277 return PrevMI; 1278 } 1279 return 0; 1280 } 1281 1282 bool MachineLICM::EliminateCSE(MachineInstr *MI, 1283 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 1284 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1285 // the undef property onto uses. 1286 if (CI == CSEMap.end() || MI->isImplicitDef()) 1287 return false; 1288 1289 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 1290 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 1291 1292 // Replace virtual registers defined by MI by their counterparts defined 1293 // by Dup. 1294 SmallVector<unsigned, 2> Defs; 1295 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1296 const MachineOperand &MO = MI->getOperand(i); 1297 1298 // Physical registers may not differ here. 1299 assert((!MO.isReg() || MO.getReg() == 0 || 1300 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 1301 MO.getReg() == Dup->getOperand(i).getReg()) && 1302 "Instructions with different phys regs are not identical!"); 1303 1304 if (MO.isReg() && MO.isDef() && 1305 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 1306 Defs.push_back(i); 1307 } 1308 1309 SmallVector<const TargetRegisterClass*, 2> OrigRCs; 1310 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1311 unsigned Idx = Defs[i]; 1312 unsigned Reg = MI->getOperand(Idx).getReg(); 1313 unsigned DupReg = Dup->getOperand(Idx).getReg(); 1314 OrigRCs.push_back(MRI->getRegClass(DupReg)); 1315 1316 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { 1317 // Restore old RCs if more than one defs. 1318 for (unsigned j = 0; j != i; ++j) 1319 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); 1320 return false; 1321 } 1322 } 1323 1324 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1325 unsigned Idx = Defs[i]; 1326 unsigned Reg = MI->getOperand(Idx).getReg(); 1327 unsigned DupReg = Dup->getOperand(Idx).getReg(); 1328 MRI->replaceRegWith(Reg, DupReg); 1329 MRI->clearKillFlags(DupReg); 1330 } 1331 1332 MI->eraseFromParent(); 1333 ++NumCSEed; 1334 return true; 1335 } 1336 return false; 1337 } 1338 1339 /// MayCSE - Return true if the given instruction will be CSE'd if it's 1340 /// hoisted out of the loop. 1341 bool MachineLICM::MayCSE(MachineInstr *MI) { 1342 unsigned Opcode = MI->getOpcode(); 1343 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1344 CI = CSEMap.find(Opcode); 1345 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1346 // the undef property onto uses. 1347 if (CI == CSEMap.end() || MI->isImplicitDef()) 1348 return false; 1349 1350 return LookForDuplicate(MI, CI->second) != 0; 1351 } 1352 1353 /// Hoist - When an instruction is found to use only loop invariant operands 1354 /// that are safe to hoist, this instruction is called to do the dirty work. 1355 /// 1356 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1357 // First check whether we should hoist this instruction. 1358 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1359 // If not, try unfolding a hoistable load. 1360 MI = ExtractHoistableLoad(MI); 1361 if (!MI) return false; 1362 } 1363 1364 // Now move the instructions to the predecessor, inserting it before any 1365 // terminator instructions. 1366 DEBUG({ 1367 dbgs() << "Hoisting " << *MI; 1368 if (Preheader->getBasicBlock()) 1369 dbgs() << " to MachineBasicBlock " 1370 << Preheader->getName(); 1371 if (MI->getParent()->getBasicBlock()) 1372 dbgs() << " from MachineBasicBlock " 1373 << MI->getParent()->getName(); 1374 dbgs() << "\n"; 1375 }); 1376 1377 // If this is the first instruction being hoisted to the preheader, 1378 // initialize the CSE map with potential common expressions. 1379 if (FirstInLoop) { 1380 InitCSEMap(Preheader); 1381 FirstInLoop = false; 1382 } 1383 1384 // Look for opportunity to CSE the hoisted instruction. 1385 unsigned Opcode = MI->getOpcode(); 1386 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1387 CI = CSEMap.find(Opcode); 1388 if (!EliminateCSE(MI, CI)) { 1389 // Otherwise, splice the instruction to the preheader. 1390 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1391 1392 // Update register pressure for BBs from header to this block. 1393 UpdateBackTraceRegPressure(MI); 1394 1395 // Clear the kill flags of any register this instruction defines, 1396 // since they may need to be live throughout the entire loop 1397 // rather than just live for part of it. 1398 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1399 MachineOperand &MO = MI->getOperand(i); 1400 if (MO.isReg() && MO.isDef() && !MO.isDead()) 1401 MRI->clearKillFlags(MO.getReg()); 1402 } 1403 1404 // Add to the CSE map. 1405 if (CI != CSEMap.end()) 1406 CI->second.push_back(MI); 1407 else { 1408 std::vector<const MachineInstr*> CSEMIs; 1409 CSEMIs.push_back(MI); 1410 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1411 } 1412 } 1413 1414 ++NumHoisted; 1415 Changed = true; 1416 1417 return true; 1418 } 1419 1420 MachineBasicBlock *MachineLICM::getCurPreheader() { 1421 // Determine the block to which to hoist instructions. If we can't find a 1422 // suitable loop predecessor, we can't do any hoisting. 1423 1424 // If we've tried to get a preheader and failed, don't try again. 1425 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1426 return 0; 1427 1428 if (!CurPreheader) { 1429 CurPreheader = CurLoop->getLoopPreheader(); 1430 if (!CurPreheader) { 1431 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1432 if (!Pred) { 1433 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1434 return 0; 1435 } 1436 1437 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 1438 if (!CurPreheader) { 1439 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1440 return 0; 1441 } 1442 } 1443 } 1444 return CurPreheader; 1445 } 1446