1 //===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the RAGreedy function pass for register allocation in 11 // optimized builds. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "regalloc" 16 #include "llvm/CodeGen/Passes.h" 17 #include "AllocationOrder.h" 18 #include "InterferenceCache.h" 19 #include "LiveDebugVariables.h" 20 #include "RegAllocBase.h" 21 #include "SpillPlacement.h" 22 #include "Spiller.h" 23 #include "SplitKit.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/Analysis/AliasAnalysis.h" 26 #include "llvm/CodeGen/CalcSpillWeights.h" 27 #include "llvm/CodeGen/EdgeBundles.h" 28 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 29 #include "llvm/CodeGen/LiveRangeEdit.h" 30 #include "llvm/CodeGen/LiveRegMatrix.h" 31 #include "llvm/CodeGen/LiveStackAnalysis.h" 32 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 33 #include "llvm/CodeGen/MachineDominators.h" 34 #include "llvm/CodeGen/MachineFunctionPass.h" 35 #include "llvm/CodeGen/MachineLoopInfo.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/CodeGen/RegAllocRegistry.h" 38 #include "llvm/CodeGen/RegisterClassInfo.h" 39 #include "llvm/CodeGen/VirtRegMap.h" 40 #include "llvm/PassAnalysisSupport.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/Timer.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include <queue> 47 48 using namespace llvm; 49 50 STATISTIC(NumGlobalSplits, "Number of split global live ranges"); 51 STATISTIC(NumLocalSplits, "Number of split local live ranges"); 52 STATISTIC(NumEvicted, "Number 
of interferences evicted"); 53 54 static cl::opt<SplitEditor::ComplementSpillMode> 55 SplitSpillMode("split-spill-mode", cl::Hidden, 56 cl::desc("Spill mode for splitting live ranges"), 57 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), 58 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), 59 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), 60 clEnumValEnd), 61 cl::init(SplitEditor::SM_Partition)); 62 63 static cl::opt<unsigned> 64 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, 65 cl::desc("Last chance recoloring max depth"), 66 cl::init(5)); 67 68 static cl::opt<unsigned> LastChanceRecoloringMaxInterference( 69 "lcr-max-interf", cl::Hidden, 70 cl::desc("Last chance recoloring maximum number of considered" 71 " interference at a time"), 72 cl::init(8)); 73 74 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", 75 createGreedyRegisterAllocator); 76 77 namespace { 78 class RAGreedy : public MachineFunctionPass, 79 public RegAllocBase, 80 private LiveRangeEdit::Delegate { 81 // Convenient shortcuts. 82 typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue; 83 typedef SmallPtrSet<LiveInterval *, 4> SmallLISet; 84 typedef SmallSet<unsigned, 16> SmallVirtRegSet; 85 86 // context 87 MachineFunction *MF; 88 89 // Shortcuts to some useful interface. 90 const TargetInstrInfo *TII; 91 const TargetRegisterInfo *TRI; 92 RegisterClassInfo RCI; 93 94 // analyses 95 SlotIndexes *Indexes; 96 MachineBlockFrequencyInfo *MBFI; 97 MachineDominatorTree *DomTree; 98 MachineLoopInfo *Loops; 99 EdgeBundles *Bundles; 100 SpillPlacement *SpillPlacer; 101 LiveDebugVariables *DebugVars; 102 103 // state 104 OwningPtr<Spiller> SpillerInstance; 105 PQueue Queue; 106 unsigned NextCascade; 107 108 // Live ranges pass through a number of stages as we try to allocate them. 109 // Some of the stages may also create new live ranges: 110 // 111 // - Region splitting. 112 // - Per-block splitting. 
113 // - Local splitting. 114 // - Spilling. 115 // 116 // Ranges produced by one of the stages skip the previous stages when they are 117 // dequeued. This improves performance because we can skip interference checks 118 // that are unlikely to give any results. It also guarantees that the live 119 // range splitting algorithm terminates, something that is otherwise hard to 120 // ensure. 121 enum LiveRangeStage { 122 /// Newly created live range that has never been queued. 123 RS_New, 124 125 /// Only attempt assignment and eviction. Then requeue as RS_Split. 126 RS_Assign, 127 128 /// Attempt live range splitting if assignment is impossible. 129 RS_Split, 130 131 /// Attempt more aggressive live range splitting that is guaranteed to make 132 /// progress. This is used for split products that may not be making 133 /// progress. 134 RS_Split2, 135 136 /// Live range will be spilled. No more splitting will be attempted. 137 RS_Spill, 138 139 /// There is nothing more we can do to this live range. Abort compilation 140 /// if it can't be assigned. 141 RS_Done 142 }; 143 144 #ifndef NDEBUG 145 static const char *const StageName[]; 146 #endif 147 148 // RegInfo - Keep additional information about each live range. 149 struct RegInfo { 150 LiveRangeStage Stage; 151 152 // Cascade - Eviction loop prevention. See canEvictInterference(). 
153 unsigned Cascade; 154 155 RegInfo() : Stage(RS_New), Cascade(0) {} 156 }; 157 158 IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; 159 160 LiveRangeStage getStage(const LiveInterval &VirtReg) const { 161 return ExtraRegInfo[VirtReg.reg].Stage; 162 } 163 164 void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { 165 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 166 ExtraRegInfo[VirtReg.reg].Stage = Stage; 167 } 168 169 template<typename Iterator> 170 void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { 171 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 172 for (;Begin != End; ++Begin) { 173 unsigned Reg = *Begin; 174 if (ExtraRegInfo[Reg].Stage == RS_New) 175 ExtraRegInfo[Reg].Stage = NewStage; 176 } 177 } 178 179 /// Cost of evicting interference. 180 struct EvictionCost { 181 unsigned BrokenHints; ///< Total number of broken hints. 182 float MaxWeight; ///< Maximum spill weight evicted. 183 184 EvictionCost(): BrokenHints(0), MaxWeight(0) {} 185 186 bool isMax() const { return BrokenHints == ~0u; } 187 188 void setMax() { BrokenHints = ~0u; } 189 190 void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } 191 192 bool operator<(const EvictionCost &O) const { 193 if (BrokenHints != O.BrokenHints) 194 return BrokenHints < O.BrokenHints; 195 return MaxWeight < O.MaxWeight; 196 } 197 }; 198 199 // splitting state. 200 OwningPtr<SplitAnalysis> SA; 201 OwningPtr<SplitEditor> SE; 202 203 /// Cached per-block interference maps 204 InterferenceCache IntfCache; 205 206 /// All basic blocks where the current register has uses. 207 SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; 208 209 /// Global live range splitting candidate info. 210 struct GlobalSplitCandidate { 211 // Register intended for assignment, or 0. 212 unsigned PhysReg; 213 214 // SplitKit interval index for this candidate. 215 unsigned IntvIdx; 216 217 // Interference for PhysReg. 
218 InterferenceCache::Cursor Intf; 219 220 // Bundles where this candidate should be live. 221 BitVector LiveBundles; 222 SmallVector<unsigned, 8> ActiveBlocks; 223 224 void reset(InterferenceCache &Cache, unsigned Reg) { 225 PhysReg = Reg; 226 IntvIdx = 0; 227 Intf.setPhysReg(Cache, Reg); 228 LiveBundles.clear(); 229 ActiveBlocks.clear(); 230 } 231 232 // Set B[i] = C for every live bundle where B[i] was NoCand. 233 unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { 234 unsigned Count = 0; 235 for (int i = LiveBundles.find_first(); i >= 0; 236 i = LiveBundles.find_next(i)) 237 if (B[i] == NoCand) { 238 B[i] = C; 239 Count++; 240 } 241 return Count; 242 } 243 }; 244 245 /// Candidate info for each PhysReg in AllocationOrder. 246 /// This vector never shrinks, but grows to the size of the largest register 247 /// class. 248 SmallVector<GlobalSplitCandidate, 32> GlobalCand; 249 250 enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; 251 252 /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to 253 /// NoCand which indicates the stack interval. 254 SmallVector<unsigned, 32> BundleCand; 255 256 public: 257 RAGreedy(); 258 259 /// Return the pass name. 260 virtual const char* getPassName() const { 261 return "Greedy Register Allocator"; 262 } 263 264 /// RAGreedy analysis usage. 265 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 266 virtual void releaseMemory(); 267 virtual Spiller &spiller() { return *SpillerInstance; } 268 virtual void enqueue(LiveInterval *LI); 269 virtual LiveInterval *dequeue(); 270 virtual unsigned selectOrSplit(LiveInterval&, 271 SmallVectorImpl<unsigned>&); 272 273 /// Perform register allocation. 
274 virtual bool runOnMachineFunction(MachineFunction &mf); 275 276 static char ID; 277 278 private: 279 unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, 280 SmallVirtRegSet &, unsigned = 0); 281 282 bool LRE_CanEraseVirtReg(unsigned); 283 void LRE_WillShrinkVirtReg(unsigned); 284 void LRE_DidCloneVirtReg(unsigned, unsigned); 285 void enqueue(PQueue &CurQueue, LiveInterval *LI); 286 LiveInterval *dequeue(PQueue &CurQueue); 287 288 BlockFrequency calcSpillCost(); 289 bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); 290 void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); 291 void growRegion(GlobalSplitCandidate &Cand); 292 BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); 293 bool calcCompactRegion(GlobalSplitCandidate&); 294 void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); 295 void calcGapWeights(unsigned, SmallVectorImpl<float>&); 296 unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); 297 bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); 298 bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); 299 void evictInterference(LiveInterval&, unsigned, 300 SmallVectorImpl<unsigned>&); 301 bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, 302 SmallLISet &RecoloringCandidates, 303 const SmallVirtRegSet &FixedRegisters); 304 305 unsigned tryAssign(LiveInterval&, AllocationOrder&, 306 SmallVectorImpl<unsigned>&); 307 unsigned tryEvict(LiveInterval&, AllocationOrder&, 308 SmallVectorImpl<unsigned>&, unsigned = ~0u); 309 unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, 310 SmallVectorImpl<unsigned>&); 311 unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, 312 SmallVectorImpl<unsigned>&); 313 unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, 314 SmallVectorImpl<unsigned>&); 315 unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, 316 SmallVectorImpl<unsigned>&); 317 unsigned trySplit(LiveInterval&, 
AllocationOrder&, 318 SmallVectorImpl<unsigned>&); 319 unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, 320 SmallVectorImpl<unsigned> &, 321 SmallVirtRegSet &, unsigned); 322 bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, 323 SmallVirtRegSet &, unsigned); 324 }; 325 } // end anonymous namespace 326 327 char RAGreedy::ID = 0; 328 329 #ifndef NDEBUG 330 const char *const RAGreedy::StageName[] = { 331 "RS_New", 332 "RS_Assign", 333 "RS_Split", 334 "RS_Split2", 335 "RS_Spill", 336 "RS_Done" 337 }; 338 #endif 339 340 // Hysteresis to use when comparing floats. 341 // This helps stabilize decisions based on float comparisons. 342 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 343 344 345 FunctionPass* llvm::createGreedyRegisterAllocator() { 346 return new RAGreedy(); 347 } 348 349 RAGreedy::RAGreedy(): MachineFunctionPass(ID) { 350 initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); 351 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 352 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); 353 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 354 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 355 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 356 initializeLiveStacksPass(*PassRegistry::getPassRegistry()); 357 initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); 358 initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); 359 initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); 360 initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); 361 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); 362 initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); 363 } 364 365 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { 366 AU.setPreservesCFG(); 367 AU.addRequired<MachineBlockFrequencyInfo>(); 368 AU.addPreserved<MachineBlockFrequencyInfo>(); 369 
AU.addRequired<AliasAnalysis>(); 370 AU.addPreserved<AliasAnalysis>(); 371 AU.addRequired<LiveIntervals>(); 372 AU.addPreserved<LiveIntervals>(); 373 AU.addRequired<SlotIndexes>(); 374 AU.addPreserved<SlotIndexes>(); 375 AU.addRequired<LiveDebugVariables>(); 376 AU.addPreserved<LiveDebugVariables>(); 377 AU.addRequired<LiveStacks>(); 378 AU.addPreserved<LiveStacks>(); 379 AU.addRequired<MachineDominatorTree>(); 380 AU.addPreserved<MachineDominatorTree>(); 381 AU.addRequired<MachineLoopInfo>(); 382 AU.addPreserved<MachineLoopInfo>(); 383 AU.addRequired<VirtRegMap>(); 384 AU.addPreserved<VirtRegMap>(); 385 AU.addRequired<LiveRegMatrix>(); 386 AU.addPreserved<LiveRegMatrix>(); 387 AU.addRequired<EdgeBundles>(); 388 AU.addRequired<SpillPlacement>(); 389 MachineFunctionPass::getAnalysisUsage(AU); 390 } 391 392 393 //===----------------------------------------------------------------------===// 394 // LiveRangeEdit delegate methods 395 //===----------------------------------------------------------------------===// 396 397 bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { 398 if (VRM->hasPhys(VirtReg)) { 399 Matrix->unassign(LIS->getInterval(VirtReg)); 400 return true; 401 } 402 // Unassigned virtreg is probably in the priority queue. 403 // RegAllocBase will erase it after dequeueing. 404 return false; 405 } 406 407 void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { 408 if (!VRM->hasPhys(VirtReg)) 409 return; 410 411 // Register is assigned, put it back on the queue for reassignment. 412 LiveInterval &LI = LIS->getInterval(VirtReg); 413 Matrix->unassign(LI); 414 enqueue(&LI); 415 } 416 417 void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { 418 // Cloning a register we haven't even heard about yet? Just ignore it. 419 if (!ExtraRegInfo.inBounds(Old)) 420 return; 421 422 // LRE may clone a virtual register because dead code elimination causes it to 423 // be split into connected components. 
The new components are much smaller 424 // than the original, so they should get a new chance at being assigned. 425 // same stage as the parent. 426 ExtraRegInfo[Old].Stage = RS_Assign; 427 ExtraRegInfo.grow(New); 428 ExtraRegInfo[New] = ExtraRegInfo[Old]; 429 } 430 431 void RAGreedy::releaseMemory() { 432 SpillerInstance.reset(0); 433 ExtraRegInfo.clear(); 434 GlobalCand.clear(); 435 } 436 437 void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } 438 439 void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { 440 // Prioritize live ranges by size, assigning larger ranges first. 441 // The queue holds (size, reg) pairs. 442 const unsigned Size = LI->getSize(); 443 const unsigned Reg = LI->reg; 444 assert(TargetRegisterInfo::isVirtualRegister(Reg) && 445 "Can only enqueue virtual registers"); 446 unsigned Prio; 447 448 ExtraRegInfo.grow(Reg); 449 if (ExtraRegInfo[Reg].Stage == RS_New) 450 ExtraRegInfo[Reg].Stage = RS_Assign; 451 452 if (ExtraRegInfo[Reg].Stage == RS_Split) { 453 // Unsplit ranges that couldn't be allocated immediately are deferred until 454 // everything else has been allocated. 455 Prio = Size; 456 } else { 457 if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && 458 LIS->intervalIsInOneMBB(*LI)) { 459 // Allocate original local ranges in linear instruction order. Since they 460 // are singly defined, this produces optimal coloring in the absence of 461 // global interference and other constraints. 462 if (!TRI->reverseLocalAssignment()) 463 Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); 464 else { 465 // Allocating bottom up may allow many short LRGs to be assigned first 466 // to one of the cheap registers. This could be much faster for very 467 // large blocks on targets with many physical registers. 468 Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); 469 } 470 } 471 else { 472 // Allocate global and split ranges in long->short order. 
Long ranges that 473 // don't fit should be spilled (or split) ASAP so they don't create 474 // interference. Mark a bit to prioritize global above local ranges. 475 Prio = (1u << 29) + Size; 476 } 477 // Mark a higher bit to prioritize global and local above RS_Split. 478 Prio |= (1u << 31); 479 480 // Boost ranges that have a physical register hint. 481 if (VRM->hasKnownPreference(Reg)) 482 Prio |= (1u << 30); 483 } 484 // The virtual register number is a tie breaker for same-sized ranges. 485 // Give lower vreg numbers higher priority to assign them first. 486 CurQueue.push(std::make_pair(Prio, ~Reg)); 487 } 488 489 LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } 490 491 LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { 492 if (CurQueue.empty()) 493 return 0; 494 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); 495 CurQueue.pop(); 496 return LI; 497 } 498 499 500 //===----------------------------------------------------------------------===// 501 // Direct Assignment 502 //===----------------------------------------------------------------------===// 503 504 /// tryAssign - Try to assign VirtReg to an available register. 505 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, 506 AllocationOrder &Order, 507 SmallVectorImpl<unsigned> &NewVRegs) { 508 Order.rewind(); 509 unsigned PhysReg; 510 while ((PhysReg = Order.next())) 511 if (!Matrix->checkInterference(VirtReg, PhysReg)) 512 break; 513 if (!PhysReg || Order.isHint()) 514 return PhysReg; 515 516 // PhysReg is available, but there may be a better choice. 517 518 // If we missed a simple hint, try to cheaply evict interference from the 519 // preferred register. 
520 if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) 521 if (Order.isHint(Hint)) { 522 DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); 523 EvictionCost MaxCost; 524 MaxCost.setBrokenHints(1); 525 if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { 526 evictInterference(VirtReg, Hint, NewVRegs); 527 return Hint; 528 } 529 } 530 531 // Try to evict interference from a cheaper alternative. 532 unsigned Cost = TRI->getCostPerUse(PhysReg); 533 534 // Most registers have 0 additional cost. 535 if (!Cost) 536 return PhysReg; 537 538 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost 539 << '\n'); 540 unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); 541 return CheapReg ? CheapReg : PhysReg; 542 } 543 544 545 //===----------------------------------------------------------------------===// 546 // Interference eviction 547 //===----------------------------------------------------------------------===// 548 549 unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { 550 AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); 551 unsigned PhysReg; 552 while ((PhysReg = Order.next())) { 553 if (PhysReg == PrevReg) 554 continue; 555 556 MCRegUnitIterator Units(PhysReg, TRI); 557 for (; Units.isValid(); ++Units) { 558 // Instantiate a "subquery", not to be confused with the Queries array. 559 LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); 560 if (subQ.checkInterference()) 561 break; 562 } 563 // If no units have interference, break out with the current PhysReg. 564 if (!Units.isValid()) 565 break; 566 } 567 if (PhysReg) 568 DEBUG(dbgs() << "can reassign: " << VirtReg << " from " 569 << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) 570 << '\n'); 571 return PhysReg; 572 } 573 574 /// shouldEvict - determine if A should evict the assigned live range B. 
The 575 /// eviction policy defined by this function together with the allocation order 576 /// defined by enqueue() decides which registers ultimately end up being split 577 /// and spilled. 578 /// 579 /// Cascade numbers are used to prevent infinite loops if this function is a 580 /// cyclic relation. 581 /// 582 /// @param A The live range to be assigned. 583 /// @param IsHint True when A is about to be assigned to its preferred 584 /// register. 585 /// @param B The live range to be evicted. 586 /// @param BreaksHint True when B is already assigned to its preferred register. 587 bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, 588 LiveInterval &B, bool BreaksHint) { 589 bool CanSplit = getStage(B) < RS_Spill; 590 591 // Be fairly aggressive about following hints as long as the evictee can be 592 // split. 593 if (CanSplit && IsHint && !BreaksHint) 594 return true; 595 596 if (A.weight > B.weight) { 597 DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); 598 return true; 599 } 600 return false; 601 } 602 603 /// canEvictInterference - Return true if all interferences between VirtReg and 604 /// PhysReg can be evicted. When OnlyCheap is set, don't do anything 605 /// 606 /// @param VirtReg Live range that is about to be assigned. 607 /// @param PhysReg Desired register for assignment. 608 /// @param IsHint True when PhysReg is VirtReg's preferred register. 609 /// @param MaxCost Only look for cheaper candidates and update with new cost 610 /// when returning true. 611 /// @returns True when interference can be evicted cheaper than MaxCost. 612 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, 613 bool IsHint, EvictionCost &MaxCost) { 614 // It is only possible to evict virtual register interference. 615 if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) 616 return false; 617 618 bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); 619 620 // Find VirtReg's cascade number. 
This will be unassigned if VirtReg was never 621 // involved in an eviction before. If a cascade number was assigned, deny 622 // evicting anything with the same or a newer cascade number. This prevents 623 // infinite eviction loops. 624 // 625 // This works out so a register without a cascade number is allowed to evict 626 // anything, and it can be evicted by anything. 627 unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; 628 if (!Cascade) 629 Cascade = NextCascade; 630 631 EvictionCost Cost; 632 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 633 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 634 // If there is 10 or more interferences, chances are one is heavier. 635 if (Q.collectInterferingVRegs(10) >= 10) 636 return false; 637 638 // Check if any interfering live range is heavier than MaxWeight. 639 for (unsigned i = Q.interferingVRegs().size(); i; --i) { 640 LiveInterval *Intf = Q.interferingVRegs()[i - 1]; 641 assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && 642 "Only expecting virtual register interference from query"); 643 // Never evict spill products. They cannot split or spill. 644 if (getStage(*Intf) == RS_Done) 645 return false; 646 // Once a live range becomes small enough, it is urgent that we find a 647 // register for it. This is indicated by an infinite spill weight. These 648 // urgent live ranges get to evict almost anything. 649 // 650 // Also allow urgent evictions of unspillable ranges from a strictly 651 // larger allocation order. 652 bool Urgent = !VirtReg.isSpillable() && 653 (Intf->isSpillable() || 654 RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < 655 RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); 656 // Only evict older cascades or live ranges without a cascade. 
657 unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; 658 if (Cascade <= IntfCascade) { 659 if (!Urgent) 660 return false; 661 // We permit breaking cascades for urgent evictions. It should be the 662 // last resort, though, so make it really expensive. 663 Cost.BrokenHints += 10; 664 } 665 // Would this break a satisfied hint? 666 bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); 667 // Update eviction cost. 668 Cost.BrokenHints += BreaksHint; 669 Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); 670 // Abort if this would be too expensive. 671 if (!(Cost < MaxCost)) 672 return false; 673 if (Urgent) 674 continue; 675 // Apply the eviction policy for non-urgent evictions. 676 if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) 677 return false; 678 // If !MaxCost.isMax(), then we're just looking for a cheap register. 679 // Evicting another local live range in this case could lead to suboptimal 680 // coloring. 681 if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && 682 !canReassign(*Intf, PhysReg)) { 683 return false; 684 } 685 } 686 } 687 MaxCost = Cost; 688 return true; 689 } 690 691 /// evictInterference - Evict any interferring registers that prevent VirtReg 692 /// from being assigned to Physreg. This assumes that canEvictInterference 693 /// returned true. 694 void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, 695 SmallVectorImpl<unsigned> &NewVRegs) { 696 // Make sure that VirtReg has a cascade number, and assign that cascade 697 // number to every evicted register. These live ranges than then only be 698 // evicted by a newer cascade, preventing infinite loops. 699 unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; 700 if (!Cascade) 701 Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; 702 703 DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) 704 << " interference: Cascade " << Cascade << '\n'); 705 706 // Collect all interfering virtregs first. 
707 SmallVector<LiveInterval*, 8> Intfs; 708 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 709 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 710 assert(Q.seenAllInterferences() && "Didn't check all interfererences."); 711 ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); 712 Intfs.append(IVR.begin(), IVR.end()); 713 } 714 715 // Evict them second. This will invalidate the queries. 716 for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { 717 LiveInterval *Intf = Intfs[i]; 718 // The same VirtReg may be present in multiple RegUnits. Skip duplicates. 719 if (!VRM->hasPhys(Intf->reg)) 720 continue; 721 Matrix->unassign(*Intf); 722 assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || 723 VirtReg.isSpillable() < Intf->isSpillable()) && 724 "Cannot decrease cascade number, illegal eviction"); 725 ExtraRegInfo[Intf->reg].Cascade = Cascade; 726 ++NumEvicted; 727 NewVRegs.push_back(Intf->reg); 728 } 729 } 730 731 /// tryEvict - Try to evict all interferences for a physreg. 732 /// @param VirtReg Currently unassigned virtual register. 733 /// @param Order Physregs to try. 734 /// @return Physreg to assign VirtReg, or 0. 735 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, 736 AllocationOrder &Order, 737 SmallVectorImpl<unsigned> &NewVRegs, 738 unsigned CostPerUseLimit) { 739 NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); 740 741 // Keep track of the cheapest interference seen so far. 742 EvictionCost BestCost; 743 BestCost.setMax(); 744 unsigned BestPhys = 0; 745 unsigned OrderLimit = Order.getOrder().size(); 746 747 // When we are just looking for a reduced cost per use, don't break any 748 // hints, and only evict smaller spill weights. 749 if (CostPerUseLimit < ~0u) { 750 BestCost.BrokenHints = 0; 751 BestCost.MaxWeight = VirtReg.weight; 752 753 // Check of any registers in RC are below CostPerUseLimit. 
754 const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); 755 unsigned MinCost = RegClassInfo.getMinCost(RC); 756 if (MinCost >= CostPerUseLimit) { 757 DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost 758 << ", no cheaper registers to be found.\n"); 759 return 0; 760 } 761 762 // It is normal for register classes to have a long tail of registers with 763 // the same cost. We don't need to look at them if they're too expensive. 764 if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { 765 OrderLimit = RegClassInfo.getLastCostChange(RC); 766 DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); 767 } 768 } 769 770 Order.rewind(); 771 while (unsigned PhysReg = Order.next(OrderLimit)) { 772 if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) 773 continue; 774 // The first use of a callee-saved register in a function has cost 1. 775 // Don't start using a CSR when the CostPerUseLimit is low. 776 if (CostPerUseLimit == 1) 777 if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) 778 if (!MRI->isPhysRegUsed(CSR)) { 779 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " 780 << PrintReg(CSR, TRI) << '\n'); 781 continue; 782 } 783 784 if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) 785 continue; 786 787 // Best so far. 788 BestPhys = PhysReg; 789 790 // Stop if the hint can be used. 791 if (Order.isHint()) 792 break; 793 } 794 795 if (!BestPhys) 796 return 0; 797 798 evictInterference(VirtReg, BestPhys, NewVRegs); 799 return BestPhys; 800 } 801 802 803 //===----------------------------------------------------------------------===// 804 // Region Splitting 805 //===----------------------------------------------------------------------===// 806 807 /// addSplitConstraints - Fill out the SplitConstraints vector based on the 808 /// interference pattern in Physreg and its aliases. 
Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
///
/// @param Intf Interference cursor for the candidate physreg; moved from
///             block to block as constraints are computed.
/// @param Cost [out] Static spill-code cost of this split, assuming all
///             preferences in SplitConstraints are met.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    // Default: prefer a register at block boundaries the value crosses, no
    // opinion at boundaries it does not reach.
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    // Without interference in this block, the defaults above stand and no
    // spill code is needed here.
    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value. Interference starting at or before
    // the block entry forces a spill across the entry; interference before
    // the first use still requires a reload before that use.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
        BC.Entry = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.first() < BI.FirstInstr)
        BC.Entry = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.first() < BI.LastInstr)
        ++Ins;
    }

    // Interference for the live-out value. Mirror image of the live-in case,
    // measured against the last split point and the last use.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
        BC.Exit = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.last() > BI.LastInstr)
        BC.Exit = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.last() > BI.FirstInstr)
        ++Ins;
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}


/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
///
/// Constraints and links are batched in fixed-size groups of GroupSize so
/// SpillPlacer receives them in chunks rather than one at a time.
void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  // B counts pending constraints (blocks with interference), T counts pending
  // transparent blocks (no interference, added as links only).
  unsigned B = 0, T = 0;

  for (unsigned i = 0; i != Blocks.size(); ++i) {
    unsigned Number = Blocks[i];
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      // Flush the link group when it fills up.
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    // Flush the constraint group when it fills up.
    if (++B == GroupSize) {
      ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
      SpillPlacer->addConstraints(Array);
      B = 0;
    }
  }

  // Flush any remaining partial groups.
  ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
  SpillPlacer->addConstraints(Array);
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
}

/// growRegion - Iteratively expand the split region for Cand by pulling in
/// live-through blocks adjacent to bundles that currently prefer a register,
/// feeding their constraints back to SpillPlacer until a fixed point.
void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  // ActiveBlocks[0..AddedTo) have already been given to SpillPlacer.
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  for (;;) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (int i = 0, e = NewBundles.size(); i != e; ++i) {
      unsigned Bundle = NewBundles[i];
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
           I != E; ++I) {
        unsigned Block = *I;
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg)
      addThroughConstraints(Cand.Intf, NewBlocks);
    else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  DEBUG(dbgs() << ", v=" << Visited);
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, 0);

  DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no
  // interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  growRegion(Cand);
  SpillPlacer->finish();

  // No bundles ended up live-in-register: nothing to split around.
  if (!Cand.LiveBundles.any()) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  DEBUG({
    for (int i = Cand.LiveBundles.find_first(); i>=0;
         i = Cand.LiveBundles.find_next(i))
      dbgs() << " EB#" << i;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block - then both a reload of the
    // incoming value and a store of the new one are needed.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  // Use blocks: a copy is needed at each boundary where the bundle solution
  // (RegIn/RegOut) disagrees with the block's own preference.
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
    unsigned Ins = 0;

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  // Live-through blocks in the region.
  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
    unsigned Number = Cand.ActiveBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in live-out: one copy.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  unsigned Reg = SA->getParent().reg;
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    // IntvIn/IntvOut are the interval indexes assigned at the block entry and
    // exit (0 means stack-bound); IntfIn/IntfOut are only meaningful when the
    // corresponding interval index is set.
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned c = 0; c != UsedCands.size(); ++c) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      unsigned Number = Blocks[i];
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      // Stack-bound on both sides: nothing to do for this block.
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    // NOTE: this LiveInterval &Reg shadows the unsigned Reg above.
    LiveInterval &Reg = LIS->getInterval(LREdit.get(i));

    // Ignore old intervals from DCE.
    if (getStage(Reg) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[i] == 0) {
      setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[i] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                     << " blocks as original.\n");
        // Don't allow repeated splitting as a safe guard against looping.
        setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

/// tryRegionSplit - Try splitting VirtReg around a multi-block region for each
/// allocation-order physreg, keeping the cheapest candidate (plus an optional
/// compact region in GlobalCand[0]), then perform the split.
/// Always returns 0; success is signaled through NewVRegs.
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                  SmallVectorImpl<unsigned> &NewVRegs) {
  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  BlockFrequency BestCost;
  SmallVector<unsigned, 8> UsedCands;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = calcSpillCost();
    DEBUG(dbgs() << "Cost of isolating all blocks = ";
          MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      // Evict the candidate with the fewest live bundles (least promising).
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned i = 0; i != NumCands; ++i) {
        if (i == BestCand || !GlobalCand[i].PhysReg)
          continue;
        unsigned Count = GlobalCand[i].LiveBundles.count();
        if (Count < WorstCount)
          Worst = i, WorstCount = Count;
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
          MBFI->printBlockFreq(dbgs(), Cost));
    // The static cost alone already exceeds the best candidate; skip the more
    // expensive region growth.
    if (Cost >= BestCost) {
      DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    growRegion(Cand);

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand);
    DEBUG({
      dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
             << " with bundles";
      for (int i = Cand.LiveBundles.find_first(); i>=0;
           i = Cand.LiveBundles.find_next(i))
        dbgs() << " EB#" << i;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return 0;

  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
                   << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
                   << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}


//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
/// they don't allocate.
1365 unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1366 SmallVectorImpl<unsigned> &NewVRegs) { 1367 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); 1368 unsigned Reg = VirtReg.reg; 1369 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1370 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1371 SE->reset(LREdit, SplitSpillMode); 1372 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1373 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1374 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1375 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1376 SE->splitSingleBlock(BI); 1377 } 1378 // No blocks were split. 1379 if (LREdit.empty()) 1380 return 0; 1381 1382 // We did split for some blocks. 1383 SmallVector<unsigned, 8> IntvMap; 1384 SE->finish(&IntvMap); 1385 1386 // Tell LiveDebugVariables about the new ranges. 1387 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1388 1389 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1390 1391 // Sort out the new intervals created by splitting. The remainder interval 1392 // goes straight to spilling, the new local ranges get to stay RS_New. 1393 for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { 1394 LiveInterval &LI = LIS->getInterval(LREdit.get(i)); 1395 if (getStage(LI) == RS_New && IntvMap[i] == 0) 1396 setStage(LI, RS_Spill); 1397 } 1398 1399 if (VerifyEnabled) 1400 MF->verify(this, "After splitting live range around basic blocks"); 1401 return 0; 1402 } 1403 1404 1405 //===----------------------------------------------------------------------===// 1406 // Per-Instruction Splitting 1407 //===----------------------------------------------------------------------===// 1408 1409 /// Get the number of allocatable registers that match the constraints of \p Reg 1410 /// on \p MI and that are also in \p SuperRC. 
1411 static unsigned getNumAllocatableRegsForConstraints( 1412 const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, 1413 const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, 1414 const RegisterClassInfo &RCI) { 1415 assert(SuperRC && "Invalid register class"); 1416 1417 const TargetRegisterClass *ConstrainedRC = 1418 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, 1419 /* ExploreBundle */ true); 1420 if (!ConstrainedRC) 1421 return 0; 1422 return RCI.getNumAllocatableRegs(ConstrainedRC); 1423 } 1424 1425 /// tryInstructionSplit - Split a live range around individual instructions. 1426 /// This is normally not worthwhile since the spiller is doing essentially the 1427 /// same thing. However, when the live range is in a constrained register 1428 /// class, it may help to insert copies such that parts of the live range can 1429 /// be moved to a larger register class. 1430 /// 1431 /// This is similar to spilling to a larger register class. 1432 unsigned 1433 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1434 SmallVectorImpl<unsigned> &NewVRegs) { 1435 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); 1436 // There is no point to this if there are no larger sub-classes. 1437 if (!RegClassInfo.isProperSubClass(CurRC)) 1438 return 0; 1439 1440 // Always enable split spill mode, since we're effectively spilling to a 1441 // register. 
1442 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1443 SE->reset(LREdit, SplitEditor::SM_Size); 1444 1445 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1446 if (Uses.size() <= 1) 1447 return 0; 1448 1449 DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); 1450 1451 const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); 1452 unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); 1453 // Split around every non-copy instruction if this split will relax 1454 // the constraints on the virtual register. 1455 // Otherwise, splitting just inserts uncoalescable copies that do not help 1456 // the allocation. 1457 for (unsigned i = 0; i != Uses.size(); ++i) { 1458 if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) 1459 if (MI->isFullCopy() || 1460 SuperRCNumAllocatableRegs == 1461 getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, 1462 TRI, RCI)) { 1463 DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); 1464 continue; 1465 } 1466 SE->openIntv(); 1467 SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); 1468 SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); 1469 SE->useIntv(SegStart, SegStop); 1470 } 1471 1472 if (LREdit.empty()) { 1473 DEBUG(dbgs() << "All uses were copies.\n"); 1474 return 0; 1475 } 1476 1477 SmallVector<unsigned, 8> IntvMap; 1478 SE->finish(&IntvMap); 1479 DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); 1480 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1481 1482 // Assign all new registers to RS_Spill. This was the last chance. 
1483 setStage(LREdit.begin(), LREdit.end(), RS_Spill); 1484 return 0; 1485 } 1486 1487 1488 //===----------------------------------------------------------------------===// 1489 // Local Splitting 1490 //===----------------------------------------------------------------------===// 1491 1492 1493 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted 1494 /// in order to use PhysReg between two entries in SA->UseSlots. 1495 /// 1496 /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. 1497 /// 1498 void RAGreedy::calcGapWeights(unsigned PhysReg, 1499 SmallVectorImpl<float> &GapWeight) { 1500 assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); 1501 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1502 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1503 const unsigned NumGaps = Uses.size()-1; 1504 1505 // Start and end points for the interference check. 1506 SlotIndex StartIdx = 1507 BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; 1508 SlotIndex StopIdx = 1509 BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; 1510 1511 GapWeight.assign(NumGaps, 0.0f); 1512 1513 // Add interference from each overlapping register. 1514 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 1515 if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units) 1516 .checkInterference()) 1517 continue; 1518 1519 // We know that VirtReg is a continuous interval from FirstInstr to 1520 // LastInstr, so we don't need InterferenceQuery. 1521 // 1522 // Interference that overlaps an instruction is counted in both gaps 1523 // surrounding the instruction. The exception is interference before 1524 // StartIdx and after StopIdx. 1525 // 1526 LiveIntervalUnion::SegmentIter IntI = 1527 Matrix->getLiveUnions()[*Units] .find(StartIdx); 1528 for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { 1529 // Skip the gaps before IntI. 
1530 while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) 1531 if (++Gap == NumGaps) 1532 break; 1533 if (Gap == NumGaps) 1534 break; 1535 1536 // Update the gaps covered by IntI. 1537 const float weight = IntI.value()->weight; 1538 for (; Gap != NumGaps; ++Gap) { 1539 GapWeight[Gap] = std::max(GapWeight[Gap], weight); 1540 if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) 1541 break; 1542 } 1543 if (Gap == NumGaps) 1544 break; 1545 } 1546 } 1547 1548 // Add fixed interference. 1549 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 1550 const LiveRange &LR = LIS->getRegUnit(*Units); 1551 LiveRange::const_iterator I = LR.find(StartIdx); 1552 LiveRange::const_iterator E = LR.end(); 1553 1554 // Same loop as above. Mark any overlapped gaps as HUGE_VALF. 1555 for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { 1556 while (Uses[Gap+1].getBoundaryIndex() < I->start) 1557 if (++Gap == NumGaps) 1558 break; 1559 if (Gap == NumGaps) 1560 break; 1561 1562 for (; Gap != NumGaps; ++Gap) { 1563 GapWeight[Gap] = llvm::huge_valf; 1564 if (Uses[Gap+1].getBaseIndex() >= I->end) 1565 break; 1566 } 1567 if (Gap == NumGaps) 1568 break; 1569 } 1570 } 1571 } 1572 1573 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only 1574 /// basic block. 1575 /// 1576 unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1577 SmallVectorImpl<unsigned> &NewVRegs) { 1578 assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); 1579 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1580 1581 // Note that it is possible to have an interval that is live-in or live-out 1582 // while only covering a single block - A phi-def can use undef values from 1583 // predecessors, and the block could be a single-block loop. 1584 // We don't bother doing anything clever about such a case, we simply assume 1585 // that the interval is continuous from FirstInstr to LastInstr. 
We should 1586 // make sure that we don't do anything illegal to such an interval, though. 1587 1588 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1589 if (Uses.size() <= 2) 1590 return 0; 1591 const unsigned NumGaps = Uses.size()-1; 1592 1593 DEBUG({ 1594 dbgs() << "tryLocalSplit: "; 1595 for (unsigned i = 0, e = Uses.size(); i != e; ++i) 1596 dbgs() << ' ' << Uses[i]; 1597 dbgs() << '\n'; 1598 }); 1599 1600 // If VirtReg is live across any register mask operands, compute a list of 1601 // gaps with register masks. 1602 SmallVector<unsigned, 8> RegMaskGaps; 1603 if (Matrix->checkRegMaskInterference(VirtReg)) { 1604 // Get regmask slots for the whole block. 1605 ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); 1606 DEBUG(dbgs() << RMS.size() << " regmasks in block:"); 1607 // Constrain to VirtReg's live range. 1608 unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), 1609 Uses.front().getRegSlot()) - RMS.begin(); 1610 unsigned re = RMS.size(); 1611 for (unsigned i = 0; i != NumGaps && ri != re; ++i) { 1612 // Look for Uses[i] <= RMS <= Uses[i+1]. 1613 assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); 1614 if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) 1615 continue; 1616 // Skip a regmask on the same instruction as the last use. It doesn't 1617 // overlap the live range. 1618 if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) 1619 break; 1620 DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); 1621 RegMaskGaps.push_back(i); 1622 // Advance ri to the next gap. A regmask on one of the uses counts in 1623 // both gaps. 1624 while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) 1625 ++ri; 1626 } 1627 DEBUG(dbgs() << '\n'); 1628 } 1629 1630 // Since we allow local split results to be split again, there is a risk of 1631 // creating infinite loops. It is tempting to require that the new live 1632 // ranges have less instructions than the original. 
That would guarantee 1633 // convergence, but it is too strict. A live range with 3 instructions can be 1634 // split 2+3 (including the COPY), and we want to allow that. 1635 // 1636 // Instead we use these rules: 1637 // 1638 // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the 1639 // noop split, of course). 1640 // 2. Require progress be made for ranges with getStage() == RS_Split2. All 1641 // the new ranges must have fewer instructions than before the split. 1642 // 3. New ranges with the same number of instructions are marked RS_Split2, 1643 // smaller ranges are marked RS_New. 1644 // 1645 // These rules allow a 3 -> 2+3 split once, which we need. They also prevent 1646 // excessive splitting and infinite loops. 1647 // 1648 bool ProgressRequired = getStage(VirtReg) >= RS_Split2; 1649 1650 // Best split candidate. 1651 unsigned BestBefore = NumGaps; 1652 unsigned BestAfter = 0; 1653 float BestDiff = 0; 1654 1655 const float blockFreq = 1656 SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * 1657 (1.0f / MBFI->getEntryFreq()); 1658 SmallVector<float, 8> GapWeight; 1659 1660 Order.rewind(); 1661 while (unsigned PhysReg = Order.next()) { 1662 // Keep track of the largest spill weight that would need to be evicted in 1663 // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. 1664 calcGapWeights(PhysReg, GapWeight); 1665 1666 // Remove any gaps with regmask clobbers. 1667 if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) 1668 for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) 1669 GapWeight[RegMaskGaps[i]] = llvm::huge_valf; 1670 1671 // Try to find the best sequence of gaps to close. 1672 // The new spill weight must be larger than any gap interference. 1673 1674 // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. 1675 unsigned SplitBefore = 0, SplitAfter = 1; 1676 1677 // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). 
1678 // It is the spill weight that needs to be evicted. 1679 float MaxGap = GapWeight[0]; 1680 1681 for (;;) { 1682 // Live before/after split? 1683 const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; 1684 const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; 1685 1686 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' 1687 << Uses[SplitBefore] << '-' << Uses[SplitAfter] 1688 << " i=" << MaxGap); 1689 1690 // Stop before the interval gets so big we wouldn't be making progress. 1691 if (!LiveBefore && !LiveAfter) { 1692 DEBUG(dbgs() << " all\n"); 1693 break; 1694 } 1695 // Should the interval be extended or shrunk? 1696 bool Shrink = true; 1697 1698 // How many gaps would the new range have? 1699 unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; 1700 1701 // Legally, without causing looping? 1702 bool Legal = !ProgressRequired || NewGaps < NumGaps; 1703 1704 if (Legal && MaxGap < llvm::huge_valf) { 1705 // Estimate the new spill weight. Each instruction reads or writes the 1706 // register. Conservatively assume there are no read-modify-write 1707 // instructions. 1708 // 1709 // Try to guess the size of the new interval. 1710 const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), 1711 Uses[SplitBefore].distance(Uses[SplitAfter]) + 1712 (LiveBefore + LiveAfter)*SlotIndex::InstrDist); 1713 // Would this split be possible to allocate? 1714 // Never allocate all gaps, we wouldn't be making progress. 1715 DEBUG(dbgs() << " w=" << EstWeight); 1716 if (EstWeight * Hysteresis >= MaxGap) { 1717 Shrink = false; 1718 float Diff = EstWeight - MaxGap; 1719 if (Diff > BestDiff) { 1720 DEBUG(dbgs() << " (best)"); 1721 BestDiff = Hysteresis * Diff; 1722 BestBefore = SplitBefore; 1723 BestAfter = SplitAfter; 1724 } 1725 } 1726 } 1727 1728 // Try to shrink. 1729 if (Shrink) { 1730 if (++SplitBefore < SplitAfter) { 1731 DEBUG(dbgs() << " shrink\n"); 1732 // Recompute the max when necessary. 
1733 if (GapWeight[SplitBefore - 1] >= MaxGap) { 1734 MaxGap = GapWeight[SplitBefore]; 1735 for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) 1736 MaxGap = std::max(MaxGap, GapWeight[i]); 1737 } 1738 continue; 1739 } 1740 MaxGap = 0; 1741 } 1742 1743 // Try to extend the interval. 1744 if (SplitAfter >= NumGaps) { 1745 DEBUG(dbgs() << " end\n"); 1746 break; 1747 } 1748 1749 DEBUG(dbgs() << " extend\n"); 1750 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); 1751 } 1752 } 1753 1754 // Didn't find any candidates? 1755 if (BestBefore == NumGaps) 1756 return 0; 1757 1758 DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] 1759 << '-' << Uses[BestAfter] << ", " << BestDiff 1760 << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); 1761 1762 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1763 SE->reset(LREdit); 1764 1765 SE->openIntv(); 1766 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); 1767 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); 1768 SE->useIntv(SegStart, SegStop); 1769 SmallVector<unsigned, 8> IntvMap; 1770 SE->finish(&IntvMap); 1771 DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); 1772 1773 // If the new range has the same number of instructions as before, mark it as 1774 // RS_Split2 so the next split will be forced to make progress. Otherwise, 1775 // leave the new intervals as RS_New so they can compete. 
  // Compute the gap count of the range that was just created, mirroring the
  // NewGaps computation performed in the search loop above.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    DEBUG(dbgs() << "Tagging non-progress ranges: ");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    // Intervals mapped to interval 1 (the one opened above, covering the
    // selected use gap) did not shrink; tag them RS_Split2 so the next split
    // is forced to make progress.
    for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
      if (IntvMap[i] == 1) {
        setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
      }
    DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<unsigned>&NewVRegs) {
  // Ranges must be Split2 or less.
  if (getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    // Local splitting found nothing profitable; fall back to splitting
    // around individual instructions.
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // FIXME: SplitAnalysis may repair broken live ranges coming from the
  // coalescer. That may cause the range to become allocatable which means that
  // tryRegionSplit won't be making progress. This check should be replaced with
  // an assertion when the coalescer is fixed.
  if (SA->didRepairRange()) {
    // VirtReg has changed, so all cached queries are invalid.
    Matrix->invalidateVirtRegs();
    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
      return PhysReg;
  }

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (getStage(VirtReg) < RS_Split2) {
    unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
1857 bool 1858 RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, 1859 SmallLISet &RecoloringCandidates, 1860 const SmallVirtRegSet &FixedRegisters) { 1861 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); 1862 1863 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 1864 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 1865 // If there is LastChanceRecoloringMaxInterference or more interferences, 1866 // chances are one would not be recolorable. 1867 if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= 1868 LastChanceRecoloringMaxInterference) { 1869 DEBUG(dbgs() << "Early abort: too many interferences.\n"); 1870 return false; 1871 } 1872 for (unsigned i = Q.interferingVRegs().size(); i; --i) { 1873 LiveInterval *Intf = Q.interferingVRegs()[i - 1]; 1874 // If Intf is done and sit on the same register class as VirtReg, 1875 // it would not be recolorable as it is in the same state as VirtReg. 1876 if ((getStage(*Intf) == RS_Done && 1877 MRI->getRegClass(Intf->reg) == CurRC) || 1878 FixedRegisters.count(Intf->reg)) { 1879 DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n"); 1880 return false; 1881 } 1882 RecoloringCandidates.insert(Intf); 1883 } 1884 } 1885 return true; 1886 } 1887 1888 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring 1889 /// its interferences. 1890 /// Last chance recoloring chooses a color for \p VirtReg and recolors every 1891 /// virtual register that was using it. The recoloring process may recursively 1892 /// use the last chance recoloring. Therefore, when a virtual register has been 1893 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot 1894 /// be last-chance-recolored again during this recoloring "session". 
1895 /// E.g., 1896 /// Let 1897 /// vA can use {R1, R2 } 1898 /// vB can use { R2, R3} 1899 /// vC can use {R1 } 1900 /// Where vA, vB, and vC cannot be split anymore (they are reloads for 1901 /// instance) and they all interfere. 1902 /// 1903 /// vA is assigned R1 1904 /// vB is assigned R2 1905 /// vC tries to evict vA but vA is already done. 1906 /// Regular register allocation fails. 1907 /// 1908 /// Last chance recoloring kicks in: 1909 /// vC does as if vA was evicted => vC uses R1. 1910 /// vC is marked as fixed. 1911 /// vA needs to find a color. 1912 /// None are available. 1913 /// vA cannot evict vC: vC is a fixed virtual register now. 1914 /// vA does as if vB was evicted => vA uses R2. 1915 /// vB needs to find a color. 1916 /// R3 is available. 1917 /// Recoloring => vC = R1, vA = R2, vB = R3 1918 /// 1919 /// \p Order defines the prefered allocation order for \p VirtReg. 1920 /// \p NewRegs will contain any new virtual register that have been created 1921 /// (split, spill) during the process and that must be assigned. 1922 /// \p FixedRegisters contains all the virtual registers that cannot be 1923 /// recolored. 1924 /// \p Depth gives the current depth of the last chance recoloring. 1925 /// \return a physical register that can be used for VirtReg or ~0u if none 1926 /// exists. 1927 unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, 1928 AllocationOrder &Order, 1929 SmallVectorImpl<unsigned> &NewVRegs, 1930 SmallVirtRegSet &FixedRegisters, 1931 unsigned Depth) { 1932 DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); 1933 // Ranges must be Done. 1934 assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && 1935 "Last chance recoloring should really be last chance"); 1936 // Set the max depth to LastChanceRecoloringMaxDepth. 1937 // We may want to reconsider that if we end up with a too large search space 1938 // for target with hundreds of registers. 
1939 // Indeed, in that case we may want to cut the search space earlier. 1940 if (Depth >= LastChanceRecoloringMaxDepth) { 1941 DEBUG(dbgs() << "Abort because max depth has been reached.\n"); 1942 return ~0u; 1943 } 1944 1945 // Set of Live intervals that will need to be recolored. 1946 SmallLISet RecoloringCandidates; 1947 // Record the original mapping virtual register to physical register in case 1948 // the recoloring fails. 1949 DenseMap<unsigned, unsigned> VirtRegToPhysReg; 1950 // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in 1951 // this recoloring "session". 1952 FixedRegisters.insert(VirtReg.reg); 1953 1954 Order.rewind(); 1955 while (unsigned PhysReg = Order.next()) { 1956 DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " 1957 << PrintReg(PhysReg, TRI) << '\n'); 1958 RecoloringCandidates.clear(); 1959 VirtRegToPhysReg.clear(); 1960 1961 // It is only possible to recolor virtual register interference. 1962 if (Matrix->checkInterference(VirtReg, PhysReg) > 1963 LiveRegMatrix::IK_VirtReg) { 1964 DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n"); 1965 1966 continue; 1967 } 1968 1969 // Early give up on this PhysReg if it is obvious we cannot recolor all 1970 // the interferences. 1971 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, 1972 FixedRegisters)) { 1973 DEBUG(dbgs() << "Some inteferences cannot be recolored.\n"); 1974 continue; 1975 } 1976 1977 // RecoloringCandidates contains all the virtual registers that interfer 1978 // with VirtReg on PhysReg (or one of its aliases). 1979 // Enqueue them for recoloring and perform the actual recoloring. 
1980 PQueue RecoloringQueue; 1981 for (SmallLISet::iterator It = RecoloringCandidates.begin(), 1982 EndIt = RecoloringCandidates.end(); 1983 It != EndIt; ++It) { 1984 unsigned ItVirtReg = (*It)->reg; 1985 enqueue(RecoloringQueue, *It); 1986 assert(VRM->hasPhys(ItVirtReg) && 1987 "Interferences are supposed to be with allocated vairables"); 1988 1989 // Record the current allocation. 1990 VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); 1991 // unset the related struct. 1992 Matrix->unassign(**It); 1993 } 1994 1995 // Do as if VirtReg was assigned to PhysReg so that the underlying 1996 // recoloring has the right information about the interferes and 1997 // available colors. 1998 Matrix->assign(VirtReg, PhysReg); 1999 2000 // Save the current recoloring state. 2001 // If we cannot recolor all the interferences, we will have to start again 2002 // at this point for the next physical register. 2003 SmallVirtRegSet SaveFixedRegisters(FixedRegisters); 2004 if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, 2005 Depth)) { 2006 // Do not mess up with the global assignment process. 2007 // I.e., VirtReg must be unassigned. 2008 Matrix->unassign(VirtReg); 2009 return PhysReg; 2010 } 2011 2012 DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " 2013 << PrintReg(PhysReg, TRI) << '\n'); 2014 2015 // The recoloring attempt failed, undo the changes. 2016 FixedRegisters = SaveFixedRegisters; 2017 Matrix->unassign(VirtReg); 2018 2019 for (SmallLISet::iterator It = RecoloringCandidates.begin(), 2020 EndIt = RecoloringCandidates.end(); 2021 It != EndIt; ++It) { 2022 unsigned ItVirtReg = (*It)->reg; 2023 if (VRM->hasPhys(ItVirtReg)) 2024 Matrix->unassign(**It); 2025 Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); 2026 } 2027 } 2028 2029 // Last chance recoloring did not worked either, give up. 2030 return ~0u; 2031 } 2032 2033 /// tryRecoloringCandidates - Try to assign a new color to every register 2034 /// in \RecoloringQueue. 
2035 /// \p NewRegs will contain any new virtual register created during the 2036 /// recoloring process. 2037 /// \p FixedRegisters[in/out] contains all the registers that have been 2038 /// recolored. 2039 /// \return true if all virtual registers in RecoloringQueue were successfully 2040 /// recolored, false otherwise. 2041 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, 2042 SmallVectorImpl<unsigned> &NewVRegs, 2043 SmallVirtRegSet &FixedRegisters, 2044 unsigned Depth) { 2045 while (!RecoloringQueue.empty()) { 2046 LiveInterval *LI = dequeue(RecoloringQueue); 2047 DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); 2048 unsigned PhysReg; 2049 PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); 2050 if (PhysReg == ~0u || !PhysReg) 2051 return false; 2052 DEBUG(dbgs() << "Recoloring of " << *LI 2053 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); 2054 Matrix->assign(*LI, PhysReg); 2055 FixedRegisters.insert(LI->reg); 2056 } 2057 return true; 2058 } 2059 2060 //===----------------------------------------------------------------------===// 2061 // Main Entry Point 2062 //===----------------------------------------------------------------------===// 2063 2064 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, 2065 SmallVectorImpl<unsigned> &NewVRegs) { 2066 SmallVirtRegSet FixedRegisters; 2067 return selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); 2068 } 2069 2070 unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, 2071 SmallVectorImpl<unsigned> &NewVRegs, 2072 SmallVirtRegSet &FixedRegisters, 2073 unsigned Depth) { 2074 // First try assigning a free register. 
2075 AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); 2076 if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) 2077 return PhysReg; 2078 2079 LiveRangeStage Stage = getStage(VirtReg); 2080 DEBUG(dbgs() << StageName[Stage] 2081 << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); 2082 2083 // Try to evict a less worthy live range, but only for ranges from the primary 2084 // queue. The RS_Split ranges already failed to do this, and they should not 2085 // get a second chance until they have been split. 2086 if (Stage != RS_Split) 2087 if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) 2088 return PhysReg; 2089 2090 assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); 2091 2092 // The first time we see a live range, don't try to split or spill. 2093 // Wait until the second time, when all smaller ranges have been allocated. 2094 // This gives a better picture of the interference to split around. 2095 if (Stage < RS_Split) { 2096 setStage(VirtReg, RS_Split); 2097 DEBUG(dbgs() << "wait for second round\n"); 2098 NewVRegs.push_back(VirtReg.reg); 2099 return 0; 2100 } 2101 2102 // If we couldn't allocate a register from spilling, there is probably some 2103 // invalid inline assembly. The base class wil report it. 2104 if (Stage >= RS_Done || !VirtReg.isSpillable()) 2105 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, 2106 Depth); 2107 2108 // Try splitting VirtReg or interferences. 2109 unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); 2110 if (PhysReg || !NewVRegs.empty()) 2111 return PhysReg; 2112 2113 // Finally spill VirtReg itself. 
2114 NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); 2115 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 2116 spiller().spill(LRE); 2117 setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); 2118 2119 if (VerifyEnabled) 2120 MF->verify(this, "After spilling"); 2121 2122 // The live virtual register requesting allocation was spilled, so tell 2123 // the caller not to allocate anything during this round. 2124 return 0; 2125 } 2126 2127 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { 2128 DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" 2129 << "********** Function: " << mf.getName() << '\n'); 2130 2131 MF = &mf; 2132 TRI = MF->getTarget().getRegisterInfo(); 2133 TII = MF->getTarget().getInstrInfo(); 2134 RCI.runOnMachineFunction(mf); 2135 if (VerifyEnabled) 2136 MF->verify(this, "Before greedy register allocator"); 2137 2138 RegAllocBase::init(getAnalysis<VirtRegMap>(), 2139 getAnalysis<LiveIntervals>(), 2140 getAnalysis<LiveRegMatrix>()); 2141 Indexes = &getAnalysis<SlotIndexes>(); 2142 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 2143 DomTree = &getAnalysis<MachineDominatorTree>(); 2144 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); 2145 Loops = &getAnalysis<MachineLoopInfo>(); 2146 Bundles = &getAnalysis<EdgeBundles>(); 2147 SpillPlacer = &getAnalysis<SpillPlacement>(); 2148 DebugVars = &getAnalysis<LiveDebugVariables>(); 2149 2150 calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); 2151 2152 DEBUG(LIS->dump()); 2153 2154 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); 2155 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); 2156 ExtraRegInfo.clear(); 2157 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 2158 NextCascade = 1; 2159 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); 2160 GlobalCand.resize(32); // This will grow as needed. 2161 2162 allocatePhysRegs(); 2163 releaseMemory(); 2164 return true; 2165 } 2166