1 //===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the RAGreedy function pass for register allocation in 11 // optimized builds. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/Passes.h" 16 #include "AllocationOrder.h" 17 #include "InterferenceCache.h" 18 #include "LiveDebugVariables.h" 19 #include "RegAllocBase.h" 20 #include "SpillPlacement.h" 21 #include "Spiller.h" 22 #include "SplitKit.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/Analysis/AliasAnalysis.h" 25 #include "llvm/CodeGen/CalcSpillWeights.h" 26 #include "llvm/CodeGen/EdgeBundles.h" 27 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 28 #include "llvm/CodeGen/LiveRangeEdit.h" 29 #include "llvm/CodeGen/LiveRegMatrix.h" 30 #include "llvm/CodeGen/LiveStackAnalysis.h" 31 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 32 #include "llvm/CodeGen/MachineDominators.h" 33 #include "llvm/CodeGen/MachineFunctionPass.h" 34 #include "llvm/CodeGen/MachineLoopInfo.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/RegAllocRegistry.h" 37 #include "llvm/CodeGen/RegisterClassInfo.h" 38 #include "llvm/CodeGen/VirtRegMap.h" 39 #include "llvm/IR/LLVMContext.h" 40 #include "llvm/PassAnalysisSupport.h" 41 #include "llvm/Support/BranchProbability.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/Timer.h" 46 #include "llvm/Support/raw_ostream.h" 47 #include <queue> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "regalloc" 52 53 STATISTIC(NumGlobalSplits, "Number of split global live ranges"); 54 STATISTIC(NumLocalSplits, "Number of split local live ranges"); 55 STATISTIC(NumEvicted, "Number of interferences evicted"); 56 57 static cl::opt<SplitEditor::ComplementSpillMode> 58 SplitSpillMode("split-spill-mode", cl::Hidden, 59 cl::desc("Spill mode for splitting live ranges"), 60 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), 61 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), 62 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), 63 clEnumValEnd), 64 cl::init(SplitEditor::SM_Partition)); 65 66 static cl::opt<unsigned> 67 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, 68 cl::desc("Last chance recoloring max depth"), 69 cl::init(5)); 70 71 static cl::opt<unsigned> LastChanceRecoloringMaxInterference( 72 "lcr-max-interf", cl::Hidden, 73 cl::desc("Last chance recoloring maximum number of considered" 74 " interference at a time"), 75 cl::init(8)); 76 77 static cl::opt<bool> 78 ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, 79 cl::desc("Exhaustive Search for registers bypassing the depth " 80 "and interference cutoffs of last chance recoloring")); 81 82 static cl::opt<bool> EnableLocalReassignment( 83 "enable-local-reassign", cl::Hidden, 84 cl::desc("Local reassignment can yield better allocation decisions, but " 85 "may be compile time intensive"), 86 cl::init(true)); 87 88 // FIXME: Find a good default for this flag and remove the flag. 
89 static cl::opt<unsigned> 90 CSRFirstTimeCost("regalloc-csr-first-time-cost", 91 cl::desc("Cost for first time use of callee-saved register."), 92 cl::init(0), cl::Hidden); 93 94 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", 95 createGreedyRegisterAllocator); 96 97 namespace { 98 class RAGreedy : public MachineFunctionPass, 99 public RegAllocBase, 100 private LiveRangeEdit::Delegate { 101 // Convenient shortcuts. 102 typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue; 103 typedef SmallPtrSet<LiveInterval *, 4> SmallLISet; 104 typedef SmallSet<unsigned, 16> SmallVirtRegSet; 105 106 // context 107 MachineFunction *MF; 108 109 // Shortcuts to some useful interface. 110 const TargetInstrInfo *TII; 111 const TargetRegisterInfo *TRI; 112 RegisterClassInfo RCI; 113 114 // analyses 115 SlotIndexes *Indexes; 116 MachineBlockFrequencyInfo *MBFI; 117 MachineDominatorTree *DomTree; 118 MachineLoopInfo *Loops; 119 EdgeBundles *Bundles; 120 SpillPlacement *SpillPlacer; 121 LiveDebugVariables *DebugVars; 122 123 // state 124 std::unique_ptr<Spiller> SpillerInstance; 125 PQueue Queue; 126 unsigned NextCascade; 127 128 // Live ranges pass through a number of stages as we try to allocate them. 129 // Some of the stages may also create new live ranges: 130 // 131 // - Region splitting. 132 // - Per-block splitting. 133 // - Local splitting. 134 // - Spilling. 135 // 136 // Ranges produced by one of the stages skip the previous stages when they are 137 // dequeued. This improves performance because we can skip interference checks 138 // that are unlikely to give any results. It also guarantees that the live 139 // range splitting algorithm terminates, something that is otherwise hard to 140 // ensure. 141 enum LiveRangeStage { 142 /// Newly created live range that has never been queued. 143 RS_New, 144 145 /// Only attempt assignment and eviction. Then requeue as RS_Split. 146 RS_Assign, 147 148 /// Attempt live range splitting if assignment is impossible. 149 RS_Split, 150 151 /// Attempt more aggressive live range splitting that is guaranteed to make 152 /// progress. This is used for split products that may not be making 153 /// progress. 154 RS_Split2, 155 156 /// Live range will be spilled. No more splitting will be attempted. 157 RS_Spill, 158 159 /// There is nothing more we can do to this live range. Abort compilation 160 /// if it can't be assigned. 161 RS_Done 162 }; 163 164 // Enum CutOffStage to keep a track whether the register allocation failed 165 // because of the cutoffs encountered in last chance recoloring. 166 // Note: This is used as bitmask. New value should be next power of 2. 167 enum CutOffStage { 168 // No cutoffs encountered 169 CO_None = 0, 170 171 // lcr-max-depth cutoff encountered 172 CO_Depth = 1, 173 174 // lcr-max-interf cutoff encountered 175 CO_Interf = 2 176 }; 177 178 uint8_t CutOffInfo; 179 180 #ifndef NDEBUG 181 static const char *const StageName[]; 182 #endif 183 184 // RegInfo - Keep additional information about each live range. 185 struct RegInfo { 186 LiveRangeStage Stage; 187 188 // Cascade - Eviction loop prevention. See canEvictInterference(). 
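    // A live range that evicts interference is stamped with the current
    // cascade number; it can then only be evicted by a live range carrying a
    // newer (larger) cascade number, which is what breaks eviction cycles
    // (see canEvictInterference() and evictInterference() below).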
189 unsigned Cascade; 190 191 RegInfo() : Stage(RS_New), Cascade(0) {} 192 }; 193 194 IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; 195 196 LiveRangeStage getStage(const LiveInterval &VirtReg) const { 197 return ExtraRegInfo[VirtReg.reg].Stage; 198 } 199 200 void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { 201 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 202 ExtraRegInfo[VirtReg.reg].Stage = Stage; 203 } 204 205 template<typename Iterator> 206 void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { 207 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 208 for (;Begin != End; ++Begin) { 209 unsigned Reg = *Begin; 210 if (ExtraRegInfo[Reg].Stage == RS_New) 211 ExtraRegInfo[Reg].Stage = NewStage; 212 } 213 } 214 215 /// Cost of evicting interference. 216 struct EvictionCost { 217 unsigned BrokenHints; ///< Total number of broken hints. 218 float MaxWeight; ///< Maximum spill weight evicted. 219 220 EvictionCost(): BrokenHints(0), MaxWeight(0) {} 221 222 bool isMax() const { return BrokenHints == ~0u; } 223 224 void setMax() { BrokenHints = ~0u; } 225 226 void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } 227 228 bool operator<(const EvictionCost &O) const { 229 return std::tie(BrokenHints, MaxWeight) < 230 std::tie(O.BrokenHints, O.MaxWeight); 231 } 232 }; 233 234 // splitting state. 235 std::unique_ptr<SplitAnalysis> SA; 236 std::unique_ptr<SplitEditor> SE; 237 238 /// Cached per-block interference maps 239 InterferenceCache IntfCache; 240 241 /// All basic blocks where the current register has uses. 242 SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; 243 244 /// Global live range splitting candidate info. 245 struct GlobalSplitCandidate { 246 // Register intended for assignment, or 0. 247 unsigned PhysReg; 248 249 // SplitKit interval index for this candidate. 250 unsigned IntvIdx; 251 252 // Interference for PhysReg. 253 InterferenceCache::Cursor Intf; 254 255 // Bundles where this candidate should be live. 256 BitVector LiveBundles; 257 SmallVector<unsigned, 8> ActiveBlocks; 258 259 void reset(InterferenceCache &Cache, unsigned Reg) { 260 PhysReg = Reg; 261 IntvIdx = 0; 262 Intf.setPhysReg(Cache, Reg); 263 LiveBundles.clear(); 264 ActiveBlocks.clear(); 265 } 266 267 // Set B[i] = C for every live bundle where B[i] was NoCand. 268 unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { 269 unsigned Count = 0; 270 for (int i = LiveBundles.find_first(); i >= 0; 271 i = LiveBundles.find_next(i)) 272 if (B[i] == NoCand) { 273 B[i] = C; 274 Count++; 275 } 276 return Count; 277 } 278 }; 279 280 /// Candidate info for each PhysReg in AllocationOrder. 281 /// This vector never shrinks, but grows to the size of the largest register 282 /// class. 283 SmallVector<GlobalSplitCandidate, 32> GlobalCand; 284 285 enum : unsigned { NoCand = ~0u }; 286 287 /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to 288 /// NoCand which indicates the stack interval. 289 SmallVector<unsigned, 32> BundleCand; 290 291 /// Callee-save register cost, calculated once per machine function. 292 BlockFrequency CSRCost; 293 294 public: 295 RAGreedy(); 296 297 /// Return the pass name. 298 const char* getPassName() const override { 299 return "Greedy Register Allocator"; 300 } 301 302 /// RAGreedy analysis usage. 
303 void getAnalysisUsage(AnalysisUsage &AU) const override; 304 void releaseMemory() override; 305 Spiller &spiller() override { return *SpillerInstance; } 306 void enqueue(LiveInterval *LI) override; 307 LiveInterval *dequeue() override; 308 unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; 309 310 /// Perform register allocation. 311 bool runOnMachineFunction(MachineFunction &mf) override; 312 313 static char ID; 314 315 private: 316 unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, 317 SmallVirtRegSet &, unsigned = 0); 318 319 bool LRE_CanEraseVirtReg(unsigned) override; 320 void LRE_WillShrinkVirtReg(unsigned) override; 321 void LRE_DidCloneVirtReg(unsigned, unsigned) override; 322 void enqueue(PQueue &CurQueue, LiveInterval *LI); 323 LiveInterval *dequeue(PQueue &CurQueue); 324 325 BlockFrequency calcSpillCost(); 326 bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); 327 void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); 328 void growRegion(GlobalSplitCandidate &Cand); 329 BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); 330 bool calcCompactRegion(GlobalSplitCandidate&); 331 void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); 332 void calcGapWeights(unsigned, SmallVectorImpl<float>&); 333 unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); 334 bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); 335 bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); 336 void evictInterference(LiveInterval&, unsigned, 337 SmallVectorImpl<unsigned>&); 338 bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, 339 SmallLISet &RecoloringCandidates, 340 const SmallVirtRegSet &FixedRegisters); 341 342 unsigned tryAssign(LiveInterval&, AllocationOrder&, 343 SmallVectorImpl<unsigned>&); 344 unsigned tryEvict(LiveInterval&, AllocationOrder&, 345 SmallVectorImpl<unsigned>&, unsigned = ~0u); 346 unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, 347 SmallVectorImpl<unsigned>&); 348 /// Calculate cost of region splitting. 349 unsigned calculateRegionSplitCost(LiveInterval &VirtReg, 350 AllocationOrder &Order, 351 BlockFrequency &BestCost, 352 unsigned &NumCands, bool IgnoreCSR); 353 /// Perform region splitting. 354 unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, 355 bool HasCompact, 356 SmallVectorImpl<unsigned> &NewVRegs); 357 /// Check other options before using a callee-saved register for the first 358 /// time. 
359 unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, 360 unsigned PhysReg, unsigned &CostPerUseLimit, 361 SmallVectorImpl<unsigned> &NewVRegs); 362 void initializeCSRCost(); 363 unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, 364 SmallVectorImpl<unsigned>&); 365 unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, 366 SmallVectorImpl<unsigned>&); 367 unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, 368 SmallVectorImpl<unsigned>&); 369 unsigned trySplit(LiveInterval&, AllocationOrder&, 370 SmallVectorImpl<unsigned>&); 371 unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, 372 SmallVectorImpl<unsigned> &, 373 SmallVirtRegSet &, unsigned); 374 bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, 375 SmallVirtRegSet &, unsigned); 376 }; 377 } // end anonymous namespace 378 379 char RAGreedy::ID = 0; 380 381 #ifndef NDEBUG 382 const char *const RAGreedy::StageName[] = { 383 "RS_New", 384 "RS_Assign", 385 "RS_Split", 386 "RS_Split2", 387 "RS_Spill", 388 "RS_Done" 389 }; 390 #endif 391 392 // Hysteresis to use when comparing floats. 393 // This helps stabilize decisions based on float comparisons. 394 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 395 396 397 FunctionPass* llvm::createGreedyRegisterAllocator() { 398 return new RAGreedy(); 399 } 400 401 RAGreedy::RAGreedy(): MachineFunctionPass(ID) { 402 initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); 403 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 404 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); 405 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 406 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 407 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 408 initializeLiveStacksPass(*PassRegistry::getPassRegistry()); 409 initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); 410 initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); 411 initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); 412 initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); 413 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); 414 initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); 415 } 416 417 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { 418 AU.setPreservesCFG(); 419 AU.addRequired<MachineBlockFrequencyInfo>(); 420 AU.addPreserved<MachineBlockFrequencyInfo>(); 421 AU.addRequired<AliasAnalysis>(); 422 AU.addPreserved<AliasAnalysis>(); 423 AU.addRequired<LiveIntervals>(); 424 AU.addPreserved<LiveIntervals>(); 425 AU.addRequired<SlotIndexes>(); 426 AU.addPreserved<SlotIndexes>(); 427 AU.addRequired<LiveDebugVariables>(); 428 AU.addPreserved<LiveDebugVariables>(); 429 AU.addRequired<LiveStacks>(); 430 AU.addPreserved<LiveStacks>(); 431 AU.addRequired<MachineDominatorTree>(); 432 AU.addPreserved<MachineDominatorTree>(); 433 AU.addRequired<MachineLoopInfo>(); 434 AU.addPreserved<MachineLoopInfo>(); 435 AU.addRequired<VirtRegMap>(); 436 AU.addPreserved<VirtRegMap>(); 437 AU.addRequired<LiveRegMatrix>(); 438 AU.addPreserved<LiveRegMatrix>(); 439 AU.addRequired<EdgeBundles>(); 440 AU.addRequired<SpillPlacement>(); 441 MachineFunctionPass::getAnalysisUsage(AU); 442 } 443 444 445 //===----------------------------------------------------------------------===// 446 // LiveRangeEdit delegate methods 447 //===----------------------------------------------------------------------===// 448 449 bool 
RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LIS->getInterval(VirtReg));
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!ExtraRegInfo.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it to
  // be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned.
  // Reset the parent to RS_Assign and give the clone the same stage and
  // cascade as the parent.
  ExtraRegInfo[Old].Stage = RS_Assign;
  ExtraRegInfo.grow(New);
  ExtraRegInfo[New] = ExtraRegInfo[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset(nullptr);
  ExtraRegInfo.clear();
  GlobalCand.clear();
}

void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const unsigned Reg = LI->reg;
  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
         "Can only enqueue virtual registers");
  unsigned Prio;

  ExtraRegInfo.grow(Reg);
  if (ExtraRegInfo[Reg].Stage == RS_New)
    ExtraRegInfo[Reg].Stage = RS_Assign;

  if (ExtraRegInfo[Reg].Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() &&
      (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());

    if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short live ranges to be assigned
        // first to one of the cheap registers. This could be much faster for
        // very large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges
      // that don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
    }
    // Mark a higher bit to prioritize global and local above RS_Split.
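    // An informal summary of the priority encoding built in this function:
    //   bit 31  - set for everything except deferred RS_Split ranges
    //   bit 30  - set when the range has a known physreg preference (hint)
    //   bit 29  - set for global ranges, clear for local ranges
    //   low bits - range size, or instruction distance for local ranges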
536 Prio |= (1u << 31); 537 538 // Boost ranges that have a physical register hint. 539 if (VRM->hasKnownPreference(Reg)) 540 Prio |= (1u << 30); 541 } 542 // The virtual register number is a tie breaker for same-sized ranges. 543 // Give lower vreg numbers higher priority to assign them first. 544 CurQueue.push(std::make_pair(Prio, ~Reg)); 545 } 546 547 LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } 548 549 LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { 550 if (CurQueue.empty()) 551 return nullptr; 552 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); 553 CurQueue.pop(); 554 return LI; 555 } 556 557 558 //===----------------------------------------------------------------------===// 559 // Direct Assignment 560 //===----------------------------------------------------------------------===// 561 562 /// tryAssign - Try to assign VirtReg to an available register. 563 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, 564 AllocationOrder &Order, 565 SmallVectorImpl<unsigned> &NewVRegs) { 566 Order.rewind(); 567 unsigned PhysReg; 568 while ((PhysReg = Order.next())) 569 if (!Matrix->checkInterference(VirtReg, PhysReg)) 570 break; 571 if (!PhysReg || Order.isHint()) 572 return PhysReg; 573 574 // PhysReg is available, but there may be a better choice. 575 576 // If we missed a simple hint, try to cheaply evict interference from the 577 // preferred register. 578 if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) 579 if (Order.isHint(Hint)) { 580 DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); 581 EvictionCost MaxCost; 582 MaxCost.setBrokenHints(1); 583 if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { 584 evictInterference(VirtReg, Hint, NewVRegs); 585 return Hint; 586 } 587 } 588 589 // Try to evict interference from a cheaper alternative. 590 unsigned Cost = TRI->getCostPerUse(PhysReg); 591 592 // Most registers have 0 additional cost. 593 if (!Cost) 594 return PhysReg; 595 596 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost 597 << '\n'); 598 unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); 599 return CheapReg ? CheapReg : PhysReg; 600 } 601 602 603 //===----------------------------------------------------------------------===// 604 // Interference eviction 605 //===----------------------------------------------------------------------===// 606 607 unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { 608 AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); 609 unsigned PhysReg; 610 while ((PhysReg = Order.next())) { 611 if (PhysReg == PrevReg) 612 continue; 613 614 MCRegUnitIterator Units(PhysReg, TRI); 615 for (; Units.isValid(); ++Units) { 616 // Instantiate a "subquery", not to be confused with the Queries array. 617 LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); 618 if (subQ.checkInterference()) 619 break; 620 } 621 // If no units have interference, break out with the current PhysReg. 622 if (!Units.isValid()) 623 break; 624 } 625 if (PhysReg) 626 DEBUG(dbgs() << "can reassign: " << VirtReg << " from " 627 << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) 628 << '\n'); 629 return PhysReg; 630 } 631 632 /// shouldEvict - determine if A should evict the assigned live range B. The 633 /// eviction policy defined by this function together with the allocation order 634 /// defined by enqueue() decides which registers ultimately end up being split 635 /// and spilled. 
///
/// Cascade numbers are used to prevent infinite loops if this function is a
/// cyclic relation.
///
/// @param A          The live range to be assigned.
/// @param IsHint     True when A is about to be assigned to its preferred
///                   register.
/// @param B          The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
                           LiveInterval &B, bool BreaksHint) {
  bool CanSplit = getStage(B) < RS_Spill;

  // Be fairly aggressive about following hints as long as the evictee can be
  // split.
  if (CanSplit && IsHint && !BreaksHint)
    return true;

  if (A.weight > B.weight) {
    DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
    return true;
  }
  return false;
}

/// canEvictInterference - Return true if all interferences between VirtReg and
/// PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
/// @param IsHint  True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                    bool IsHint, EvictionCost &MaxCost) {
  // It is only possible to evict virtual register interference.
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
    return false;

  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);

  // Find VirtReg's cascade number. This will be unassigned if VirtReg was
  // never involved in an eviction before. If a cascade number was assigned,
  // deny evicting anything with the same or a newer cascade number. This
  // prevents infinite eviction loops.
  //
  // This works out so a register without a cascade number is allowed to evict
  // anything, and it can be evicted by anything.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = NextCascade;

  EvictionCost Cost;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are 10 or more interferences, chances are one is heavier.
    if (Q.collectInterferingVRegs(10) >= 10)
      return false;

    // Check if any interfering live range is heavier than MaxWeight.
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
             "Only expecting virtual register interference from query");
      // Never evict spill products. They cannot split or spill.
      if (getStage(*Intf) == RS_Done)
        return false;
      // Once a live range becomes small enough, it is urgent that we find a
      // register for it. This is indicated by an infinite spill weight. These
      // urgent live ranges get to evict almost anything.
      //
      // Also allow urgent evictions of unspillable ranges from a strictly
      // larger allocation order.
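      // ("Larger allocation order" means the interfering range's register
      // class has strictly more allocatable registers, so it has more places
      // to go than VirtReg does.)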
      bool Urgent = !VirtReg.isSpillable() &&
        (Intf->isSpillable() ||
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
      // Only evict older cascades or live ranges without a cascade.
      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
      if (Cascade <= IntfCascade) {
        if (!Urgent)
          return false;
        // We permit breaking cascades for urgent evictions. It should be the
        // last resort, though, so make it really expensive.
        Cost.BrokenHints += 10;
      }
      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
      if (Urgent)
        continue;
      // Apply the eviction policy for non-urgent evictions.
      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
        return false;
      // If !MaxCost.isMax(), then we're just looking for a cheap register.
      // Evicting another local live range in this case could lead to
      // suboptimal coloring.
      if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
          (!EnableLocalReassignment || !canReassign(*Intf, PhysReg))) {
        return false;
      }
    }
  }
  MaxCost = Cost;
  return true;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;

  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
               << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<LiveInterval*, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
    LiveInterval *Intf = Intfs[i];
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg))
      continue;
    Matrix->unassign(*Intf);
    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraRegInfo[Intf->reg].Cascade = Cascade;
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg);
  }
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param  VirtReg Currently unassigned virtual register.
/// @param  Order   Physregs to try.
/// @return         Physreg to assign VirtReg, or 0.
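/// A CostPerUseLimit below ~0u restricts the search to physical registers
/// whose cost per use is strictly below the limit, without breaking any hints.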
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                            AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs,
                            unsigned CostPerUseLimit) {
  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);

  // Keep track of the cheapest interference seen so far.
  EvictionCost BestCost;
  BestCost.setMax();
  unsigned BestPhys = 0;
  unsigned OrderLimit = Order.getOrder().size();

  // When we are just looking for a reduced cost per use, don't break any
  // hints, and only evict smaller spill weights.
  if (CostPerUseLimit < ~0u) {
    BestCost.BrokenHints = 0;
    BestCost.MaxWeight = VirtReg.weight;

    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
    unsigned MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
                   << ", no cheaper registers to be found.\n");
      return 0;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
    }
  }

  Order.rewind();
  while (unsigned PhysReg = Order.next(OrderLimit)) {
    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
      continue;
    // The first use of a callee-saved register in a function has cost 1.
    // Don't start using a CSR when the CostPerUseLimit is low.
    if (CostPerUseLimit == 1)
      if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
        if (!MRI->isPhysRegUsed(CSR)) {
          DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
                       << PrintReg(CSR, TRI) << '\n');
          continue;
        }

    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
      continue;

    // Best so far.
    BestPhys = PhysReg;

    // Stop if the hint can be used.
    if (Order.isHint())
      break;
  }

  if (!BestPhys)
    return 0;

  evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}


//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ?
SpillPlacement::PrefReg : SpillPlacement::DontCare; 885 BC.ChangesValue = BI.FirstDef.isValid(); 886 887 if (!Intf.hasInterference()) 888 continue; 889 890 // Number of spill code instructions to insert. 891 unsigned Ins = 0; 892 893 // Interference for the live-in value. 894 if (BI.LiveIn) { 895 if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) 896 BC.Entry = SpillPlacement::MustSpill, ++Ins; 897 else if (Intf.first() < BI.FirstInstr) 898 BC.Entry = SpillPlacement::PrefSpill, ++Ins; 899 else if (Intf.first() < BI.LastInstr) 900 ++Ins; 901 } 902 903 // Interference for the live-out value. 904 if (BI.LiveOut) { 905 if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) 906 BC.Exit = SpillPlacement::MustSpill, ++Ins; 907 else if (Intf.last() > BI.LastInstr) 908 BC.Exit = SpillPlacement::PrefSpill, ++Ins; 909 else if (Intf.last() > BI.FirstInstr) 910 ++Ins; 911 } 912 913 // Accumulate the total frequency of inserted spill code. 914 while (Ins--) 915 StaticCost += SpillPlacer->getBlockFrequency(BC.Number); 916 } 917 Cost = StaticCost; 918 919 // Add constraints for use-blocks. Note that these are the only constraints 920 // that may add a positive bias, it is downhill from here. 921 SpillPlacer->addConstraints(SplitConstraints); 922 return SpillPlacer->scanActiveBundles(); 923 } 924 925 926 /// addThroughConstraints - Add constraints and links to SpillPlacer from the 927 /// live-through blocks in Blocks. 928 void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, 929 ArrayRef<unsigned> Blocks) { 930 const unsigned GroupSize = 8; 931 SpillPlacement::BlockConstraint BCS[GroupSize]; 932 unsigned TBS[GroupSize]; 933 unsigned B = 0, T = 0; 934 935 for (unsigned i = 0; i != Blocks.size(); ++i) { 936 unsigned Number = Blocks[i]; 937 Intf.moveToBlock(Number); 938 939 if (!Intf.hasInterference()) { 940 assert(T < GroupSize && "Array overflow"); 941 TBS[T] = Number; 942 if (++T == GroupSize) { 943 SpillPlacer->addLinks(makeArrayRef(TBS, T)); 944 T = 0; 945 } 946 continue; 947 } 948 949 assert(B < GroupSize && "Array overflow"); 950 BCS[B].Number = Number; 951 952 // Interference for the live-in value. 953 if (Intf.first() <= Indexes->getMBBStartIdx(Number)) 954 BCS[B].Entry = SpillPlacement::MustSpill; 955 else 956 BCS[B].Entry = SpillPlacement::PrefSpill; 957 958 // Interference for the live-out value. 959 if (Intf.last() >= SA->getLastSplitPoint(Number)) 960 BCS[B].Exit = SpillPlacement::MustSpill; 961 else 962 BCS[B].Exit = SpillPlacement::PrefSpill; 963 964 if (++B == GroupSize) { 965 ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); 966 SpillPlacer->addConstraints(Array); 967 B = 0; 968 } 969 } 970 971 ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); 972 SpillPlacer->addConstraints(Array); 973 SpillPlacer->addLinks(makeArrayRef(TBS, T)); 974 } 975 976 void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { 977 // Keep track of through blocks that have not been added to SpillPlacer. 978 BitVector Todo = SA->getThroughBlocks(); 979 SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; 980 unsigned AddedTo = 0; 981 #ifndef NDEBUG 982 unsigned Visited = 0; 983 #endif 984 985 for (;;) { 986 ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); 987 // Find new through blocks in the periphery of PrefRegBundles. 988 for (int i = 0, e = NewBundles.size(); i != e; ++i) { 989 unsigned Bundle = NewBundles[i]; 990 // Look at all blocks connected to Bundle in the full graph. 
991 ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); 992 for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); 993 I != E; ++I) { 994 unsigned Block = *I; 995 if (!Todo.test(Block)) 996 continue; 997 Todo.reset(Block); 998 // This is a new through block. Add it to SpillPlacer later. 999 ActiveBlocks.push_back(Block); 1000 #ifndef NDEBUG 1001 ++Visited; 1002 #endif 1003 } 1004 } 1005 // Any new blocks to add? 1006 if (ActiveBlocks.size() == AddedTo) 1007 break; 1008 1009 // Compute through constraints from the interference, or assume that all 1010 // through blocks prefer spilling when forming compact regions. 1011 ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); 1012 if (Cand.PhysReg) 1013 addThroughConstraints(Cand.Intf, NewBlocks); 1014 else 1015 // Provide a strong negative bias on through blocks to prevent unwanted 1016 // liveness on loop backedges. 1017 SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); 1018 AddedTo = ActiveBlocks.size(); 1019 1020 // Perhaps iterating can enable more bundles? 1021 SpillPlacer->iterate(); 1022 } 1023 DEBUG(dbgs() << ", v=" << Visited); 1024 } 1025 1026 /// calcCompactRegion - Compute the set of edge bundles that should be live 1027 /// when splitting the current live range into compact regions. Compact 1028 /// regions can be computed without looking at interference. They are the 1029 /// regions formed by removing all the live-through blocks from the live range. 1030 /// 1031 /// Returns false if the current live range is already compact, or if the 1032 /// compact regions would form single block regions anyway. 1033 bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { 1034 // Without any through blocks, the live range is already compact. 1035 if (!SA->getNumThroughBlocks()) 1036 return false; 1037 1038 // Compact regions don't correspond to any physreg. 1039 Cand.reset(IntfCache, 0); 1040 1041 DEBUG(dbgs() << "Compact region bundles"); 1042 1043 // Use the spill placer to determine the live bundles. GrowRegion pretends 1044 // that all the through blocks have interference when PhysReg is unset. 1045 SpillPlacer->prepare(Cand.LiveBundles); 1046 1047 // The static split cost will be zero since Cand.Intf reports no interference. 1048 BlockFrequency Cost; 1049 if (!addSplitConstraints(Cand.Intf, Cost)) { 1050 DEBUG(dbgs() << ", none.\n"); 1051 return false; 1052 } 1053 1054 growRegion(Cand); 1055 SpillPlacer->finish(); 1056 1057 if (!Cand.LiveBundles.any()) { 1058 DEBUG(dbgs() << ", none.\n"); 1059 return false; 1060 } 1061 1062 DEBUG({ 1063 for (int i = Cand.LiveBundles.find_first(); i>=0; 1064 i = Cand.LiveBundles.find_next(i)) 1065 dbgs() << " EB#" << i; 1066 dbgs() << ".\n"; 1067 }); 1068 return true; 1069 } 1070 1071 /// calcSpillCost - Compute how expensive it would be to split the live range in 1072 /// SA around all use blocks instead of forming bundle regions. 1073 BlockFrequency RAGreedy::calcSpillCost() { 1074 BlockFrequency Cost = 0; 1075 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1076 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1077 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1078 unsigned Number = BI.MBB->getNumber(); 1079 // We normally only need one spill instruction - a load or a store. 1080 Cost += SpillPlacer->getBlockFrequency(Number); 1081 1082 // Unless the value is redefined in the block. 
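    // A block that is live-in, live-out, and contains a new definition needs
    // both a reload and a store, so its frequency is counted twice.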
1083 if (BI.LiveIn && BI.LiveOut && BI.FirstDef) 1084 Cost += SpillPlacer->getBlockFrequency(Number); 1085 } 1086 return Cost; 1087 } 1088 1089 /// calcGlobalSplitCost - Return the global split cost of following the split 1090 /// pattern in LiveBundles. This cost should be added to the local cost of the 1091 /// interference pattern in SplitConstraints. 1092 /// 1093 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { 1094 BlockFrequency GlobalCost = 0; 1095 const BitVector &LiveBundles = Cand.LiveBundles; 1096 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1097 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1098 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1099 SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; 1100 bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; 1101 bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; 1102 unsigned Ins = 0; 1103 1104 if (BI.LiveIn) 1105 Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); 1106 if (BI.LiveOut) 1107 Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); 1108 while (Ins--) 1109 GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); 1110 } 1111 1112 for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { 1113 unsigned Number = Cand.ActiveBlocks[i]; 1114 bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; 1115 bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; 1116 if (!RegIn && !RegOut) 1117 continue; 1118 if (RegIn && RegOut) { 1119 // We need double spill code if this block has interference. 1120 Cand.Intf.moveToBlock(Number); 1121 if (Cand.Intf.hasInterference()) { 1122 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1123 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1124 } 1125 continue; 1126 } 1127 // live-in / stack-out or stack-in live-out. 1128 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1129 } 1130 return GlobalCost; 1131 } 1132 1133 /// splitAroundRegion - Split the current live range around the regions 1134 /// determined by BundleCand and GlobalCand. 1135 /// 1136 /// Before calling this function, GlobalCand and BundleCand must be initialized 1137 /// so each bundle is assigned to a valid candidate, or NoCand for the 1138 /// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor 1139 /// objects must be initialized for the current live range, and intervals 1140 /// created for the used candidates. 1141 /// 1142 /// @param LREdit The LiveRangeEdit object handling the current split. 1143 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value 1144 /// must appear in this list. 1145 void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, 1146 ArrayRef<unsigned> UsedCands) { 1147 // These are the intervals created for new global ranges. We may create more 1148 // intervals for local ranges. 1149 const unsigned NumGlobalIntvs = LREdit.size(); 1150 DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); 1151 assert(NumGlobalIntvs && "No global intervals configured"); 1152 1153 // Isolate even single instructions when dealing with a proper sub-class. 1154 // That guarantees register class inflation for the stack interval because it 1155 // is all copies. 1156 unsigned Reg = SA->getParent().reg; 1157 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1158 1159 // First handle all the blocks with uses. 
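  // Each use block picks up its incoming and outgoing interval from
  // BundleCand; blocks with no interval on either side are isolated and get
  // per-block splits below.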
1160 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1161 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1162 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1163 unsigned Number = BI.MBB->getNumber(); 1164 unsigned IntvIn = 0, IntvOut = 0; 1165 SlotIndex IntfIn, IntfOut; 1166 if (BI.LiveIn) { 1167 unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; 1168 if (CandIn != NoCand) { 1169 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1170 IntvIn = Cand.IntvIdx; 1171 Cand.Intf.moveToBlock(Number); 1172 IntfIn = Cand.Intf.first(); 1173 } 1174 } 1175 if (BI.LiveOut) { 1176 unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; 1177 if (CandOut != NoCand) { 1178 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1179 IntvOut = Cand.IntvIdx; 1180 Cand.Intf.moveToBlock(Number); 1181 IntfOut = Cand.Intf.last(); 1182 } 1183 } 1184 1185 // Create separate intervals for isolated blocks with multiple uses. 1186 if (!IntvIn && !IntvOut) { 1187 DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); 1188 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1189 SE->splitSingleBlock(BI); 1190 continue; 1191 } 1192 1193 if (IntvIn && IntvOut) 1194 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1195 else if (IntvIn) 1196 SE->splitRegInBlock(BI, IntvIn, IntfIn); 1197 else 1198 SE->splitRegOutBlock(BI, IntvOut, IntfOut); 1199 } 1200 1201 // Handle live-through blocks. The relevant live-through blocks are stored in 1202 // the ActiveBlocks list with each candidate. We need to filter out 1203 // duplicates. 1204 BitVector Todo = SA->getThroughBlocks(); 1205 for (unsigned c = 0; c != UsedCands.size(); ++c) { 1206 ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; 1207 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 1208 unsigned Number = Blocks[i]; 1209 if (!Todo.test(Number)) 1210 continue; 1211 Todo.reset(Number); 1212 1213 unsigned IntvIn = 0, IntvOut = 0; 1214 SlotIndex IntfIn, IntfOut; 1215 1216 unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; 1217 if (CandIn != NoCand) { 1218 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1219 IntvIn = Cand.IntvIdx; 1220 Cand.Intf.moveToBlock(Number); 1221 IntfIn = Cand.Intf.first(); 1222 } 1223 1224 unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; 1225 if (CandOut != NoCand) { 1226 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1227 IntvOut = Cand.IntvIdx; 1228 Cand.Intf.moveToBlock(Number); 1229 IntfOut = Cand.Intf.last(); 1230 } 1231 if (!IntvIn && !IntvOut) 1232 continue; 1233 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1234 } 1235 } 1236 1237 ++NumGlobalSplits; 1238 1239 SmallVector<unsigned, 8> IntvMap; 1240 SE->finish(&IntvMap); 1241 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1242 1243 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1244 unsigned OrigBlocks = SA->getNumLiveBlocks(); 1245 1246 // Sort out the new intervals created by splitting. We get four kinds: 1247 // - Remainder intervals should not be split again. 1248 // - Candidate intervals can be assigned to Cand.PhysReg. 1249 // - Block-local splits are candidates for local splitting. 1250 // - DCE leftovers should go back on the queue. 1251 for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { 1252 LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); 1253 1254 // Ignore old intervals from DCE. 1255 if (getStage(Reg) != RS_New) 1256 continue; 1257 1258 // Remainder interval. Don't try splitting again, spill if it doesn't 1259 // allocate. 
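    // (SplitEditor numbers the complement, i.e. the remainder interval, 0;
    // intervals opened with openIntv() get indexes 1 and up.)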
1260 if (IntvMap[i] == 0) { 1261 setStage(Reg, RS_Spill); 1262 continue; 1263 } 1264 1265 // Global intervals. Allow repeated splitting as long as the number of live 1266 // blocks is strictly decreasing. 1267 if (IntvMap[i] < NumGlobalIntvs) { 1268 if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { 1269 DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks 1270 << " blocks as original.\n"); 1271 // Don't allow repeated splitting as a safe guard against looping. 1272 setStage(Reg, RS_Split2); 1273 } 1274 continue; 1275 } 1276 1277 // Other intervals are treated as new. This includes local intervals created 1278 // for blocks with multiple uses, and anything created by DCE. 1279 } 1280 1281 if (VerifyEnabled) 1282 MF->verify(this, "After splitting live range around region"); 1283 } 1284 1285 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1286 SmallVectorImpl<unsigned> &NewVRegs) { 1287 unsigned NumCands = 0; 1288 BlockFrequency BestCost; 1289 1290 // Check if we can split this live range around a compact region. 1291 bool HasCompact = calcCompactRegion(GlobalCand.front()); 1292 if (HasCompact) { 1293 // Yes, keep GlobalCand[0] as the compact region candidate. 1294 NumCands = 1; 1295 BestCost = BlockFrequency::getMaxFrequency(); 1296 } else { 1297 // No benefit from the compact region, our fallback will be per-block 1298 // splitting. Make sure we find a solution that is cheaper than spilling. 1299 BestCost = calcSpillCost(); 1300 DEBUG(dbgs() << "Cost of isolating all blocks = "; 1301 MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); 1302 } 1303 1304 unsigned BestCand = 1305 calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, 1306 false/*IgnoreCSR*/); 1307 1308 // No solutions found, fall back to single block splitting. 1309 if (!HasCompact && BestCand == NoCand) 1310 return 0; 1311 1312 return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); 1313 } 1314 1315 unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, 1316 AllocationOrder &Order, 1317 BlockFrequency &BestCost, 1318 unsigned &NumCands, 1319 bool IgnoreCSR) { 1320 unsigned BestCand = NoCand; 1321 Order.rewind(); 1322 while (unsigned PhysReg = Order.next()) { 1323 if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) 1324 if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) 1325 continue; 1326 1327 // Discard bad candidates before we run out of interference cache cursors. 1328 // This will only affect register classes with a lot of registers (>32). 
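    // When the cursor limit is hit, the candidate with the fewest live bundles
    // (other than the current best) is dropped to make room below.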
1329 if (NumCands == IntfCache.getMaxCursors()) { 1330 unsigned WorstCount = ~0u; 1331 unsigned Worst = 0; 1332 for (unsigned i = 0; i != NumCands; ++i) { 1333 if (i == BestCand || !GlobalCand[i].PhysReg) 1334 continue; 1335 unsigned Count = GlobalCand[i].LiveBundles.count(); 1336 if (Count < WorstCount) 1337 Worst = i, WorstCount = Count; 1338 } 1339 --NumCands; 1340 GlobalCand[Worst] = GlobalCand[NumCands]; 1341 if (BestCand == NumCands) 1342 BestCand = Worst; 1343 } 1344 1345 if (GlobalCand.size() <= NumCands) 1346 GlobalCand.resize(NumCands+1); 1347 GlobalSplitCandidate &Cand = GlobalCand[NumCands]; 1348 Cand.reset(IntfCache, PhysReg); 1349 1350 SpillPlacer->prepare(Cand.LiveBundles); 1351 BlockFrequency Cost; 1352 if (!addSplitConstraints(Cand.Intf, Cost)) { 1353 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); 1354 continue; 1355 } 1356 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; 1357 MBFI->printBlockFreq(dbgs(), Cost)); 1358 if (Cost >= BestCost) { 1359 DEBUG({ 1360 if (BestCand == NoCand) 1361 dbgs() << " worse than no bundles\n"; 1362 else 1363 dbgs() << " worse than " 1364 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; 1365 }); 1366 continue; 1367 } 1368 growRegion(Cand); 1369 1370 SpillPlacer->finish(); 1371 1372 // No live bundles, defer to splitSingleBlocks(). 1373 if (!Cand.LiveBundles.any()) { 1374 DEBUG(dbgs() << " no bundles.\n"); 1375 continue; 1376 } 1377 1378 Cost += calcGlobalSplitCost(Cand); 1379 DEBUG({ 1380 dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) 1381 << " with bundles"; 1382 for (int i = Cand.LiveBundles.find_first(); i>=0; 1383 i = Cand.LiveBundles.find_next(i)) 1384 dbgs() << " EB#" << i; 1385 dbgs() << ".\n"; 1386 }); 1387 if (Cost < BestCost) { 1388 BestCand = NumCands; 1389 BestCost = Cost; 1390 } 1391 ++NumCands; 1392 } 1393 return BestCand; 1394 } 1395 1396 unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, 1397 bool HasCompact, 1398 SmallVectorImpl<unsigned> &NewVRegs) { 1399 SmallVector<unsigned, 8> UsedCands; 1400 // Prepare split editor. 1401 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1402 SE->reset(LREdit, SplitSpillMode); 1403 1404 // Assign all edge bundles to the preferred candidate, or NoCand. 1405 BundleCand.assign(Bundles->getNumBundles(), NoCand); 1406 1407 // Assign bundles for the best candidate region. 1408 if (BestCand != NoCand) { 1409 GlobalSplitCandidate &Cand = GlobalCand[BestCand]; 1410 if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { 1411 UsedCands.push_back(BestCand); 1412 Cand.IntvIdx = SE->openIntv(); 1413 DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " 1414 << B << " bundles, intv " << Cand.IntvIdx << ".\n"); 1415 (void)B; 1416 } 1417 } 1418 1419 // Assign bundles for the compact region. 
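  // The compact region candidate, when present, is always GlobalCand.front()
  // and has no physreg (see calcCompactRegion).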
1420 if (HasCompact) { 1421 GlobalSplitCandidate &Cand = GlobalCand.front(); 1422 assert(!Cand.PhysReg && "Compact region has no physreg"); 1423 if (unsigned B = Cand.getBundles(BundleCand, 0)) { 1424 UsedCands.push_back(0); 1425 Cand.IntvIdx = SE->openIntv(); 1426 DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " 1427 << Cand.IntvIdx << ".\n"); 1428 (void)B; 1429 } 1430 } 1431 1432 splitAroundRegion(LREdit, UsedCands); 1433 return 0; 1434 } 1435 1436 1437 //===----------------------------------------------------------------------===// 1438 // Per-Block Splitting 1439 //===----------------------------------------------------------------------===// 1440 1441 /// tryBlockSplit - Split a global live range around every block with uses. This 1442 /// creates a lot of local live ranges, that will be split by tryLocalSplit if 1443 /// they don't allocate. 1444 unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1445 SmallVectorImpl<unsigned> &NewVRegs) { 1446 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); 1447 unsigned Reg = VirtReg.reg; 1448 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1449 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1450 SE->reset(LREdit, SplitSpillMode); 1451 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1452 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1453 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1454 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1455 SE->splitSingleBlock(BI); 1456 } 1457 // No blocks were split. 1458 if (LREdit.empty()) 1459 return 0; 1460 1461 // We did split for some blocks. 1462 SmallVector<unsigned, 8> IntvMap; 1463 SE->finish(&IntvMap); 1464 1465 // Tell LiveDebugVariables about the new ranges. 1466 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1467 1468 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1469 1470 // Sort out the new intervals created by splitting. The remainder interval 1471 // goes straight to spilling, the new local ranges get to stay RS_New. 1472 for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { 1473 LiveInterval &LI = LIS->getInterval(LREdit.get(i)); 1474 if (getStage(LI) == RS_New && IntvMap[i] == 0) 1475 setStage(LI, RS_Spill); 1476 } 1477 1478 if (VerifyEnabled) 1479 MF->verify(this, "After splitting live range around basic blocks"); 1480 return 0; 1481 } 1482 1483 1484 //===----------------------------------------------------------------------===// 1485 // Per-Instruction Splitting 1486 //===----------------------------------------------------------------------===// 1487 1488 /// Get the number of allocatable registers that match the constraints of \p Reg 1489 /// on \p MI and that are also in \p SuperRC. 1490 static unsigned getNumAllocatableRegsForConstraints( 1491 const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, 1492 const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, 1493 const RegisterClassInfo &RCI) { 1494 assert(SuperRC && "Invalid register class"); 1495 1496 const TargetRegisterClass *ConstrainedRC = 1497 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, 1498 /* ExploreBundle */ true); 1499 if (!ConstrainedRC) 1500 return 0; 1501 return RCI.getNumAllocatableRegs(ConstrainedRC); 1502 } 1503 1504 /// tryInstructionSplit - Split a live range around individual instructions. 1505 /// This is normally not worthwhile since the spiller is doing essentially the 1506 /// same thing. 
However, when the live range is in a constrained register 1507 /// class, it may help to insert copies such that parts of the live range can 1508 /// be moved to a larger register class. 1509 /// 1510 /// This is similar to spilling to a larger register class. 1511 unsigned 1512 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1513 SmallVectorImpl<unsigned> &NewVRegs) { 1514 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); 1515 // There is no point to this if there are no larger sub-classes. 1516 if (!RegClassInfo.isProperSubClass(CurRC)) 1517 return 0; 1518 1519 // Always enable split spill mode, since we're effectively spilling to a 1520 // register. 1521 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1522 SE->reset(LREdit, SplitEditor::SM_Size); 1523 1524 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1525 if (Uses.size() <= 1) 1526 return 0; 1527 1528 DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); 1529 1530 const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); 1531 unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); 1532 // Split around every non-copy instruction if this split will relax 1533 // the constraints on the virtual register. 1534 // Otherwise, splitting just inserts uncoalescable copies that do not help 1535 // the allocation. 1536 for (unsigned i = 0; i != Uses.size(); ++i) { 1537 if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) 1538 if (MI->isFullCopy() || 1539 SuperRCNumAllocatableRegs == 1540 getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, 1541 TRI, RCI)) { 1542 DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); 1543 continue; 1544 } 1545 SE->openIntv(); 1546 SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); 1547 SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); 1548 SE->useIntv(SegStart, SegStop); 1549 } 1550 1551 if (LREdit.empty()) { 1552 DEBUG(dbgs() << "All uses were copies.\n"); 1553 return 0; 1554 } 1555 1556 SmallVector<unsigned, 8> IntvMap; 1557 SE->finish(&IntvMap); 1558 DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); 1559 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1560 1561 // Assign all new registers to RS_Spill. This was the last chance. 1562 setStage(LREdit.begin(), LREdit.end(), RS_Spill); 1563 return 0; 1564 } 1565 1566 1567 //===----------------------------------------------------------------------===// 1568 // Local Splitting 1569 //===----------------------------------------------------------------------===// 1570 1571 1572 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted 1573 /// in order to use PhysReg between two entries in SA->UseSlots. 1574 /// 1575 /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. 1576 /// 1577 void RAGreedy::calcGapWeights(unsigned PhysReg, 1578 SmallVectorImpl<float> &GapWeight) { 1579 assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); 1580 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1581 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1582 const unsigned NumGaps = Uses.size()-1; 1583 1584 // Start and end points for the interference check. 1585 SlotIndex StartIdx = 1586 BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; 1587 SlotIndex StopIdx = 1588 BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; 1589 1590 GapWeight.assign(NumGaps, 0.0f); 1591 1592 // Add interference from each overlapping register. 
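  // First add interference from other virtual registers, taken from the live
  // interval unions; fixed (reg unit) interference gets an infinite weight in
  // the second loop below.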
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
          .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
      Matrix->getLiveUnions()[*Units].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight;
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = llvm::huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
      dbgs() << ' ' << Uses[i];
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
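    // RMS is sorted by slot index, so a binary search gives the first regmask
    // slot at or after VirtReg's first use.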
    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
                                   Uses.front().getRegSlot()) - RMS.begin();
    unsigned re = RMS.size();
    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
      // Look for Uses[i] <= RMS <= Uses[i+1].
      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
        break;
      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
      RegMaskGaps.push_back(i);
      // Advance ri to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
        ++ri;
    }
    DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
    SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
    (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
        GapWeight[RegMaskGaps[i]] = llvm::huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    for (;;) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                   << " i=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < llvm::huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
                                 Uses[SplitBefore].distance(Uses[SplitAfter]) +
                                 (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
              MaxGap = std::max(MaxGap, GapWeight[i]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        DEBUG(dbgs() << " end\n");
        break;
      }

      DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
               << '-' << Uses[BestAfter] << ", " << BestDiff
               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);

  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
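  // (Interval index 0 denotes the complement range left outside the split; the
  // single openIntv() above created interval 1, which is why IntvMap[i] == 1
  // below identifies the range we just carved out.)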
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    DEBUG(dbgs() << "Tagging non-progress ranges: ");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
      if (IntvMap[i] == 1) {
        setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
      }
    DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs) {
  // Ranges must be Split2 or less.
  if (getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // FIXME: SplitAnalysis may repair broken live ranges coming from the
  // coalescer. That may cause the range to become allocatable which means that
  // tryRegionSplit won't be making progress. This check should be replaced with
  // an assertion when the coalescer is fixed.
  if (SA->didRepairRange()) {
    // VirtReg has changed, so all cached queries are invalid.
    Matrix->invalidateVirtRegs();
    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
      return PhysReg;
  }

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (getStage(VirtReg) < RS_Split2) {
    unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool
RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
                                     SmallLISet &RecoloringCandidates,
                                     const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there is LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
        LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
      DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      // If Intf is done and sits on the same register class as VirtReg,
      // it would not be recolorable as it is in the same state as VirtReg.
      if ((getStage(*Intf) == RS_Done &&
           MRI->getRegClass(Intf->reg) == CurRC) ||
          FixedRegisters.count(Intf->reg)) {
        DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}

/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC does as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA does as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<unsigned> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           unsigned Depth) {
  DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
  // Ranges must be Done.
  assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with a too large search space
  // for targets with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of Live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;
  // Record the original mapping virtual register to physical register in case
  // the recoloring fails.
  DenseMap<unsigned, unsigned> VirtRegToPhysReg;
  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  FixedRegisters.insert(VirtReg.reg);

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                 << PrintReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    VirtRegToPhysReg.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Early give up on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases).
    // Enqueue them for recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
                              EndIt = RecoloringCandidates.end();
         It != EndIt; ++It) {
      unsigned ItVirtReg = (*It)->reg;
      enqueue(RecoloringQueue, *It);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
      // Unset the related struct.
      Matrix->unassign(**It);
    }

    // Do as if VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
                                Depth)) {
      // Do not mess up with the global assignment process.
      // I.e., VirtReg must be unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                 << PrintReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
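    // First restore the fixed-register set and take VirtReg back out of the
    // matrix, then give every recoloring candidate its original physical
    // register back.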
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
                              EndIt = RecoloringCandidates.end();
         It != EndIt; ++It) {
      unsigned ItVirtReg = (*It)->reg;
      if (VRM->hasPhys(ItVirtReg))
        Matrix->unassign(**It);
      Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]);
    }
  }

  // Last chance recoloring did not work either, give up.
  return ~0u;
}

/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
/// \p NewRegs will contain any new virtual register created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<unsigned> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    LiveInterval *LI = dequeue(RecoloringQueue);
    DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    unsigned PhysReg;
    PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
    if (PhysReg == ~0u || !PhysReg)
      return false;
    DEBUG(dbgs() << "Recoloring of " << *LI
                 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg);
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction()->getContext();
  SmallVirtRegSet FixedRegisters;
  unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
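/// When spilling is chosen, \p CostPerUseLimit is lowered to 1 so that the
/// subsequent eviction attempt will not pick a callee-saved register.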
unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
                                         AllocationOrder &Order,
                                         unsigned PhysReg,
                                         unsigned &CostPerUseLimit,
                                         SmallVectorImpl<unsigned> &NewVRegs) {
  if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
    return 0;
  }
  return PhysReg;
}

void RAGreedy::initializeCSRCost() {
  // We use the larger of the command-line option and the value reported
  // by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}

unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
                                     SmallVectorImpl<unsigned> &NewVRegs,
                                     SmallVirtRegSet &FixedRegisters,
                                     unsigned Depth) {
  unsigned CostPerUseLimit = ~0u;
  // First try assigning a free register.
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
    // We check other options if we are using a CSR for the first time.
    bool CSRFirstUse = false;
    if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
      if (!MRI->isPhysRegUsed(CSR))
        CSRFirstUse = true;

    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register, go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) {
      unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                              CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = getStage(VirtReg);
  DEBUG(dbgs() << StageName[Stage]
               << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');

  // Try to evict a less worthy live range, but only for ranges from the primary
  // queue. The RS_Split ranges already failed to do this, and they should not
  // get a second chance until they have been split.
  if (Stage != RS_Split)
    if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit))
      return PhysReg;

  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    setStage(VirtReg, RS_Split);
    DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(VirtReg.reg);
    return 0;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable())
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   Depth);

  // Try splitting VirtReg or interferences.
  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
  if (PhysReg || !NewVRegs.empty())
    return PhysReg;

  // Finally spill VirtReg itself.
  NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
  LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  spiller().spill(LRE);
  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

  if (VerifyEnabled)
    MF->verify(this, "After spilling");

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
               << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TRI = MF->getTarget().getRegisterInfo();
  TII = MF->getTarget().getInstrInfo();
  RCI.runOnMachineFunction(mf);
  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());
  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();

  initializeCSRCost();

  calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);

  DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI));
  ExtraRegInfo.clear();
  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  NextCascade = 1;
  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32); // This will grow as needed.

  allocatePhysRegs();
  releaseMemory();
  return true;
}