//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "SpillPlacement.h"
#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits,  "Number of split local live ranges");
STATISTIC(NumEvicted,      "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
               clEnumValEnd),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interferences at a time"),
    cl::init(8));

static cl::opt<bool>
ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
                 cl::desc("Exhaustive Search for registers bypassing the depth "
                          "and interference cutoffs of last chance recoloring"));

static cl::opt<bool> EnableLocalReassignment(
    "enable-local-reassign", cl::Hidden,
    cl::desc("Local reassignment can yield better allocation decisions, but "
             "may be compile time intensive"),
    cl::init(false));

static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
                 cl::desc("Cost for first time use of callee-saved register."),
                 cl::init(0), cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

namespace {
class RAGreedy : public MachineFunctionPass,
                 public RegAllocBase,
                 private LiveRangeEdit::Delegate {
  // Convenient shortcuts.
  typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
  typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
  typedef SmallSet<unsigned, 16> SmallVirtRegSet;

  // context
  MachineFunction *MF;

  // Shortcuts to some useful interface.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  RegisterClassInfo RCI;

  // analyses
  SlotIndexes *Indexes;
  MachineBlockFrequencyInfo *MBFI;
  MachineDominatorTree *DomTree;
  MachineLoopInfo *Loops;
  EdgeBundles *Bundles;
  SpillPlacement *SpillPlacer;
  LiveDebugVariables *DebugVars;
  AliasAnalysis *AA;

  // state
  std::unique_ptr<Spiller> SpillerInstance;
  PQueue Queue;
  unsigned NextCascade;

  // Live ranges pass through a number of stages as we try to allocate them.
  // Some of the stages may also create new live ranges:
  //
  // - Region splitting.
  // - Per-block splitting.
  // - Local splitting.
  // - Spilling.
  //
  // Ranges produced by one of the stages skip the previous stages when they are
  // dequeued. This improves performance because we can skip interference checks
  // that are unlikely to give any results. It also guarantees that the live
  // range splitting algorithm terminates, something that is otherwise hard to
  // ensure.
  enum LiveRangeStage {
    /// Newly created live range that has never been queued.
    RS_New,

    /// Only attempt assignment and eviction. Then requeue as RS_Split.
    RS_Assign,

    /// Attempt live range splitting if assignment is impossible.
    RS_Split,

    /// Attempt more aggressive live range splitting that is guaranteed to make
    /// progress. This is used for split products that may not be making
    /// progress.
    RS_Split2,

    /// Live range will be spilled. No more splitting will be attempted.
    RS_Spill,

    /// Live range is in memory. Because of other evictions, it might get moved
    /// into a register in the end.
    RS_Memory,

    /// There is nothing more we can do to this live range. Abort compilation
    /// if it can't be assigned.
    RS_Done
  };

  // Enum CutOffStage to keep track of whether register allocation failed
  // because of the cutoffs encountered in last chance recoloring.
  // Note: This is used as a bitmask. New values should be the next power of 2.
  enum CutOffStage {
    // No cutoffs encountered
    CO_None = 0,

    // lcr-max-depth cutoff encountered
    CO_Depth = 1,

    // lcr-max-interf cutoff encountered
    CO_Interf = 2
  };

  uint8_t CutOffInfo;

#ifndef NDEBUG
  static const char *const StageName[];
#endif

  // RegInfo - Keep additional information about each live range.
  struct RegInfo {
    LiveRangeStage Stage;

    // Cascade - Eviction loop prevention. See canEvictInterference().
    unsigned Cascade;

    RegInfo() : Stage(RS_New), Cascade(0) {}
  };

  IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;

  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
    return ExtraRegInfo[VirtReg.reg].Stage;
  }

  void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    ExtraRegInfo[VirtReg.reg].Stage = Stage;
  }

  template<typename Iterator>
  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    for (;Begin != End; ++Begin) {
      unsigned Reg = *Begin;
      if (ExtraRegInfo[Reg].Stage == RS_New)
        ExtraRegInfo[Reg].Stage = NewStage;
    }
  }

  /// Cost of evicting interference.
  struct EvictionCost {
    unsigned BrokenHints; ///< Total number of broken hints.
    float MaxWeight;      ///< Maximum spill weight evicted.

    EvictionCost(): BrokenHints(0), MaxWeight(0) {}

    bool isMax() const { return BrokenHints == ~0u; }

    void setMax() { BrokenHints = ~0u; }

    void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }

    bool operator<(const EvictionCost &O) const {
      return std::tie(BrokenHints, MaxWeight) <
             std::tie(O.BrokenHints, O.MaxWeight);
    }
  };

  // splitting state.
  std::unique_ptr<SplitAnalysis> SA;
  std::unique_ptr<SplitEditor> SE;

  /// Cached per-block interference maps
  InterferenceCache IntfCache;

  /// All basic blocks where the current register has uses.
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;

  /// Global live range splitting candidate info.
  struct GlobalSplitCandidate {
    // Register intended for assignment, or 0.
    unsigned PhysReg;

    // SplitKit interval index for this candidate.
    unsigned IntvIdx;

    // Interference for PhysReg.
    InterferenceCache::Cursor Intf;

    // Bundles where this candidate should be live.
    BitVector LiveBundles;
    SmallVector<unsigned, 8> ActiveBlocks;

    void reset(InterferenceCache &Cache, unsigned Reg) {
      PhysReg = Reg;
      IntvIdx = 0;
      Intf.setPhysReg(Cache, Reg);
      LiveBundles.clear();
      ActiveBlocks.clear();
    }

    // Set B[i] = C for every live bundle where B[i] was NoCand.
    unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
      unsigned Count = 0;
      for (int i = LiveBundles.find_first(); i >= 0;
           i = LiveBundles.find_next(i))
        if (B[i] == NoCand) {
          B[i] = C;
          Count++;
        }
      return Count;
    }
  };

  /// Candidate info for each PhysReg in AllocationOrder.
  /// This vector never shrinks, but grows to the size of the largest register
  /// class.
  SmallVector<GlobalSplitCandidate, 32> GlobalCand;

  enum : unsigned { NoCand = ~0u };

  /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
  /// NoCand which indicates the stack interval.
  SmallVector<unsigned, 32> BundleCand;

  /// Callee-save register cost, calculated once per machine function.
  BlockFrequency CSRCost;

  /// Whether to run the local reassignment heuristic. This information is
  /// obtained from the TargetSubtargetInfo.
  bool EnableLocalReassign;

  /// Set of broken hints that may be reconciled later because of eviction.
  SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;

public:
  RAGreedy();

  /// Return the pass name.
  const char* getPassName() const override {
    return "Greedy Register Allocator";
  }

  /// RAGreedy analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  void releaseMemory() override;
  Spiller &spiller() override { return *SpillerInstance; }
  void enqueue(LiveInterval *LI) override;
  LiveInterval *dequeue() override;
  unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
  void aboutToRemoveInterval(LiveInterval &) override;

  /// Perform register allocation.
  bool runOnMachineFunction(MachineFunction &mf) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoPHIs);
  }

  static char ID;

private:
  unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
                             SmallVirtRegSet &, unsigned = 0);

  bool LRE_CanEraseVirtReg(unsigned) override;
  void LRE_WillShrinkVirtReg(unsigned) override;
  void LRE_DidCloneVirtReg(unsigned, unsigned) override;
  void enqueue(PQueue &CurQueue, LiveInterval *LI);
  LiveInterval *dequeue(PQueue &CurQueue);

  BlockFrequency calcSpillCost();
  bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
  void growRegion(GlobalSplitCandidate &Cand);
  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
  bool calcCompactRegion(GlobalSplitCandidate&);
  void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
  void calcGapWeights(unsigned, SmallVectorImpl<float>&);
  unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
  bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
  void evictInterference(LiveInterval&, unsigned,
                         SmallVectorImpl<unsigned>&);
  bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
                                  SmallLISet &RecoloringCandidates,
                                  const SmallVirtRegSet &FixedRegisters);

  unsigned tryAssign(LiveInterval&, AllocationOrder&,
                     SmallVectorImpl<unsigned>&);
  unsigned tryEvict(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&, unsigned = ~0u);
  unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<unsigned>&);
  /// Calculate cost of region splitting.
  unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    BlockFrequency &BestCost,
                                    unsigned &NumCands, bool IgnoreCSR);
  /// Perform region splitting.
  unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                         bool HasCompact,
                         SmallVectorImpl<unsigned> &NewVRegs);
  /// Check other options before using a callee-saved register for the first
  /// time.
  unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
                                 unsigned PhysReg, unsigned &CostPerUseLimit,
                                 SmallVectorImpl<unsigned> &NewVRegs);
  void initializeCSRCost();
  unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
                               SmallVectorImpl<unsigned>&);
  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned trySplit(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&);
  unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
                                   SmallVectorImpl<unsigned> &,
                                   SmallVirtRegSet &, unsigned);
  bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
                               SmallVirtRegSet &, unsigned);
  void tryHintRecoloring(LiveInterval &);
  void tryHintsRecoloring();

  /// Model the information carried by one end of a copy.
  struct HintInfo {
    /// The frequency of the copy.
    BlockFrequency Freq;
    /// The virtual register or physical register.
    unsigned Reg;
    /// Its currently assigned register.
    /// In case of a physical register Reg == PhysReg.
    unsigned PhysReg;
    HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
        : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
  };
  typedef SmallVector<HintInfo, 4> HintsInfo;
  BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
  void collectHintInfo(unsigned, HintsInfo &);

  bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
};
} // end anonymous namespace

char RAGreedy::ID = 0;

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
  initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
  initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
  if (VRM->hasPhys(VirtReg)) {
    LiveInterval &LI = LIS->getInterval(VirtReg);
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!ExtraRegInfo.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it to
  // be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned:
  // requeue the original as RS_Assign, and give the clone the same stage as
  // the parent.
  ExtraRegInfo[Old].Stage = RS_Assign;
  ExtraRegInfo.grow(New);
  ExtraRegInfo[New] = ExtraRegInfo[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  ExtraRegInfo.clear();
  GlobalCand.clear();
}

void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const unsigned Reg = LI->reg;
  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
         "Can only enqueue virtual registers");
  unsigned Prio;

  ExtraRegInfo.grow(Reg);
  if (ExtraRegInfo[Reg].Stage == RS_New)
    ExtraRegInfo[Reg].Stage = RS_Assign;

  if (ExtraRegInfo[Reg].Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = !ReverseLocal &&
      (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());

    if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short live ranges to be assigned
        // first to one of the cheap registers. This could be much faster for
        // very large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
      }
      Prio |= RC.AllocationPriority << 24;
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
    }
    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
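  // The priority queue pops the largest (Prio, ~Reg) pair first, so pushing
  // the complemented register number makes lower-numbered vregs compare
  // higher among equal priorities; dequeue() complements the value again to
  // recover the register.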
  CurQueue.push(std::make_pair(Prio, ~Reg));
}

LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                             AllocationOrder &Order,
                             SmallVectorImpl<unsigned> &NewVRegs) {
  Order.rewind();
  unsigned PhysReg;
  while ((PhysReg = Order.next()))
    if (!Matrix->checkInterference(VirtReg, PhysReg))
      break;
  if (!PhysReg || Order.isHint())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
    if (Order.isHint(Hint)) {
      DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
      EvictionCost MaxCost;
      MaxCost.setBrokenHints(1);
      if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
        evictInterference(VirtReg, Hint, NewVRegs);
        return Hint;
      }
    }

  // Try to evict interference from a cheaper alternative.
  unsigned Cost = TRI->getCostPerUse(PhysReg);

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
               << '\n');
  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
  unsigned PhysReg;
  while ((PhysReg = Order.next())) {
    if (PhysReg == PrevReg)
      continue;

    MCRegUnitIterator Units(PhysReg, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      break;
  }
  if (PhysReg)
    DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                 << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
                 << '\n');
  return PhysReg;
}

/// shouldEvict - determine if A should evict the assigned live range B. The
/// eviction policy defined by this function together with the allocation order
/// defined by enqueue() decides which registers ultimately end up being split
/// and spilled.
///
/// Cascade numbers are used to prevent infinite loops if this function is a
/// cyclic relation.
///
/// @param A          The live range to be assigned.
/// @param IsHint     True when A is about to be assigned to its preferred
///                   register.
/// @param B          The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
                           LiveInterval &B, bool BreaksHint) {
  bool CanSplit = getStage(B) < RS_Spill;

  // Be fairly aggressive about following hints as long as the evictee can be
  // split.
  if (CanSplit && IsHint && !BreaksHint)
    return true;

  if (A.weight > B.weight) {
    DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
    return true;
  }
  return false;
}

/// canEvictInterference - Return true if all interferences between VirtReg and
/// PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
/// @param IsHint  True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                    bool IsHint, EvictionCost &MaxCost) {
  // It is only possible to evict virtual register interference.
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
    return false;

  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);

  // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
  // involved in an eviction before. If a cascade number was assigned, deny
  // evicting anything with the same or a newer cascade number. This prevents
  // infinite eviction loops.
  //
  // This works out so a register without a cascade number is allowed to evict
  // anything, and it can be evicted by anything.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = NextCascade;

  EvictionCost Cost;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are 10 or more interferences, chances are one is heavier.
    if (Q.collectInterferingVRegs(10) >= 10)
      return false;

    // Check if any interfering live range is heavier than MaxWeight.
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
             "Only expecting virtual register interference from query");
      // Never evict spill products. They cannot split or spill.
      if (getStage(*Intf) == RS_Done)
        return false;
      // Once a live range becomes small enough, it is urgent that we find a
      // register for it. This is indicated by an infinite spill weight. These
      // urgent live ranges get to evict almost anything.
      //
      // Also allow urgent evictions of unspillable ranges from a strictly
      // larger allocation order.
      bool Urgent = !VirtReg.isSpillable() &&
        (Intf->isSpillable() ||
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
      // Only evict older cascades or live ranges without a cascade.
      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
      if (Cascade <= IntfCascade) {
        if (!Urgent)
          return false;
        // We permit breaking cascades for urgent evictions. It should be the
        // last resort, though, so make it really expensive.
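        // EvictionCost::operator< compares BrokenHints before MaxWeight, so
        // adding 10 here makes breaking a cascade dominate the cost of any
        // individually broken hints.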
        Cost.BrokenHints += 10;
      }
      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
      if (Urgent)
        continue;
      // Apply the eviction policy for non-urgent evictions.
      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
        return false;
      // If !MaxCost.isMax(), then we're just looking for a cheap register.
      // Evicting another local live range in this case could lead to suboptimal
      // coloring.
      if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
          (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
        return false;
      }
    }
  }
  MaxCost = Cost;
  return true;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;

  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
               << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<LiveInterval*, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
    LiveInterval *Intf = Intfs[i];
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg))
      continue;
    Matrix->unassign(*Intf);
    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraRegInfo[Intf->reg].Cascade = Cascade;
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg);
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
  unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (CSR == 0)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                            AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs,
                            unsigned CostPerUseLimit) {
  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);

  // Keep track of the cheapest interference seen so far.
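  // Start from the sentinel maximum so the first candidate whose interference
  // can be evicted at all becomes the current best.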
  EvictionCost BestCost;
  BestCost.setMax();
  unsigned BestPhys = 0;
  unsigned OrderLimit = Order.getOrder().size();

  // When we are just looking for a reduced cost per use, don't break any
  // hints, and only evict smaller spill weights.
  if (CostPerUseLimit < ~0u) {
    BestCost.BrokenHints = 0;
    BestCost.MaxWeight = VirtReg.weight;

    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
    unsigned MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
                   << ", no cheaper registers to be found.\n");
      return 0;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
    }
  }

  Order.rewind();
  while (unsigned PhysReg = Order.next(OrderLimit)) {
    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
      continue;
    // The first use of a callee-saved register in a function has cost 1.
    // Don't start using a CSR when the CostPerUseLimit is low.
    if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
            << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
            << '\n');
      continue;
    }

    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
      continue;

    // Best so far.
    BestPhys = PhysReg;

    // Stop if the hint can be used.
    if (Order.isHint())
      break;
  }

  if (!BestPhys)
    return 0;

  evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
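    // Three cases: interference reaching the block entry forces a spill
    // across the entry (MustSpill), interference before the first use makes
    // spilling preferable (PrefSpill), and interference between the uses only
    // costs an extra spill instruction.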
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned i = 0; i != Blocks.size(); ++i) {
    unsigned Number = Blocks[i];
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(makeArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(makeArrayRef(BCS, B));
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
}

void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  for (;;) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (int i = 0, e = NewBundles.size(); i != e; ++i) {
      unsigned Bundle = NewBundles[i];
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
           I != E; ++I) {
        unsigned Block = *I;
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg)
      addThroughConstraints(Cand.Intf, NewBlocks);
    else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  DEBUG(dbgs() << ", v=" << Visited);
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, 0);

  DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  growRegion(Cand);
  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  DEBUG({
    for (int i = Cand.LiveBundles.find_first(); i>=0;
         i = Cand.LiveBundles.find_next(i))
      dbgs() << " EB#" << i;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
    unsigned Ins = 0;

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
    unsigned Number = Cand.ActiveBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  unsigned Reg = SA->getParent().reg;
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
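  // For each use block, look up the candidates selected for its entry and
  // exit bundles, then dispatch to the matching SplitEditor primitive:
  // live-through, live-in only, live-out only, or isolated single block.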
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned c = 0; c != UsedCands.size(); ++c) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      unsigned Number = Blocks[i];
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    LiveInterval &Reg = LIS->getInterval(LREdit.get(i));

    // Ignore old intervals from DCE.
    if (getStage(Reg) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
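    // SplitEditor numbers the complement (remainder) interval 0, so
    // IntvMap[i] == 0 identifies the leftover pieces.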
    if (IntvMap[i] == 0) {
      setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[i] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                     << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                  SmallVectorImpl<unsigned> &NewVRegs) {
  unsigned NumCands = 0;
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = calcSpillCost();
    DEBUG(dbgs() << "Cost of isolating all blocks = ";
          MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand =
      calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
                               false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return 0;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
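    // The candidate covering the fewest live bundles is the least promising
    // region, so it is the one replaced when the cursor table is full.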
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned i = 0; i != NumCands; ++i) {
        if (i == BestCand || !GlobalCand[i].PhysReg)
          continue;
        unsigned Count = GlobalCand[i].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = i;
          WorstCount = Count;
        }
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
          MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    growRegion(Cand);

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand);
    DEBUG({
      dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
             << " with bundles";
      for (int i = Cand.LiveBundles.find_first(); i>=0;
           i = Cand.LiveBundles.find_next(i))
        dbgs() << " EB#" << i;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }
  return BestCand;
}

unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
                   << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
                   << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  unsigned Reg = VirtReg.reg;
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    LiveInterval &LI = LIS->getInterval(LREdit.get(i));
    if (getStage(LI) == RS_New && IntvMap[i] == 0)
      setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
//                         Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing.
/// However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                              SmallVectorImpl<unsigned> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
  // There is no point to this if there are no larger sub-classes.
  if (!RegClassInfo.isProperSubClass(CurRC))
    return 0;

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (unsigned i = 0; i != Uses.size(); ++i) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
      if (MI->isFullCopy() ||
          SuperRCNumAllocatableRegs ==
              getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
                                                  TRI, RCI)) {
        DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
        continue;
      }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
    SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
  ExtraRegInfo.resize(MRI->getNumVirtRegs());

  // Assign all new registers to RS_Spill. This was the last chance.
  setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}


//===----------------------------------------------------------------------===//
//                             Local Splitting
//===----------------------------------------------------------------------===//


/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
///
void RAGreedy::calcGapWeights(unsigned PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
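  // For illustration (hypothetical slot numbers): with uses at slots
  // {4, 8, 12} there are two gaps, 4-8 and 8-12. An interfering segment that
  // overlaps the instruction at slot 8 raises both GapWeight[0] and
  // GapWeight[1] to at least its weight, because interference overlapping an
  // instruction counts in both surrounding gaps.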
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
           .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[*Units].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight;
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = llvm::huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
      dbgs() << ' ' << Uses[i];
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
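    // lower_bound finds the first regmask slot at or after the first use;
    // any earlier regmask slots cannot overlap the live range.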
    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
                                   Uses.front().getRegSlot()) - RMS.begin();
    unsigned re = RMS.size();
    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
      // Look for Uses[i] <= RMS <= Uses[i+1].
      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
        break;
      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
      RegMaskGaps.push_back(i);
      // Advance ri to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
        ++ri;
    }
    DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
        GapWeight[RegMaskGaps[i]] = llvm::huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    for (;;) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                   << " i=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < llvm::huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
              MaxGap = std::max(MaxGap, GapWeight[i]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        DEBUG(dbgs() << " end\n");
        break;
      }

      DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
               << '-' << Uses[BestAfter] << ", " << BestDiff
               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);

  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
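  // Worked example with hypothetical numbers: 4 uses give NumGaps = 3. A
  // split with BestBefore = 0 and BestAfter = 3 on a range that is neither
  // live-in nor live-out yields NewGaps = 0 + 3 - 0 + 0 = 3 == NumGaps, so
  // the non-progress ranges get tagged RS_Split2 below.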
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    DEBUG(dbgs() << "Tagging non-progress ranges: ");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
      if (IntvMap[i] == 1) {
        setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
      }
    DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return a PhysReg when VirtReg may be assigned, and/or new virtual
/// registers in NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs) {
  // Ranges must be Split2 or less.
  if (getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // FIXME: SplitAnalysis may repair broken live ranges coming from the
  // coalescer. That may cause the range to become allocatable which means that
  // tryRegionSplit won't be making progress. This check should be replaced with
  // an assertion when the coalescer is fixed.
  if (SA->didRepairRange()) {
    // VirtReg has changed, so all cached queries are invalid.
    Matrix->invalidateVirtRegs();
    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
      return PhysReg;
  }

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (getStage(VirtReg) < RS_Split2) {
    unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
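/// \return true when every interference on \p PhysReg may be recolorable;
/// false when the interference cutoff is reached or when some interference
/// is known to be unrecolorable (RS_Done in the same register class, or
/// already fixed in this session).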
bool
RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
                                     SmallLISet &RecoloringCandidates,
                                     const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there is LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
        LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
      DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      // If Intf is done and sits on the same register class as VirtReg, it
      // would not be recolorable as it is in the same state as VirtReg.
      if ((getStage(*Intf) == RS_Done &&
           MRI->getRegClass(Intf->reg) == CurRC) ||
          FixedRegisters.count(Intf->reg)) {
        DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}

/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC does as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA does as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<unsigned> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           unsigned Depth) {
  DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
  // Ranges must be Done.
  assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with too large a search space
  // for targets with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;
  // Record the original mapping from virtual register to physical register in
  // case the recoloring fails.
  DenseMap<unsigned, unsigned> VirtRegToPhysReg;
  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  FixedRegisters.insert(VirtReg.reg);

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                 << PrintReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    VirtRegToPhysReg.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Early give up on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases).
    // Enqueue them for recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
                              EndIt = RecoloringCandidates.end();
         It != EndIt; ++It) {
      unsigned ItVirtReg = (*It)->reg;
      enqueue(RecoloringQueue, *It);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
      // Unset the related struct.
      Matrix->unassign(**It);
    }

    // Do as if VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
                                Depth)) {
      // Do not mess up with the global assignment process.
      // I.e., VirtReg must be unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                 << PrintReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
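    // Rollback happens in three steps: restore the saved fixed set, unassign
    // VirtReg, and reassign every candidate to the physical register that was
    // recorded in VirtRegToPhysReg before the eviction.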
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
                              EndIt = RecoloringCandidates.end();
         It != EndIt; ++It) {
      unsigned ItVirtReg = (*It)->reg;
      if (VRM->hasPhys(ItVirtReg))
        Matrix->unassign(**It);
      unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg];
      Matrix->assign(**It, ItPhysReg);
    }
  }

  // Last chance recoloring did not work either; give up.
  return ~0u;
}

/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
/// \p NewRegs will contain any new virtual registers created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<unsigned> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    LiveInterval *LI = dequeue(RecoloringQueue);
    DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    unsigned PhysReg =
        selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
    if (PhysReg == ~0u || !PhysReg)
      return false;
    DEBUG(dbgs() << "Recoloring of " << *LI
                 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg);
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction()->getContext();
  SmallVirtRegSet FixedRegisters;
  unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
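/// When we decide to spill instead of using the CSR, \p CostPerUseLimit is
/// set to 1 so that the following tryEvict cannot pick a callee-saved
/// register.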
unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
                                         AllocationOrder &Order,
                                         unsigned PhysReg,
                                         unsigned &CostPerUseLimit,
                                         SmallVectorImpl<unsigned> &NewVRegs) {
  if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
    return 0;
  }
  return PhysReg;
}

void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}

void RAGreedy::initializeCSRCost() {
  // We use the larger one out of the command-line option and the value
  // reported by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}

/// \brief Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!Instr.isFullCopy())
      continue;
    // Look for the other end of the copy.
    unsigned OtherReg = Instr.getOperand(0).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(1).getReg();
      if (OtherReg == Reg)
        continue;
    }
    // Get the current assignment.
    unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
                                ? OtherReg
                                : VRM->getPhys(OtherReg);
    // Push the collected information.
    Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
                           OtherPhysReg));
  }
}

/// \brief Using the given \p List, compute the cost of the broken hints if
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
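/// For example (hypothetical numbers): if \p List holds a hint for R1 with
/// frequency 10 and a hint for R2 with frequency 5, the cost for R1 is 5,
/// since only the R2 hint would be broken.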
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
                                           unsigned PhysReg) {
  BlockFrequency Cost = 0;
  for (const HintInfo &Info : List) {
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq;
  }
  return Cost;
}

/// \brief Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<unsigned, 4> Visited;
  SmallVector<unsigned, 2> RecoloringCandidates;
  HintsInfo Info;
  unsigned Reg = VirtReg.reg;
  unsigned PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
               << PrintReg(PhysReg, TRI) << ")\n");

  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical registers.
    if (TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;

    assert(VRM->hasPhys(Reg) && "We have an unallocated variable!");

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    unsigned CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
                 << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      DEBUG(dbgs() << "Checking profitability:\n");
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
                   << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the cost is either cheaper or equal. If it is
      // equal, we consider this profitable because it may expose
      // more recoloring opportunities.
      DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
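    // The Visited set guarantees that each register is enqueued at most once,
    // so this worklist loop terminates.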
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}

/// \brief Try to recolor broken hints.
/// Broken hints may be repaired by recoloring when an evicted variable
/// freed up a register for a larger live-range.
/// Consider the following example:
/// BB1:
///   a =
///   b =
/// BB2:
///   ...
///   = b
///   = a
/// Let us assume b gets split:
/// BB1:
///   a =
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   = a
/// Because of how the allocation works, b, c, and d may be assigned different
/// colors. Now, if a gets evicted later:
/// BB1:
///   a =
///   st a, SpillSlot
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   e = ld SpillSlot
///   = e
/// It is likely that we can assign the same register for b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
  for (LiveInterval *LI : SetOfBrokenHints) {
    assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
           "Recoloring is possible only for virtual registers");
    // Some dead defs may be around (e.g., because of debug uses).
    // Ignore those.
    if (!VRM->hasPhys(LI->reg))
      continue;
    tryHintRecoloring(*LI);
  }
}

unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
                                     SmallVectorImpl<unsigned> &NewVRegs,
                                     SmallVirtRegSet &FixedRegisters,
                                     unsigned Depth) {
  unsigned CostPerUseLimit = ~0u;
  // First try assigning a free register.
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register; go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
        NewVRegs.empty()) {
      unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                              CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = getStage(VirtReg);
  DEBUG(dbgs() << StageName[Stage]
               << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');

  // Try to evict a less worthy live range, but only for ranges from the
  // primary queue. The RS_Split ranges already failed to do this, and they
  // should not get a second chance until they have been split.
  if (Stage != RS_Split)
    if (unsigned PhysReg =
            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
      unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
      // If VirtReg has a hint and that hint is broken, record this
      // virtual register as a recoloring candidate for broken hint.
      // Indeed, since we evicted a variable in its neighborhood it is
      // likely we can at least partially recolor some of the
      // copy-related live-ranges.
      if (Hint && Hint != PhysReg)
        SetOfBrokenHints.insert(&VirtReg);
      return PhysReg;
    }

  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    setStage(VirtReg, RS_Split);
    DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(VirtReg.reg);
    return 0;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable())
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   Depth);

  // Try splitting VirtReg or interferences.
  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
  if (PhysReg || !NewVRegs.empty())
    return PhysReg;

  // Finally spill VirtReg itself.
  if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
    // TODO: This is experimental and in particular, we do not model
    // the live range splitting done by spilling correctly.
    // We would need a deep integration with the spiller to do the
    // right thing here. Anyway, that is still good for early testing.
    setStage(VirtReg, RS_Memory);
    DEBUG(dbgs() << "Do as if this register is in memory\n");
    NewVRegs.push_back(VirtReg.reg);
  } else {
    NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
    spiller().spill(LRE);
    setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

    if (VerifyEnabled)
      MF->verify(this, "After spilling");
  }

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
               << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TRI = MF->getSubtarget().getRegisterInfo();
  TII = MF->getSubtarget().getInstrInfo();
  RCI.runOnMachineFunction(mf);

  EnableLocalReassign = EnableLocalReassignment ||
                        MF->getSubtarget().enableRALocalReassignment(
                            MF->getTarget().getOptLevel());

  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());
  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  initializeCSRCost();

  calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);

  DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI));
  ExtraRegInfo.clear();
  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  NextCascade = 1;
  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32);   // This will grow as needed.
  SetOfBrokenHints.clear();

  allocatePhysRegs();
  tryHintsRecoloring();
  postOptimization();

  releaseMemory();
  return true;
}