//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "SpillPlacement.h"
#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits,  "Number of split local live ranges");
STATISTIC(NumEvicted,      "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interferences at a time"),
    cl::init(8));

static cl::opt<bool>
ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
                 cl::desc("Exhaustive Search for registers bypassing the depth "
                          "and interference cutoffs of last chance recoloring"));

static cl::opt<bool> EnableLocalReassignment(
    "enable-local-reassign", cl::Hidden,
    cl::desc("Local reassignment can yield better allocation decisions, but "
             "may be compile time intensive"),
    cl::init(false));
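// These cl::opt knobs go through the standard command-line machinery, so any
// tool that runs this allocator can toggle them. Illustrative invocation
// (flag spellings are the cl::opt names declared in this file):
//
//   llc -regalloc=greedy -enable-local-reassign -split-spill-mode=size foo.ll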
static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
                 cl::desc("Cost for first time use of callee-saved register."),
                 cl::init(0), cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

namespace {
class RAGreedy : public MachineFunctionPass,
                 public RegAllocBase,
                 private LiveRangeEdit::Delegate {
  // Convenient shortcuts.
  typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
  typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
  typedef SmallSet<unsigned, 16> SmallVirtRegSet;

  // context
  MachineFunction *MF;

  // Shortcuts to some useful interfaces.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  RegisterClassInfo RCI;

  // analyses
  SlotIndexes *Indexes;
  MachineBlockFrequencyInfo *MBFI;
  MachineDominatorTree *DomTree;
  MachineLoopInfo *Loops;
  EdgeBundles *Bundles;
  SpillPlacement *SpillPlacer;
  LiveDebugVariables *DebugVars;
  AliasAnalysis *AA;

  // state
  std::unique_ptr<Spiller> SpillerInstance;
  PQueue Queue;
  unsigned NextCascade;

  // Live ranges pass through a number of stages as we try to allocate them.
  // Some of the stages may also create new live ranges:
  //
  // - Region splitting.
  // - Per-block splitting.
  // - Local splitting.
  // - Spilling.
  //
  // Ranges produced by one of the stages skip the previous stages when they
  // are dequeued. This improves performance because we can skip interference
  // checks that are unlikely to give any results. It also guarantees that the
  // live range splitting algorithm terminates, something that is otherwise
  // hard to ensure.
  enum LiveRangeStage {
    /// Newly created live range that has never been queued.
    RS_New,

    /// Only attempt assignment and eviction. Then requeue as RS_Split.
    RS_Assign,

    /// Attempt live range splitting if assignment is impossible.
    RS_Split,

    /// Attempt more aggressive live range splitting that is guaranteed to make
    /// progress. This is used for split products that may not be making
    /// progress.
    RS_Split2,

    /// Live range will be spilled. No more splitting will be attempted.
    RS_Spill,

    /// Live range is in memory. Because of other evictions, it might get moved
    /// into a register in the end.
    RS_Memory,

    /// There is nothing more we can do to this live range. Abort compilation
    /// if it can't be assigned.
    RS_Done
  };

  // Enum CutOffStage to keep track of whether register allocation failed
  // because of the cutoffs encountered in last chance recoloring.
  // Note: This is used as a bitmask. New values should be the next power of 2.
  enum CutOffStage {
    // No cutoffs encountered.
    CO_None = 0,

    // lcr-max-depth cutoff encountered.
    CO_Depth = 1,

    // lcr-max-interf cutoff encountered.
    CO_Interf = 2
  };

  uint8_t CutOffInfo;

#ifndef NDEBUG
  static const char *const StageName[];
#endif
  // RegInfo - Keep additional information about each live range.
  struct RegInfo {
    LiveRangeStage Stage;

    // Cascade - Eviction loop prevention. See canEvictInterference().
    unsigned Cascade;

    RegInfo() : Stage(RS_New), Cascade(0) {}
  };

  IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;

  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
    return ExtraRegInfo[VirtReg.reg].Stage;
  }

  void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    ExtraRegInfo[VirtReg.reg].Stage = Stage;
  }

  template<typename Iterator>
  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    for (; Begin != End; ++Begin) {
      unsigned Reg = *Begin;
      if (ExtraRegInfo[Reg].Stage == RS_New)
        ExtraRegInfo[Reg].Stage = NewStage;
    }
  }

  /// Cost of evicting interference.
  struct EvictionCost {
    unsigned BrokenHints; ///< Total number of broken hints.
    float MaxWeight;      ///< Maximum spill weight evicted.

    EvictionCost() : BrokenHints(0), MaxWeight(0) {}

    bool isMax() const { return BrokenHints == ~0u; }

    void setMax() { BrokenHints = ~0u; }

    void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }

    bool operator<(const EvictionCost &O) const {
      return std::tie(BrokenHints, MaxWeight) <
             std::tie(O.BrokenHints, O.MaxWeight);
    }
  };
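  // The operator< above compares BrokenHints before MaxWeight, so hints
  // dominate. Illustrative example (hypothetical values): {BrokenHints = 1,
  // MaxWeight = 0.5} is considered more expensive than {BrokenHints = 0,
  // MaxWeight = 10.0}; breaking a hint is never traded for evicting lighter
  // spill weights.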
  // splitting state.
  std::unique_ptr<SplitAnalysis> SA;
  std::unique_ptr<SplitEditor> SE;

  /// Cached per-block interference maps.
  InterferenceCache IntfCache;

  /// All basic blocks where the current register has uses.
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;

  /// Global live range splitting candidate info.
  struct GlobalSplitCandidate {
    // Register intended for assignment, or 0.
    unsigned PhysReg;

    // SplitKit interval index for this candidate.
    unsigned IntvIdx;

    // Interference for PhysReg.
    InterferenceCache::Cursor Intf;

    // Bundles where this candidate should be live.
    BitVector LiveBundles;
    SmallVector<unsigned, 8> ActiveBlocks;

    void reset(InterferenceCache &Cache, unsigned Reg) {
      PhysReg = Reg;
      IntvIdx = 0;
      Intf.setPhysReg(Cache, Reg);
      LiveBundles.clear();
      ActiveBlocks.clear();
    }

    // Set B[i] = C for every live bundle where B[i] was NoCand.
    unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
      unsigned Count = 0;
      for (int i = LiveBundles.find_first(); i >= 0;
           i = LiveBundles.find_next(i))
        if (B[i] == NoCand) {
          B[i] = C;
          Count++;
        }
      return Count;
    }
  };

  /// Candidate info for each PhysReg in AllocationOrder.
  /// This vector never shrinks, but grows to the size of the largest register
  /// class.
  SmallVector<GlobalSplitCandidate, 32> GlobalCand;

  enum : unsigned { NoCand = ~0u };

  /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
  /// NoCand which indicates the stack interval.
  SmallVector<unsigned, 32> BundleCand;

  /// Callee-save register cost, calculated once per machine function.
  BlockFrequency CSRCost;

  /// Whether or not to run the local reassignment heuristic. This information
  /// is obtained from the TargetSubtargetInfo.
  bool EnableLocalReassign;

  /// Set of broken hints that may be reconciled later because of eviction.
  SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;

public:
  RAGreedy();

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Greedy Register Allocator";
  }

  /// RAGreedy analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  void releaseMemory() override;
  Spiller &spiller() override { return *SpillerInstance; }
  void enqueue(LiveInterval *LI) override;
  LiveInterval *dequeue() override;
  unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
  void aboutToRemoveInterval(LiveInterval &) override;

  /// Perform register allocation.
  bool runOnMachineFunction(MachineFunction &mf) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoPHIs);
  }

  static char ID;

private:
  unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
                             SmallVirtRegSet &, unsigned = 0);

  bool LRE_CanEraseVirtReg(unsigned) override;
  void LRE_WillShrinkVirtReg(unsigned) override;
  void LRE_DidCloneVirtReg(unsigned, unsigned) override;
  void enqueue(PQueue &CurQueue, LiveInterval *LI);
  LiveInterval *dequeue(PQueue &CurQueue);

  BlockFrequency calcSpillCost();
  bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
  void growRegion(GlobalSplitCandidate &Cand);
  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
  bool calcCompactRegion(GlobalSplitCandidate&);
  void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
  void calcGapWeights(unsigned, SmallVectorImpl<float>&);
  unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
  bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
  void evictInterference(LiveInterval&, unsigned,
                         SmallVectorImpl<unsigned>&);
  bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
                                  SmallLISet &RecoloringCandidates,
                                  const SmallVirtRegSet &FixedRegisters);

  unsigned tryAssign(LiveInterval&, AllocationOrder&,
                     SmallVectorImpl<unsigned>&);
  unsigned tryEvict(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&, unsigned = ~0u);
  unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<unsigned>&);
  /// Calculate cost of region splitting.
  unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    BlockFrequency &BestCost,
                                    unsigned &NumCands, bool IgnoreCSR);
  /// Perform region splitting.
  unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                         bool HasCompact,
                         SmallVectorImpl<unsigned> &NewVRegs);
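  // Note (informal): the CSR-related helpers below cooperate with the CSRCost
  // field and the regalloc-csr-first-time-cost flag declared near the top of
  // the file. The first use of a callee-saved register forces save/restore
  // code into the prologue/epilogue, so these paths only pick an unused CSR
  // when it beats the alternatives by roughly that extra cost.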
  /// Check other options before using a callee-saved register for the first
  /// time.
  unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
                                 unsigned PhysReg, unsigned &CostPerUseLimit,
                                 SmallVectorImpl<unsigned> &NewVRegs);
  void initializeCSRCost();
  unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
                               SmallVectorImpl<unsigned>&);
  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned trySplit(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&);
  unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
                                   SmallVectorImpl<unsigned> &,
                                   SmallVirtRegSet &, unsigned);
  bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
                               SmallVirtRegSet &, unsigned);
  void tryHintRecoloring(LiveInterval &);
  void tryHintsRecoloring();

  /// Model the information carried by one end of a copy.
  struct HintInfo {
    /// The frequency of the copy.
    BlockFrequency Freq;
    /// The virtual register or physical register.
    unsigned Reg;
    /// Its currently assigned register.
    /// In case of a physical register Reg == PhysReg.
    unsigned PhysReg;
    HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
        : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
  };
  typedef SmallVector<HintInfo, 4> HintsInfo;
  BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
  void collectHintInfo(unsigned, HintsInfo &);

  bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
};
} // end anonymous namespace

char RAGreedy::ID = 0;

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif
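// The constant below is used for float comparisons in the splitting
// heuristics further down the file: a new candidate only wins when it beats
// the incumbent by roughly 2%, so tiny block-frequency differences cannot
// flip decisions back and forth between rounds.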
// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
  initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
  initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
  if (VRM->hasPhys(VirtReg)) {
    LiveInterval &LI = LIS->getInterval(VirtReg);
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}
void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!ExtraRegInfo.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned:
  // reset the parent to RS_Assign and give the clone the same stage as the
  // parent.
  ExtraRegInfo[Old].Stage = RS_Assign;
  ExtraRegInfo.grow(New);
  ExtraRegInfo[New] = ExtraRegInfo[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  ExtraRegInfo.clear();
  GlobalCand.clear();
}

void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const unsigned Reg = LI->reg;
  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
         "Can only enqueue virtual registers");
  unsigned Prio;

  ExtraRegInfo.grow(Reg);
  if (ExtraRegInfo[Reg].Stage == RS_New)
    ExtraRegInfo[Reg].Stage = RS_Assign;

  if (ExtraRegInfo[Reg].Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in the
    // reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = !ReverseLocal &&
      (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());

    if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short live ranges to be assigned
        // first to one of the cheap registers. This could be much faster for
        // very large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
      }
      Prio |= RC.AllocationPriority << 24;
    } else {
      // Allocate global and split ranges in long->short order. Long ranges
      // that don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
    }
    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
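  // At this point (for ranges that are not RS_Split or RS_Memory), Prio is
  // roughly laid out as follows; an informal sketch of the encoding above,
  // not a separate contract:
  //
  //   bit 31     always set, sorts these above deferred RS_Split ranges
  //   bit 30     set when the range has a known register preference
  //   bit 29     set for global ranges so they sort above local ones
  //   bits 24-28 RC.AllocationPriority for local ranges
  //   low bits   size or instruction distance, the primary key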
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Prio, ~Reg));
}

LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                             AllocationOrder &Order,
                             SmallVectorImpl<unsigned> &NewVRegs) {
  Order.rewind();
  unsigned PhysReg;
  while ((PhysReg = Order.next()))
    if (!Matrix->checkInterference(VirtReg, PhysReg))
      break;
  if (!PhysReg || Order.isHint())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
    if (Order.isHint(Hint)) {
      DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
      EvictionCost MaxCost;
      MaxCost.setBrokenHints(1);
      if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
        evictInterference(VirtReg, Hint, NewVRegs);
        return Hint;
      }
    }

  // Try to evict interference from a cheaper alternative.
  unsigned Cost = TRI->getCostPerUse(PhysReg);

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
               << '\n');
  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
  unsigned PhysReg;
  while ((PhysReg = Order.next())) {
    if (PhysReg == PrevReg)
      continue;

    MCRegUnitIterator Units(PhysReg, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      break;
  }
  if (PhysReg)
    DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                 << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
                 << '\n');
  return PhysReg;
}
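// Informal sketch of the cascade mechanism used below (hypothetical vregs):
// when %a evicts %b, %b is tagged with %a's cascade number as it is requeued.
// A live range may only evict ranges with strictly older cascade numbers, so
// %b can no longer evict %a and ping-pong eviction loops are cut off. See
// canEvictInterference() for the precise rule.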
/// shouldEvict - determine if A should evict the assigned live range B. The
/// eviction policy defined by this function together with the allocation order
/// defined by enqueue() decides which registers ultimately end up being split
/// and spilled.
///
/// Cascade numbers are used to prevent infinite loops if this function is a
/// cyclic relation.
///
/// @param A          The live range to be assigned.
/// @param IsHint     True when A is about to be assigned to its preferred
///                   register.
/// @param B          The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
                           LiveInterval &B, bool BreaksHint) {
  bool CanSplit = getStage(B) < RS_Spill;

  // Be fairly aggressive about following hints as long as the evictee can be
  // split.
  if (CanSplit && IsHint && !BreaksHint)
    return true;

  if (A.weight > B.weight) {
    DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
    return true;
  }
  return false;
}

/// canEvictInterference - Return true if all interferences between VirtReg and
/// PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
/// @param IsHint  True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                    bool IsHint, EvictionCost &MaxCost) {
  // It is only possible to evict virtual register interference.
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
    return false;

  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);

  // Find VirtReg's cascade number. This will be unassigned if VirtReg was
  // never involved in an eviction before. If a cascade number was assigned,
  // deny evicting anything with the same or a newer cascade number. This
  // prevents infinite eviction loops.
  //
  // This works out so a register without a cascade number is allowed to evict
  // anything, and it can be evicted by anything.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = NextCascade;

  EvictionCost Cost;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are 10 or more interferences, chances are one is heavier.
    if (Q.collectInterferingVRegs(10) >= 10)
      return false;

    // Check if any interfering live range is heavier than MaxWeight.
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
             "Only expecting virtual register interference from query");
      // Never evict spill products. They cannot split or spill.
      if (getStage(*Intf) == RS_Done)
        return false;
      // Once a live range becomes small enough, it is urgent that we find a
      // register for it. This is indicated by an infinite spill weight. These
      // urgent live ranges get to evict almost anything.
      //
      // Also allow urgent evictions of unspillable ranges from a strictly
      // larger allocation order.
      bool Urgent = !VirtReg.isSpillable() &&
        (Intf->isSpillable() ||
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
      // Only evict older cascades or live ranges without a cascade.
      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
      if (Cascade <= IntfCascade) {
        if (!Urgent)
          return false;
        // We permit breaking cascades for urgent evictions. It should be the
        // last resort, though, so make it really expensive.
        Cost.BrokenHints += 10;
      }
      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
      if (Urgent)
        continue;
      // Apply the eviction policy for non-urgent evictions.
      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
        return false;
      // If !MaxCost.isMax(), then we're just looking for a cheap register.
      // Evicting another local live range in this case could lead to
      // suboptimal coloring.
      if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
          (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
        return false;
      }
    }
  }
  MaxCost = Cost;
  return true;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;

  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
               << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<LiveInterval*, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
    LiveInterval *Intf = Intfs[i];
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg))
      continue;
    Matrix->unassign(*Intf);
    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraRegInfo[Intf->reg].Cascade = Cascade;
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg);
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
  unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (CSR == 0)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}
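// Note: tryEvict below runs in two modes, selected by CostPerUseLimit. With
// the default ~0u it hunts for any profitably evictable register; with a
// finite limit (as passed in from tryAssign above) it only considers
// registers that are strictly cheaper per use, and a limit of 1 additionally
// refuses to touch an unused callee-saved register, whose first use is not
// free.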
/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                            AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs,
                            unsigned CostPerUseLimit) {
  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);

  // Keep track of the cheapest interference seen so far.
  EvictionCost BestCost;
  BestCost.setMax();
  unsigned BestPhys = 0;
  unsigned OrderLimit = Order.getOrder().size();

  // When we are just looking for a reduced cost per use, don't break any
  // hints, and only evict smaller spill weights.
  if (CostPerUseLimit < ~0u) {
    BestCost.BrokenHints = 0;
    BestCost.MaxWeight = VirtReg.weight;

    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
    unsigned MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
                   << ", no cheaper registers to be found.\n");
      return 0;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
    }
  }

  Order.rewind();
  while (unsigned PhysReg = Order.next(OrderLimit)) {
    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
      continue;
    // The first use of a callee-saved register in a function has cost 1.
    // Don't start using a CSR when the CostPerUseLimit is low.
    if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
                   << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
                   << '\n');
      continue;
    }

    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
      continue;

    // Best so far.
    BestPhys = PhysReg;

    // Stop if the hint can be used.
    if (Order.isHint())
      break;
  }

  if (!BestPhys)
    return 0;

  evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//
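// Informal overview of region splitting: for one candidate physreg, the code
// below translates the interference pattern into per-block entry/exit
// constraints and hands them to SpillPlacement, which decides per edge bundle
// whether the value should live in a register or on the stack, minimizing the
// expected frequency of inserted spill code.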
/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned i = 0; i != Blocks.size(); ++i) {
    unsigned Number = Blocks[i];
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(makeArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(makeArrayRef(BCS, B));
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
}
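// growRegion below is a simple fixpoint loop: each round takes the bundles
// SpillPlacement most recently turned positive, pulls in their neighboring
// live-through blocks, feeds the corresponding constraints (or a strong spill
// bias when forming compact regions) back to SpillPlacement, and iterates
// until no new bundles light up.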
void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  for (;;) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (int i = 0, e = NewBundles.size(); i != e; ++i) {
      unsigned Bundle = NewBundles[i];
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
           I != E; ++I) {
        unsigned Block = *I;
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg)
      addThroughConstraints(Cand.Intf, NewBlocks);
    else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  DEBUG(dbgs() << ", v=" << Visited);
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, 0);

  DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no
  // interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  growRegion(Cand);
  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  DEBUG({
    for (int i = Cand.LiveBundles.find_first(); i >= 0;
         i = Cand.LiveBundles.find_next(i))
      dbgs() << " EB#" << i;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range
/// in SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
    unsigned Ins = 0;

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
    unsigned Number = Cand.ActiveBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  unsigned Reg = SA->getParent().reg;
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned c = 0; c != UsedCands.size(); ++c) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      unsigned Number = Blocks[i];
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    LiveInterval &Reg = LIS->getInterval(LREdit.get(i));

    // Ignore old intervals from DCE.
    if (getStage(Reg) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[i] == 0) {
      setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of
    // live blocks is strictly decreasing.
    if (IntvMap[i] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                     << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals
    // created for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                  SmallVectorImpl<unsigned> &NewVRegs) {
  unsigned NumCands = 0;
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = calcSpillCost();
    DEBUG(dbgs() << "Cost of isolating all blocks = ";
          MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand =
      calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
                               false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return 0;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}
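// Note that BestCost acts as an in/out cap for the search below: seeded by
// tryRegionSplit with the per-block spill cost (or the maximum frequency when
// a compact region exists), it is lowered each time a candidate beats it, so
// later candidates must be strictly cheaper than both spilling and the best
// region seen so far.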
unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned i = 0; i != NumCands; ++i) {
        if (i == BestCand || !GlobalCand[i].PhysReg)
          continue;
        unsigned Count = GlobalCand[i].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = i;
          WorstCount = Count;
        }
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
          MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    growRegion(Cand);

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand);
    DEBUG({
      dbgs() << ", total = ";
      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
      for (int i = Cand.LiveBundles.find_first(); i >= 0;
           i = Cand.LiveBundles.find_next(i))
        dbgs() << " EB#" << i;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }
  return BestCand;
}

unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
                   << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }
  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
                   << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses.
/// This creates a lot of local live ranges that will be split by tryLocalSplit
/// if they don't allocate.
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  unsigned Reg = VirtReg.reg;
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    LiveInterval &LI = LIS->getInterval(LREdit.get(i));
    if (getStage(LI) == RS_New && IntvMap[i] == 0)
      setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
//                         Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p
/// Reg on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}
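// Illustrative example (hypothetical, x86-flavored): if a virtual register in
// GR32 has a single use constrained to GR32_ABCD, splitting around that
// instruction leaves only a tiny interval carrying the tight constraint,
// while the rest of the live range may use any GR32 register.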
However, when the live range is in a constrained register 1578 /// class, it may help to insert copies such that parts of the live range can 1579 /// be moved to a larger register class. 1580 /// 1581 /// This is similar to spilling to a larger register class. 1582 unsigned 1583 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1584 SmallVectorImpl<unsigned> &NewVRegs) { 1585 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); 1586 // There is no point to this if there are no larger sub-classes. 1587 if (!RegClassInfo.isProperSubClass(CurRC)) 1588 return 0; 1589 1590 // Always enable split spill mode, since we're effectively spilling to a 1591 // register. 1592 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1593 SE->reset(LREdit, SplitEditor::SM_Size); 1594 1595 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1596 if (Uses.size() <= 1) 1597 return 0; 1598 1599 DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); 1600 1601 const TargetRegisterClass *SuperRC = 1602 TRI->getLargestLegalSuperClass(CurRC, *MF); 1603 unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); 1604 // Split around every non-copy instruction if this split will relax 1605 // the constraints on the virtual register. 1606 // Otherwise, splitting just inserts uncoalescable copies that do not help 1607 // the allocation. 1608 for (unsigned i = 0; i != Uses.size(); ++i) { 1609 if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) 1610 if (MI->isFullCopy() || 1611 SuperRCNumAllocatableRegs == 1612 getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, 1613 TRI, RCI)) { 1614 DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); 1615 continue; 1616 } 1617 SE->openIntv(); 1618 SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); 1619 SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); 1620 SE->useIntv(SegStart, SegStop); 1621 } 1622 1623 if (LREdit.empty()) { 1624 DEBUG(dbgs() << "All uses were copies.\n"); 1625 return 0; 1626 } 1627 1628 SmallVector<unsigned, 8> IntvMap; 1629 SE->finish(&IntvMap); 1630 DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); 1631 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1632 1633 // Assign all new registers to RS_Spill. This was the last chance. 1634 setStage(LREdit.begin(), LREdit.end(), RS_Spill); 1635 return 0; 1636 } 1637 1638 1639 //===----------------------------------------------------------------------===// 1640 // Local Splitting 1641 //===----------------------------------------------------------------------===// 1642 1643 1644 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted 1645 /// in order to use PhysReg between two entries in SA->UseSlots. 1646 /// 1647 /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. 1648 /// 1649 void RAGreedy::calcGapWeights(unsigned PhysReg, 1650 SmallVectorImpl<float> &GapWeight) { 1651 assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); 1652 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1653 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1654 const unsigned NumGaps = Uses.size()-1; 1655 1656 // Start and end points for the interference check. 1657 SlotIndex StartIdx = 1658 BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; 1659 SlotIndex StopIdx = 1660 BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; 1661 1662 GapWeight.assign(NumGaps, 0.0f); 1663 1664 // Add interference from each overlapping register. 
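// Interference is tracked per register unit; iterating over the units of
// PhysReg covers every register that aliases it without walking alias lists.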
1665 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1666 if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
1667 .checkInterference())
1668 continue;
1669
1670 // We know that VirtReg is a continuous interval from FirstInstr to
1671 // LastInstr, so we don't need InterferenceQuery.
1672 //
1673 // Interference that overlaps an instruction is counted in both gaps
1674 // surrounding the instruction. The exception is interference before
1675 // StartIdx and after StopIdx.
1676 //
1677 LiveIntervalUnion::SegmentIter IntI =
1678 Matrix->getLiveUnions()[*Units].find(StartIdx);
1679 for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1680 // Skip the gaps before IntI.
1681 while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1682 if (++Gap == NumGaps)
1683 break;
1684 if (Gap == NumGaps)
1685 break;
1686
1687 // Update the gaps covered by IntI.
1688 const float weight = IntI.value()->weight;
1689 for (; Gap != NumGaps; ++Gap) {
1690 GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1691 if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1692 break;
1693 }
1694 if (Gap == NumGaps)
1695 break;
1696 }
1697 }
1698
1699 // Add fixed interference.
1700 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1701 const LiveRange &LR = LIS->getRegUnit(*Units);
1702 LiveRange::const_iterator I = LR.find(StartIdx);
1703 LiveRange::const_iterator E = LR.end();
1704
1705 // Same loop as above. Mark any overlapped gaps as huge_valf.
1706 for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1707 while (Uses[Gap+1].getBoundaryIndex() < I->start)
1708 if (++Gap == NumGaps)
1709 break;
1710 if (Gap == NumGaps)
1711 break;
1712
1713 for (; Gap != NumGaps; ++Gap) {
1714 GapWeight[Gap] = llvm::huge_valf;
1715 if (Uses[Gap+1].getBaseIndex() >= I->end)
1716 break;
1717 }
1718 if (Gap == NumGaps)
1719 break;
1720 }
1721 }
1722 }
1723
1724 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1725 /// basic block.
1726 ///
1727 unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1728 SmallVectorImpl<unsigned> &NewVRegs) {
1729 assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1730 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1731
1732 // Note that it is possible to have an interval that is live-in or live-out
1733 // while only covering a single block - a phi-def can use undef values from
1734 // predecessors, and the block could be a single-block loop.
1735 // We don't bother doing anything clever about such a case; we simply assume
1736 // that the interval is continuous from FirstInstr to LastInstr. We should
1737 // make sure that we don't do anything illegal to such an interval, though.
1738
1739 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1740 if (Uses.size() <= 2)
1741 return 0;
1742 const unsigned NumGaps = Uses.size()-1;
1743
1744 DEBUG({
1745 dbgs() << "tryLocalSplit: ";
1746 for (unsigned i = 0, e = Uses.size(); i != e; ++i)
1747 dbgs() << ' ' << Uses[i];
1748 dbgs() << '\n';
1749 });
1750
1751 // If VirtReg is live across any register mask operands, compute a list of
1752 // gaps with register masks.
1753 SmallVector<unsigned, 8> RegMaskGaps;
1754 if (Matrix->checkRegMaskInterference(VirtReg)) {
1755 // Get regmask slots for the whole block.
1756 ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1757 DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1758 // Constrain to VirtReg's live range.
1759 unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
1760 Uses.front().getRegSlot()) - RMS.begin();
1761 unsigned re = RMS.size();
1762 for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
1763 // Look for Uses[i] <= RMS <= Uses[i+1].
1764 assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
1765 if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
1766 continue;
1767 // Skip a regmask on the same instruction as the last use. It doesn't
1768 // overlap the live range.
1769 if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
1770 break;
1771 DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
1772 RegMaskGaps.push_back(i);
1773 // Advance ri to the next gap. A regmask on one of the uses counts in
1774 // both gaps.
1775 while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
1776 ++ri;
1777 }
1778 DEBUG(dbgs() << '\n');
1779 }
1780
1781 // Since we allow local split results to be split again, there is a risk of
1782 // creating infinite loops. It is tempting to require that the new live
1783 // ranges have fewer instructions than the original. That would guarantee
1784 // convergence, but it is too strict. A live range with 3 instructions can be
1785 // split 2+3 (including the COPY), and we want to allow that.
1786 //
1787 // Instead we use these rules:
1788 //
1789 // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1790 // noop split, of course).
1791 // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1792 // the new ranges must have fewer instructions than before the split.
1793 // 3. New ranges with the same number of instructions are marked RS_Split2;
1794 // smaller ranges are marked RS_New.
1795 //
1796 // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1797 // excessive splitting and infinite loops.
1798 //
1799 bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
1800
1801 // Best split candidate.
1802 unsigned BestBefore = NumGaps;
1803 unsigned BestAfter = 0;
1804 float BestDiff = 0;
1805
1806 const float blockFreq =
1807 SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1808 (1.0f / MBFI->getEntryFreq());
1809 SmallVector<float, 8> GapWeight;
1810
1811 Order.rewind();
1812 while (unsigned PhysReg = Order.next()) {
1813 // Keep track of the largest spill weight that would need to be evicted in
1814 // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
1815 calcGapWeights(PhysReg, GapWeight);
1816
1817 // Remove any gaps with regmask clobbers.
1818 if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1819 for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
1820 GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
1821
1822 // Try to find the best sequence of gaps to close.
1823 // The new spill weight must be larger than any gap interference.
1824
1825 // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1826 unsigned SplitBefore = 0, SplitAfter = 1;
1827
1828 // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1829 // It is the spill weight that needs to be evicted.
1830 float MaxGap = GapWeight[0];
1831
1832 for (;;) {
1833 // Live before/after split?
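// The candidate range stays live before Uses[SplitBefore] when there are
// earlier uses or the original value is live-in; LiveAfter is symmetric.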
1834 const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; 1835 const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; 1836 1837 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' 1838 << Uses[SplitBefore] << '-' << Uses[SplitAfter] 1839 << " i=" << MaxGap); 1840 1841 // Stop before the interval gets so big we wouldn't be making progress. 1842 if (!LiveBefore && !LiveAfter) { 1843 DEBUG(dbgs() << " all\n"); 1844 break; 1845 } 1846 // Should the interval be extended or shrunk? 1847 bool Shrink = true; 1848 1849 // How many gaps would the new range have? 1850 unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; 1851 1852 // Legally, without causing looping? 1853 bool Legal = !ProgressRequired || NewGaps < NumGaps; 1854 1855 if (Legal && MaxGap < llvm::huge_valf) { 1856 // Estimate the new spill weight. Each instruction reads or writes the 1857 // register. Conservatively assume there are no read-modify-write 1858 // instructions. 1859 // 1860 // Try to guess the size of the new interval. 1861 const float EstWeight = normalizeSpillWeight( 1862 blockFreq * (NewGaps + 1), 1863 Uses[SplitBefore].distance(Uses[SplitAfter]) + 1864 (LiveBefore + LiveAfter) * SlotIndex::InstrDist, 1865 1); 1866 // Would this split be possible to allocate? 1867 // Never allocate all gaps, we wouldn't be making progress. 1868 DEBUG(dbgs() << " w=" << EstWeight); 1869 if (EstWeight * Hysteresis >= MaxGap) { 1870 Shrink = false; 1871 float Diff = EstWeight - MaxGap; 1872 if (Diff > BestDiff) { 1873 DEBUG(dbgs() << " (best)"); 1874 BestDiff = Hysteresis * Diff; 1875 BestBefore = SplitBefore; 1876 BestAfter = SplitAfter; 1877 } 1878 } 1879 } 1880 1881 // Try to shrink. 1882 if (Shrink) { 1883 if (++SplitBefore < SplitAfter) { 1884 DEBUG(dbgs() << " shrink\n"); 1885 // Recompute the max when necessary. 1886 if (GapWeight[SplitBefore - 1] >= MaxGap) { 1887 MaxGap = GapWeight[SplitBefore]; 1888 for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) 1889 MaxGap = std::max(MaxGap, GapWeight[i]); 1890 } 1891 continue; 1892 } 1893 MaxGap = 0; 1894 } 1895 1896 // Try to extend the interval. 1897 if (SplitAfter >= NumGaps) { 1898 DEBUG(dbgs() << " end\n"); 1899 break; 1900 } 1901 1902 DEBUG(dbgs() << " extend\n"); 1903 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); 1904 } 1905 } 1906 1907 // Didn't find any candidates? 1908 if (BestBefore == NumGaps) 1909 return 0; 1910 1911 DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] 1912 << '-' << Uses[BestAfter] << ", " << BestDiff 1913 << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); 1914 1915 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1916 SE->reset(LREdit); 1917 1918 SE->openIntv(); 1919 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); 1920 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); 1921 SE->useIntv(SegStart, SegStop); 1922 SmallVector<unsigned, 8> IntvMap; 1923 SE->finish(&IntvMap); 1924 DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); 1925 1926 // If the new range has the same number of instructions as before, mark it as 1927 // RS_Split2 so the next split will be forced to make progress. Otherwise, 1928 // leave the new intervals as RS_New so they can compete. 
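// (NewGaps below recomputes the gap count of the chosen split exactly like
// the search loop above did, counting the live-in/live-out tails.)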
1929 bool LiveBefore = BestBefore != 0 || BI.LiveIn; 1930 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; 1931 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; 1932 if (NewGaps >= NumGaps) { 1933 DEBUG(dbgs() << "Tagging non-progress ranges: "); 1934 assert(!ProgressRequired && "Didn't make progress when it was required."); 1935 for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) 1936 if (IntvMap[i] == 1) { 1937 setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); 1938 DEBUG(dbgs() << PrintReg(LREdit.get(i))); 1939 } 1940 DEBUG(dbgs() << '\n'); 1941 } 1942 ++NumLocalSplits; 1943 1944 return 0; 1945 } 1946 1947 //===----------------------------------------------------------------------===// 1948 // Live Range Splitting 1949 //===----------------------------------------------------------------------===// 1950 1951 /// trySplit - Try to split VirtReg or one of its interferences, making it 1952 /// assignable. 1953 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. 1954 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, 1955 SmallVectorImpl<unsigned>&NewVRegs) { 1956 // Ranges must be Split2 or less. 1957 if (getStage(VirtReg) >= RS_Spill) 1958 return 0; 1959 1960 // Local intervals are handled separately. 1961 if (LIS->intervalIsInOneMBB(VirtReg)) { 1962 NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); 1963 SA->analyze(&VirtReg); 1964 unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); 1965 if (PhysReg || !NewVRegs.empty()) 1966 return PhysReg; 1967 return tryInstructionSplit(VirtReg, Order, NewVRegs); 1968 } 1969 1970 NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); 1971 1972 SA->analyze(&VirtReg); 1973 1974 // FIXME: SplitAnalysis may repair broken live ranges coming from the 1975 // coalescer. That may cause the range to become allocatable which means that 1976 // tryRegionSplit won't be making progress. This check should be replaced with 1977 // an assertion when the coalescer is fixed. 1978 if (SA->didRepairRange()) { 1979 // VirtReg has changed, so all cached queries are invalid. 1980 Matrix->invalidateVirtRegs(); 1981 if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) 1982 return PhysReg; 1983 } 1984 1985 // First try to split around a region spanning multiple blocks. RS_Split2 1986 // ranges already made dubious progress with region splitting, so they go 1987 // straight to single block splitting. 1988 if (getStage(VirtReg) < RS_Split2) { 1989 unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); 1990 if (PhysReg || !NewVRegs.empty()) 1991 return PhysReg; 1992 } 1993 1994 // Then isolate blocks. 1995 return tryBlockSplit(VirtReg, Order, NewVRegs); 1996 } 1997 1998 //===----------------------------------------------------------------------===// 1999 // Last Chance Recoloring 2000 //===----------------------------------------------------------------------===// 2001 2002 /// mayRecolorAllInterferences - Check if the virtual registers that 2003 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be 2004 /// recolored to free \p PhysReg. 2005 /// When true is returned, \p RecoloringCandidates has been augmented with all 2006 /// the live intervals that need to be recolored in order to free \p PhysReg 2007 /// for \p VirtReg. 2008 /// \p FixedRegisters contains all the virtual registers that cannot be 2009 /// recolored. 
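/// A false return value means at least one interference is either fixed or
/// sits in RS_Done with \p VirtReg's register class, so freeing \p PhysReg
/// this way is hopeless.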
2010 bool
2011 RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
2012 SmallLISet &RecoloringCandidates,
2013 const SmallVirtRegSet &FixedRegisters) {
2014 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
2015
2016 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
2017 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
2018 // If there are LastChanceRecoloringMaxInterference or more interferences,
2019 // chances are one of them would not be recolorable.
2020 if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
2021 LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
2022 DEBUG(dbgs() << "Early abort: too many interferences.\n");
2023 CutOffInfo |= CO_Interf;
2024 return false;
2025 }
2026 for (unsigned i = Q.interferingVRegs().size(); i; --i) {
2027 LiveInterval *Intf = Q.interferingVRegs()[i - 1];
2028 // If Intf is done and sits in the same register class as VirtReg, it
2029 // would not be recolorable as it is in the same state as VirtReg.
2030 if ((getStage(*Intf) == RS_Done &&
2031 MRI->getRegClass(Intf->reg) == CurRC) ||
2032 FixedRegisters.count(Intf->reg)) {
2033 DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
2034 return false;
2035 }
2036 RecoloringCandidates.insert(Intf);
2037 }
2038 }
2039 return true;
2040 }
2041
2042 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
2043 /// its interferences.
2044 /// Last chance recoloring chooses a color for \p VirtReg and recolors every
2045 /// virtual register that was using it. The recoloring process may recursively
2046 /// use the last chance recoloring. Therefore, when a virtual register has been
2047 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
2048 /// be last-chance-recolored again during this recoloring "session".
2049 /// E.g.,
2050 /// Let
2051 /// vA can use {R1, R2 }
2052 /// vB can use { R2, R3}
2053 /// vC can use {R1 }
2054 /// Where vA, vB, and vC cannot be split anymore (they are reloads for
2055 /// instance) and they all interfere.
2056 ///
2057 /// vA is assigned R1
2058 /// vB is assigned R2
2059 /// vC tries to evict vA but vA is already done.
2060 /// Regular register allocation fails.
2061 ///
2062 /// Last chance recoloring kicks in:
2063 /// vC does as if vA was evicted => vC uses R1.
2064 /// vC is marked as fixed.
2065 /// vA needs to find a color.
2066 /// None are available.
2067 /// vA cannot evict vC: vC is a fixed virtual register now.
2068 /// vA does as if vB was evicted => vA uses R2.
2069 /// vB needs to find a color.
2070 /// R3 is available.
2071 /// Recoloring => vC = R1, vA = R2, vB = R3
2072 ///
2073 /// \p Order defines the preferred allocation order for \p VirtReg.
2074 /// \p NewRegs will contain any new virtual registers that have been created
2075 /// (split, spill) during the process and that must be assigned.
2076 /// \p FixedRegisters contains all the virtual registers that cannot be
2077 /// recolored.
2078 /// \p Depth gives the current depth of the last chance recoloring.
2079 /// \return a physical register that can be used for VirtReg or ~0u if none
2080 /// exists.
2081 unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
2082 AllocationOrder &Order,
2083 SmallVectorImpl<unsigned> &NewVRegs,
2084 SmallVirtRegSet &FixedRegisters,
2085 unsigned Depth) {
2086 DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
2087 // Ranges must be Done.
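// (Or not spillable: a range that cannot be spilled has to take this path
// even if it never reached RS_Done, as the assert below allows.)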
2088 assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
2089 "Last chance recoloring should really be last chance");
2090 // Set the max depth to LastChanceRecoloringMaxDepth.
2091 // We may want to reconsider that if we end up with too large a search space
2092 // for targets with hundreds of registers.
2093 // Indeed, in that case we may want to cut the search space earlier.
2094 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
2095 DEBUG(dbgs() << "Abort because max depth has been reached.\n");
2096 CutOffInfo |= CO_Depth;
2097 return ~0u;
2098 }
2099
2100 // Set of live intervals that will need to be recolored.
2101 SmallLISet RecoloringCandidates;
2102 // Record the original mapping from virtual register to physical register in
2103 // case the recoloring fails.
2104 DenseMap<unsigned, unsigned> VirtRegToPhysReg;
2105 // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
2106 // this recoloring "session".
2107 FixedRegisters.insert(VirtReg.reg);
2108 // Remember the ID of the last vreg in case the recoloring fails.
2109 unsigned LastVReg =
2110 TargetRegisterInfo::index2VirtReg(MRI->getNumVirtRegs() - 1);
2111 SmallVector<unsigned, 4> CurrentNewVRegs;
2112
2113 Order.rewind();
2114 while (unsigned PhysReg = Order.next()) {
2115 DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
2116 << PrintReg(PhysReg, TRI) << '\n');
2117 RecoloringCandidates.clear();
2118 VirtRegToPhysReg.clear();
2119 CurrentNewVRegs.clear();
2120
2121 // It is only possible to recolor virtual register interference.
2122 if (Matrix->checkInterference(VirtReg, PhysReg) >
2123 LiveRegMatrix::IK_VirtReg) {
2124 DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");
2125
2126 continue;
2127 }
2128
2129 // Give up early on this PhysReg if it is obvious we cannot recolor all
2130 // the interferences.
2131 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2132 FixedRegisters)) {
2133 DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
2134 continue;
2135 }
2136
2137 // RecoloringCandidates contains all the virtual registers that interfere
2138 // with VirtReg on PhysReg (or one of its aliases).
2139 // Enqueue them for recoloring and perform the actual recoloring.
2140 PQueue RecoloringQueue;
2141 for (SmallLISet::iterator It = RecoloringCandidates.begin(),
2142 EndIt = RecoloringCandidates.end();
2143 It != EndIt; ++It) {
2144 unsigned ItVirtReg = (*It)->reg;
2145 enqueue(RecoloringQueue, *It);
2146 assert(VRM->hasPhys(ItVirtReg) &&
2147 "Interferences are supposed to be with allocated variables");
2148
2149 // Record the current allocation.
2150 VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
2151 // Unassign the interval so its register becomes available for recoloring.
2152 Matrix->unassign(**It);
2153 }
2154
2155 // Do as if VirtReg was assigned to PhysReg so that the underlying
2156 // recoloring has the right information about the interferences and
2157 // available colors.
2158 Matrix->assign(VirtReg, PhysReg);
2159
2160 // Save the current recoloring state.
2161 // If we cannot recolor all the interferences, we will have to start again
2162 // at this point for the next physical register.
2163 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2164 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2165 FixedRegisters, Depth)) {
2166 // Push the queued vregs into the main queue.
2167 for (unsigned NewVReg : CurrentNewVRegs)
2168 NewVRegs.push_back(NewVReg);
2169 // Do not mess with the global assignment process.
2170 // I.e., VirtReg must be unassigned.
2171 Matrix->unassign(VirtReg);
2172 return PhysReg;
2173 }
2174
2175 DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2176 << PrintReg(PhysReg, TRI) << '\n');
2177
2178 // The recoloring attempt failed; undo the changes.
2179 FixedRegisters = SaveFixedRegisters;
2180 Matrix->unassign(VirtReg);
2181
2182 // When we move a register from RS_Assign to RS_Split, we do not
2183 // actually do anything with it. I.e., it should not end up in NewVRegs.
2184 // For the other cases, since we created new live-ranges, we need to
2185 // process them.
2186 for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(),
2187 End = CurrentNewVRegs.end();
2188 Next != End; ++Next) {
2189 if (*Next <= LastVReg && getStage(LIS->getInterval(*Next)) == RS_Split)
2190 continue;
2191 NewVRegs.push_back(*Next);
2192 }
2193
2194 for (SmallLISet::iterator It = RecoloringCandidates.begin(),
2195 EndIt = RecoloringCandidates.end();
2196 It != EndIt; ++It) {
2197 unsigned ItVirtReg = (*It)->reg;
2198 if (VRM->hasPhys(ItVirtReg))
2199 Matrix->unassign(**It);
2200 unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg];
2201 Matrix->assign(**It, ItPhysReg);
2202 }
2203 }
2204
2205 // Last chance recoloring did not work either, give up.
2206 return ~0u;
2207 }
2208
2209 /// tryRecoloringCandidates - Try to assign a new color to every register
2210 /// in \p RecoloringQueue.
2211 /// \p NewRegs will contain any new virtual registers created during the
2212 /// recoloring process.
2213 /// \p FixedRegisters[in/out] contains all the registers that have been
2214 /// recolored.
2215 /// \return true if all virtual registers in RecoloringQueue were successfully
2216 /// recolored, false otherwise.
2217 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2218 SmallVectorImpl<unsigned> &NewVRegs,
2219 SmallVirtRegSet &FixedRegisters,
2220 unsigned Depth) {
2221 while (!RecoloringQueue.empty()) {
2222 LiveInterval *LI = dequeue(RecoloringQueue);
2223 DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2224 unsigned PhysReg;
2225 PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
2226 // When splitting happens, the live-range may actually be empty.
2227 // In that case, it is okay to continue the recoloring even
2228 // if we did not find an alternative color for it. Indeed,
2229 // there will not be anything to color for LI in the end.
2230 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2231 return false;
2232
2233 if (!PhysReg) {
2234 assert(LI->empty() && "Only empty live-ranges do not require a register");
2235 DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n");
2236 continue;
2237 }
2238 DEBUG(dbgs() << "Recoloring of " << *LI
2239 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
2240
2241 Matrix->assign(*LI, PhysReg);
2242 FixedRegisters.insert(LI->reg);
2243 }
2244 return true;
2245 }
2246
2247 //===----------------------------------------------------------------------===//
2248 // Main Entry Point
2249 //===----------------------------------------------------------------------===//
2250
2251 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
2252 SmallVectorImpl<unsigned> &NewVRegs) {
2253 CutOffInfo = CO_None;
2254 LLVMContext &Ctx = MF->getFunction()->getContext();
2255 SmallVirtRegSet FixedRegisters;
2256 unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
2257 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2258 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2259 if (CutOffEncountered == CO_Depth)
2260 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2261 "reached. Use -fexhaustive-register-search to skip "
2262 "cutoffs");
2263 else if (CutOffEncountered == CO_Interf)
2264 Ctx.emitError("register allocation failed: maximum interference for "
2265 "recoloring reached. Use -fexhaustive-register-search "
2266 "to skip cutoffs");
2267 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2268 Ctx.emitError("register allocation failed: maximum interference and "
2269 "depth for recoloring reached. Use "
2270 "-fexhaustive-register-search to skip cutoffs");
2271 }
2272 return Reg;
2273 }
2274
2275 /// Using a CSR for the first time has a cost because it causes push|pop
2276 /// to be added to prologue|epilogue. Splitting a cold section of the live
2277 /// range can have lower cost than using the CSR for the first time;
2278 /// spilling a live range in the cold path can likewise be cheaper than
2279 /// using the CSR for the first time. Returns the physical register if we
2280 /// decide to use the CSR; otherwise returns 0.
2281 unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
2282 AllocationOrder &Order,
2283 unsigned PhysReg,
2284 unsigned &CostPerUseLimit,
2285 SmallVectorImpl<unsigned> &NewVRegs) {
2286 if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2287 // We choose spill over using the CSR for the first time if the spill cost
2288 // is lower than CSRCost.
2289 SA->analyze(&VirtReg);
2290 if (calcSpillCost() >= CSRCost)
2291 return PhysReg;
2292
2293 // We are going to spill; set CostPerUseLimit to 1 to make sure that
2294 // we will not use a callee-saved register in tryEvict.
2295 CostPerUseLimit = 1;
2296 return 0;
2297 }
2298 if (getStage(VirtReg) < RS_Split) {
2299 // We choose pre-splitting over using the CSR for the first time if
2300 // the cost of splitting is lower than CSRCost.
2301 SA->analyze(&VirtReg);
2302 unsigned NumCands = 0;
2303 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2304 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2305 NumCands, true /*IgnoreCSR*/);
2306 if (BestCand == NoCand)
2307 // Use the CSR if we can't find a region split below CSRCost.
2308 return PhysReg;
2309
2310 // Perform the actual pre-splitting.
2311 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2312 return 0;
2313 }
2314 return PhysReg;
2315 }
2316
2317 void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
2318 // Do not keep invalid information around.
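// LI is about to be deleted, so a stale pointer to it must not survive in
// SetOfBrokenHints.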
2319 SetOfBrokenHints.remove(&LI);
2320 }
2321
2322 void RAGreedy::initializeCSRCost() {
2323 // We use the larger of the command-line option and the value reported
2324 // by TRI.
2325 CSRCost = BlockFrequency(
2326 std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2327 if (!CSRCost.getFrequency())
2328 return;
2329
2330 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2331 uint64_t ActualEntry = MBFI->getEntryFreq();
2332 if (!ActualEntry) {
2333 CSRCost = 0;
2334 return;
2335 }
2336 uint64_t FixedEntry = 1 << 14;
2337 if (ActualEntry < FixedEntry)
2338 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2339 else if (ActualEntry <= UINT32_MAX)
2340 // Invert the fraction and divide.
2341 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2342 else
2343 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2344 CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
2345 }
2346
2347 /// \brief Collect the hint info for \p Reg.
2348 /// The results are stored into \p Out.
2349 /// \p Out is not cleared before being populated.
2350 void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
2351 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2352 if (!Instr.isFullCopy())
2353 continue;
2354 // Look for the other end of the copy.
2355 unsigned OtherReg = Instr.getOperand(0).getReg();
2356 if (OtherReg == Reg) {
2357 OtherReg = Instr.getOperand(1).getReg();
2358 if (OtherReg == Reg)
2359 continue;
2360 }
2361 // Get the current assignment.
2362 unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
2363 ? OtherReg
2364 : VRM->getPhys(OtherReg);
2365 // Push the collected information.
2366 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2367 OtherPhysReg));
2368 }
2369 }
2370
2371 /// \brief Using the given \p List, compute the cost of the broken hints if
2372 /// \p PhysReg was used.
2373 /// \return The cost of \p List for \p PhysReg.
2374 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2375 unsigned PhysReg) {
2376 BlockFrequency Cost = 0;
2377 for (const HintInfo &Info : List) {
2378 if (Info.PhysReg != PhysReg)
2379 Cost += Info.Freq;
2380 }
2381 return Cost;
2382 }
2383
2384 /// \brief Using the register assigned to \p VirtReg, try to recolor
2385 /// all the live ranges that are copy-related with \p VirtReg.
2386 /// The recoloring is then propagated to all the live-ranges that have
2387 /// been recolored and so on, until no more copies can be coalesced or
2388 /// it is not profitable.
2389 /// For a given live range, profitability is determined by the sum of the
2390 /// frequencies of the non-identity copies it would introduce with the old
2391 /// and new register.
2392 void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
2393 // We have a broken hint; check if it is possible to fix it by
2394 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2395 // some register and PhysReg may be available for the other live-ranges.
2396 SmallSet<unsigned, 4> Visited;
2397 SmallVector<unsigned, 2> RecoloringCandidates;
2398 HintsInfo Info;
2399 unsigned Reg = VirtReg.reg;
2400 unsigned PhysReg = VRM->getPhys(Reg);
2401 // Start the recoloring algorithm from the input live-interval; it will
2402 // then propagate to the ones that are copy-related with it.
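// Simple worklist algorithm: Visited prevents requeueing a vreg, while
// RecoloringCandidates holds the copy-related frontier still to process.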
2403 Visited.insert(Reg);
2404 RecoloringCandidates.push_back(Reg);
2405
2406 DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
2407 << PrintReg(PhysReg, TRI) << ")\n");
2408
2409 do {
2410 Reg = RecoloringCandidates.pop_back_val();
2411
2412 // We cannot recolor physical registers.
2413 if (TargetRegisterInfo::isPhysicalRegister(Reg))
2414 continue;
2415
2416 assert(VRM->hasPhys(Reg) && "We have an unallocated variable!");
2417
2418 // Get the live interval mapped with this virtual register to be able
2419 // to check for the interference with the new color.
2420 LiveInterval &LI = LIS->getInterval(Reg);
2421 unsigned CurrPhys = VRM->getPhys(Reg);
2422 // Check that the new color matches the register class constraints and
2423 // that it is free for this live range.
2424 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2425 Matrix->checkInterference(LI, PhysReg)))
2426 continue;
2427
2428 DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
2429 << ") is recolorable.\n");
2430
2431 // Gather the hint info.
2432 Info.clear();
2433 collectHintInfo(Reg, Info);
2434 // Check if recoloring the live-range will increase the cost of the
2435 // non-identity copies.
2436 if (CurrPhys != PhysReg) {
2437 DEBUG(dbgs() << "Checking profitability:\n");
2438 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2439 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2440 DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
2441 << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
2442 if (OldCopiesCost < NewCopiesCost) {
2443 DEBUG(dbgs() << "=> Not profitable.\n");
2444 continue;
2445 }
2446 // At this point, the cost is either lower or equal. If it is
2447 // equal, we consider this profitable because it may expose
2448 // more recoloring opportunities.
2449 DEBUG(dbgs() << "=> Profitable.\n");
2450 // Recolor the live-range.
2451 Matrix->unassign(LI);
2452 Matrix->assign(LI, PhysReg);
2453 }
2454 // Push all copy-related live-ranges to keep reconciling the broken
2455 // hints.
2456 for (const HintInfo &HI : Info) {
2457 if (Visited.insert(HI.Reg).second)
2458 RecoloringCandidates.push_back(HI.Reg);
2459 }
2460 } while (!RecoloringCandidates.empty());
2461 }
2462
2463 /// \brief Try to recolor broken hints.
2464 /// Broken hints may be repaired by recoloring when an evicted variable
2465 /// freed up a register for a larger live-range.
2466 /// Consider the following example:
2467 /// BB1:
2468 /// a =
2469 /// b =
2470 /// BB2:
2471 /// ...
2472 /// = b
2473 /// = a
2474 /// Let us assume b gets split:
2475 /// BB1:
2476 /// a =
2477 /// b =
2478 /// BB2:
2479 /// c = b
2480 /// ...
2481 /// d = c
2482 /// = d
2483 /// = a
2484 /// Because of how the allocation works, b, c, and d may be assigned different
2485 /// colors. Now, if a gets evicted later:
2486 /// BB1:
2487 /// a =
2488 /// st a, SpillSlot
2489 /// b =
2490 /// BB2:
2491 /// c = b
2492 /// ...
2493 /// d = c
2494 /// = d
2495 /// e = ld SpillSlot
2496 /// = e
2497 /// It is likely that we can assign the same register to b, c, and d,
2498 /// getting rid of 2 copies.
2499 void RAGreedy::tryHintsRecoloring() {
2500 for (LiveInterval *LI : SetOfBrokenHints) {
2501 assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
2502 "Recoloring is possible only for virtual registers");
2503 // Some dead defs may be around (e.g., because of debug uses).
2504 // Ignore those.
2505 if (!VRM->hasPhys(LI->reg))
2506 continue;
2507 tryHintRecoloring(*LI);
2508 }
2509 }
2510
2511 unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
2512 SmallVectorImpl<unsigned> &NewVRegs,
2513 SmallVirtRegSet &FixedRegisters,
2514 unsigned Depth) {
2515 unsigned CostPerUseLimit = ~0u;
2516 // First try assigning a free register.
2517 AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
2518 if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
2519 // When NewVRegs is not empty, we may have made decisions such as
2520 // evicting a virtual register; go with the earlier decisions and use
2521 // the physical register.
2522 if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
2523 NewVRegs.empty()) {
2524 unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2525 CostPerUseLimit, NewVRegs);
2526 if (CSRReg || !NewVRegs.empty())
2527 // Return now if we decide to use a CSR or create new vregs due to
2528 // pre-splitting.
2529 return CSRReg;
2530 } else
2531 return PhysReg;
2532 }
2533
2534 LiveRangeStage Stage = getStage(VirtReg);
2535 DEBUG(dbgs() << StageName[Stage]
2536 << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
2537
2538 // Try to evict a less worthy live range, but only for ranges from the primary
2539 // queue. The RS_Split ranges already failed to do this, and they should not
2540 // get a second chance until they have been split.
2541 if (Stage != RS_Split)
2542 if (unsigned PhysReg =
2543 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
2544 unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
2545 // If VirtReg has a hint and that hint is broken, record this
2546 // virtual register as a recoloring candidate for broken hint.
2547 // Indeed, since we evicted a variable in its neighborhood it is
2548 // likely we can at least partially recolor some of the
2549 // copy-related live-ranges.
2550 if (Hint && Hint != PhysReg)
2551 SetOfBrokenHints.insert(&VirtReg);
2552 return PhysReg;
2553 }
2554
2555 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2556
2557 // The first time we see a live range, don't try to split or spill.
2558 // Wait until the second time, when all smaller ranges have been allocated.
2559 // This gives a better picture of the interference to split around.
2560 if (Stage < RS_Split) {
2561 setStage(VirtReg, RS_Split);
2562 DEBUG(dbgs() << "wait for second round\n");
2563 NewVRegs.push_back(VirtReg.reg);
2564 return 0;
2565 }
2566
2567 if (Stage < RS_Spill) {
2568 // Try splitting VirtReg or interferences.
2569 unsigned NewVRegSizeBefore = NewVRegs.size();
2570 unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
2571 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2572 return PhysReg;
2573 }
2574
2575 // If we couldn't allocate a register from spilling, there is probably some
2576 // invalid inline assembly. The base class will report it.
2577 if (Stage >= RS_Done || !VirtReg.isSpillable())
2578 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2579 Depth);
2580
2581 // Finally spill VirtReg itself.
2582 if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
2583 // TODO: This is experimental and in particular, we do not model
2584 // the live range splitting done by spilling correctly.
2585 // We would need a deep integration with the spiller to do the
2586 // right thing here. Anyway, that is still good for early testing.
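// Requeueing the range as RS_Memory gives later evictions a chance to free
// a register for it before any spill code is actually emitted.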
2587 setStage(VirtReg, RS_Memory); 2588 DEBUG(dbgs() << "Do as if this register is in memory\n"); 2589 NewVRegs.push_back(VirtReg.reg); 2590 } else { 2591 NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); 2592 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 2593 spiller().spill(LRE); 2594 setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); 2595 2596 if (VerifyEnabled) 2597 MF->verify(this, "After spilling"); 2598 } 2599 2600 // The live virtual register requesting allocation was spilled, so tell 2601 // the caller not to allocate anything during this round. 2602 return 0; 2603 } 2604 2605 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { 2606 DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" 2607 << "********** Function: " << mf.getName() << '\n'); 2608 2609 MF = &mf; 2610 TRI = MF->getSubtarget().getRegisterInfo(); 2611 TII = MF->getSubtarget().getInstrInfo(); 2612 RCI.runOnMachineFunction(mf); 2613 2614 EnableLocalReassign = EnableLocalReassignment || 2615 MF->getSubtarget().enableRALocalReassignment( 2616 MF->getTarget().getOptLevel()); 2617 2618 if (VerifyEnabled) 2619 MF->verify(this, "Before greedy register allocator"); 2620 2621 RegAllocBase::init(getAnalysis<VirtRegMap>(), 2622 getAnalysis<LiveIntervals>(), 2623 getAnalysis<LiveRegMatrix>()); 2624 Indexes = &getAnalysis<SlotIndexes>(); 2625 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 2626 DomTree = &getAnalysis<MachineDominatorTree>(); 2627 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); 2628 Loops = &getAnalysis<MachineLoopInfo>(); 2629 Bundles = &getAnalysis<EdgeBundles>(); 2630 SpillPlacer = &getAnalysis<SpillPlacement>(); 2631 DebugVars = &getAnalysis<LiveDebugVariables>(); 2632 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 2633 2634 initializeCSRCost(); 2635 2636 calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); 2637 2638 DEBUG(LIS->dump()); 2639 2640 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); 2641 SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); 2642 ExtraRegInfo.clear(); 2643 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 2644 NextCascade = 1; 2645 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); 2646 GlobalCand.resize(32); // This will grow as needed. 2647 SetOfBrokenHints.clear(); 2648 2649 allocatePhysRegs(); 2650 tryHintsRecoloring(); 2651 postOptimization(); 2652 2653 releaseMemory(); 2654 return true; 2655 } 2656