//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition,
"default", "Default"), 90 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), 91 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), 92 cl::init(SplitEditor::SM_Speed)); 93 94 static cl::opt<unsigned> 95 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, 96 cl::desc("Last chance recoloring max depth"), 97 cl::init(5)); 98 99 static cl::opt<unsigned> LastChanceRecoloringMaxInterference( 100 "lcr-max-interf", cl::Hidden, 101 cl::desc("Last chance recoloring maximum number of considered" 102 " interference at a time"), 103 cl::init(8)); 104 105 static cl::opt<bool> ExhaustiveSearch( 106 "exhaustive-register-search", cl::NotHidden, 107 cl::desc("Exhaustive Search for registers bypassing the depth " 108 "and interference cutoffs of last chance recoloring"), 109 cl::Hidden); 110 111 static cl::opt<bool> EnableDeferredSpilling( 112 "enable-deferred-spilling", cl::Hidden, 113 cl::desc("Instead of spilling a variable right away, defer the actual " 114 "code insertion to the end of the allocation. That way the " 115 "allocator might still find a suitable coloring for this " 116 "variable because of other evicted variables."), 117 cl::init(false)); 118 119 // FIXME: Find a good default for this flag and remove the flag. 120 static cl::opt<unsigned> 121 CSRFirstTimeCost("regalloc-csr-first-time-cost", 122 cl::desc("Cost for first time use of callee-saved register."), 123 cl::init(0), cl::Hidden); 124 125 static cl::opt<unsigned long> GrowRegionComplexityBudget( 126 "grow-region-complexity-budget", 127 cl::desc("growRegion() does not scale with the number of BB edges, so " 128 "limit its budget and bail out once we reach the limit."), 129 cl::init(10000), cl::Hidden); 130 131 static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness( 132 "greedy-regclass-priority-trumps-globalness", 133 cl::desc("Change the greedy register allocator's live range priority " 134 "calculation to make the AllocationPriority of the register class " 135 "more important then whether the range is global"), 136 cl::Hidden); 137 138 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", 139 createGreedyRegisterAllocator); 140 141 char RAGreedy::ID = 0; 142 char &llvm::RAGreedyID = RAGreedy::ID; 143 144 INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", 145 "Greedy Register Allocator", false, false) 146 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) 147 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 148 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 149 INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) 150 INITIALIZE_PASS_DEPENDENCY(MachineScheduler) 151 INITIALIZE_PASS_DEPENDENCY(LiveStacks) 152 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 153 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 154 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 155 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) 156 INITIALIZE_PASS_DEPENDENCY(EdgeBundles) 157 INITIALIZE_PASS_DEPENDENCY(SpillPlacement) 158 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) 159 INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis) 160 INITIALIZE_PASS_END(RAGreedy, "greedy", 161 "Greedy Register Allocator", false, false) 162 163 #ifndef NDEBUG 164 const char *const RAGreedy::StageName[] = { 165 "RS_New", 166 "RS_Assign", 167 "RS_Split", 168 "RS_Split2", 169 "RS_Spill", 170 "RS_Memory", 171 "RS_Done" 172 }; 173 #endif 174 175 // Hysteresis to use when comparing floats. 176 // This helps stabilize decisions based on float comparisons. 
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegClassFilterFunc F):
  MachineFunctionPass(ID),
  RegAllocBase(F) {
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  RegAllocBase::enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned:
  // reset the parent to RS_Assign and give the clone the same stage as the
  // parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
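  //
  // A sketch of how the 32-bit priority word ends up packed further down in
  // this function (bit positions follow from the shifts below; the 5-bit
  // width of AllocationPriority is an assumption of this note):
  //
  //   bit 31     : set for any range that is not RS_Split
  //   bit 30     : set when the vreg has a known physreg preference (hint)
  //   bits 25-29 : RC.AllocationPriority, with GlobalBit at bit 24, when
  //                RegClassPriorityTrumpsGlobalness is set
  //   bits 24-28 : RC.AllocationPriority, with GlobalBit at bit 29, otherwise
  //   low bits   : size or instruction distance, the primary ordering key
  //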
  const unsigned Size = LI->getSize();
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");
  unsigned Prio;

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }
  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal =
        !ReverseLocal && (Size / SlotIndex::InstrDist) >
                             (2 * RegClassInfo.getNumAllocatableRegs(&RC));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short live ranges to be assigned
        // first to one of the cheap registers. This could be much faster for
        // very large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }
    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Prio, ~Reg));
}

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
// Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
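///
/// The first interference-free register in Order is chosen, with two
/// refinements sketched in the body below: a free register that is also the
/// hint is taken immediately, and when the hint is merely occupied, a cheap
/// eviction from the hinted register (or from a cheaper alternative) is
/// attempted before settling for the plain free register.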
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }
      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//

Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
                                              Register PrevReg) const {
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    if ((*I).id() == PrevReg.id())
      continue;

    MCRegUnitIterator Units(*I, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      PhysReg = *I;
  }
  if (PhysReg)
    LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                      << printReg(PrevReg, TRI) << " to "
                      << printReg(PhysReg, TRI) << '\n');
  return PhysReg;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');
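  //
  // An illustration with hypothetical vregs: if %a evicts %b under cascade 1,
  // %b inherits cascade 1 and may from then on only be displaced by an
  // eviction carrying a newer (higher) cascade number. Without this, %b could
  // immediately evict %a again and the allocator could ping-pong between the
  // two forever.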
  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // We usually have the interfering VRegs cached, so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, because different SubRanges were queried
    // against it.
    ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

Optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return None;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
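/// (Parameter notes added for clarity: NewVRegs receives any evicted live
/// ranges so they can be re-enqueued; CostPerUseLimit restricts the search to
/// physregs whose per-use cost is strictly below the limit; FixedRegisters
/// lists virtual registers that must not be evicted.)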
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
// Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start.
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias; it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start.
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(makeArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(makeArrayRef(BCS, B));
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
  return true;
}

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
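/// (Reading of the loops below, added for orientation: a use block is charged
/// one copy at its frequency whenever the bundle assignment disagrees with
/// the side the block prefers, and an active live-through block pays for
/// spill code when the region covers only one of its boundaries, or twice
/// when it covers both but there is interference inside.)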
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
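  // For each use block the loop below picks an interval on each boundary
  // (sketched here as a small decision table; an IntvIn/IntvOut of 0 means
  // the value lives on the stack at that boundary):
  //
  //   IntvIn  IntvOut  action
  //   ------  -------  -----------------------------------------------
  //     0        0     isolated block, maybe split around it by itself
  //    !=0       0     splitRegInBlock: register live in, stack out
  //     0       !=0    splitRegOutBlock: stack in, register live out
  //    !=0      !=0    splitLiveThroughBlock with both intervals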
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
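    // When the cursor pool is exhausted, the candidate covering the fewest
    // live bundles is discarded, on the assumption that it spans the least
    // useful region (a heuristic reading of the loop below).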
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
        if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
          continue;
        unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = CandIndex;
          WorstCount = Count;
        }
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands + 1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
               MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      LLVM_DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    if (!growRegion(Cand)) {
      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
      continue;
    }

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      LLVM_DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand, Order);
    LLVM_DEBUG({
      dbgs() << ", total = ";
      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
      for (int I : Cand.LiveBundles.set_bits())
        dbgs() << " EB#" << I;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses.
/// This creates a lot of local live ranges that will be split by tryLocalSplit
/// if they don't allocate.
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing.
/// However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                       AllocationOrder &Order,
                                       SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.
  if (!RegClassInfo.isProperSubClass(CurRC))
    return 0;

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
      if (MI->isFullCopy() ||
          SuperRCNumAllocatableRegs ==
              getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                  TII, TRI, RegClassInfo)) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}

//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size() - 1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);
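  //
  // A picture of the gap indexing used here (illustrative use slots u0..u3):
  //
  //   u0 ---- u1 ---- u2 ---- u3        Uses, NumGaps = 3
  //      g0      g1      g2             GapWeight[i] covers (u_i, u_{i+1})
  //
  // Interference overlapping a use is charged to both neighboring gaps.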
  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), *Units)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[*Units].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap + 1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap + 1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap + 1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap + 1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size() - 1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
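      // (The new range is live before the window unless the window starts at
      // the very first use of a range that is not live-in, and similarly at
      // the end. These two bits feed the NewGaps count below.)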
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
                    << Uses[BestAfter] << ", " << BestDiff << ", "
                    << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
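  //
  // A worked example of the progress check (hypothetical numbers): with five
  // uses there are NumGaps = 4 gaps. Splitting at BestBefore = 1 and
  // BestAfter = 3 on a range that is live-out but not live-in gives
  // LiveBefore = 1 and LiveAfter = 1, so NewGaps = 1 + 3 - 1 + 1 = 4, which
  // is >= NumGaps, and the resulting interval is tagged RS_Split2 below.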
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
      if (IntvMap[I] == 1) {
        ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
        LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
      }
    LLVM_DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return PhysReg when VirtReg may be assigned and/or new virtual registers
/// in NewVRegs.
unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<Register> &NewVRegs,
                            const SmallVirtRegSet &FixedRegisters) {
  // Ranges must be Split2 or less.
  if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
                     TimerGroupDescription, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
    MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// Return true if \p reg has any tied def operand.
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
  for (const MachineOperand &MO : MRI->def_operands(reg))
    if (MO.isTied())
      return true;

  return false;
}

/// Return true if the existing assignment of \p Intf overlaps, but is not the
/// same, as \p PhysReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
                                         const VirtRegMap &VRM,
                                         MCRegister PhysReg,
                                         const LiveInterval &Intf) {
  MCRegister AssignedReg = VRM.getPhys(Intf.reg());
  if (PhysReg == AssignedReg)
    return false;
  return TRI.regsOverlap(PhysReg, AssignedReg);
}

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
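/// This check is conservative: even when it returns true, the actual
/// recoloring attempted afterwards may still fail.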
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
    MCRegister PhysReg, const LiveInterval &VirtReg,
    SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there is LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
            LastChanceRecoloringMaxInterference &&
        !ExhaustiveSearch) {
      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg, it
      // would not be recolorable as it is in the same state as
      // VirtReg. However, there are at least two exceptions.
      //
      // If VirtReg has tied defs and Intf doesn't, then it is still worth
      // examining whether Intf can be recolored.
      //
      // Additionally, if the register class has overlapping tuple members, it
      // may still be recolorable using a different tuple. This is more likely
      // if the existing assignment aliases with the candidate.
      //
      if (((ExtraInfo->getStage(*Intf) == RS_Done &&
            MRI->getRegClass(Intf->reg()) == CurRC &&
            !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
           !(hasTiedDef(MRI, VirtReg.reg()) &&
             !hasTiedDef(MRI, Intf->reg()))) ||
          FixedRegisters.count(Intf->reg())) {
        LLVM_DEBUG(
            dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}

/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC does as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA does as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
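/// vB is assigned R3.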
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
///
/// \p RecolorStack tracks the original assignments of successfully recolored
/// registers.
///
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<Register> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           RecoloringStack &RecolorStack,
                                           unsigned Depth) {
  if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
    return ~0u;

  LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');

  const ssize_t EntryStackSize = RecolorStack.size();

  // Ranges must be Done.
  assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with too large a search space
  // for targets with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;

  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  assert(!FixedRegisters.count(VirtReg.reg()));
  FixedRegisters.insert(VirtReg.reg());
  SmallVector<Register, 4> CurrentNewVRegs;

  for (MCRegister PhysReg : Order) {
    assert(PhysReg.isValid());
    LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    CurrentNewVRegs.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      LLVM_DEBUG(
          dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Give up early on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
    // recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (const LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      enqueue(RecoloringQueue, RC);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
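      // If this attempt fails, the stack is unwound back to its size at entry
      // and these assignments are restored.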
      RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));

      // Unassign the interference from the live register matrix.
      Matrix->unassign(*RC);
    }

    // Do as if VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
                                FixedRegisters, RecolorStack, Depth)) {
      // Push the queued vregs into the main queue.
      for (Register NewVReg : CurrentNewVRegs)
        NewVRegs.push_back(NewVReg);
      // Do not mess up the global assignment process.
      // I.e., VirtReg must be unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    // For a newly created vreg which is also in RecoloringCandidates,
    // don't add it to NewVRegs because its physical register will be restored
    // below. Other vregs in CurrentNewVRegs are created by calling
    // selectOrSplit and should be added into NewVRegs.
    for (Register &R : CurrentNewVRegs) {
      if (RecoloringCandidates.count(&LIS->getInterval(R)))
        continue;
      NewVRegs.push_back(R);
    }

    // Roll back our unsuccessful recoloring. Also roll back any successful
    // recolorings in any recursive recoloring attempts, since it's possible
    // they would have introduced conflicts with assignments we will be
    // restoring further up the stack. Perform all unassignments prior to
    // reassigning, since sub-recolorings may have conflicted with the
    // registers we are going to restore to their original assignments.
    for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];

      if (VRM->hasPhys(LI->reg()))
        Matrix->unassign(*LI);
    }

    for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];
      if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
        Matrix->assign(*LI, PhysReg);
    }

    // Pop the stack of recoloring attempts.
    RecolorStack.resize(EntryStackSize);
  }

  // Last chance recoloring did not work either, give up.
  return ~0u;
}

/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
/// \p NewRegs will contain any new virtual register created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
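/// Recoloring proceeds depth-first: each candidate is sent back through
/// selectOrSplitImpl with Depth + 1, so it may itself trigger another round
/// of last chance recoloring.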
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       RecoloringStack &RecolorStack,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    const LiveInterval *LI = dequeue(RecoloringQueue);
    LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
                                           RecolorStack, Depth + 1);
    // When splitting happens, the live-range may actually be empty.
    // In that case, it is okay to continue the recoloring even
    // if we did not find an alternative color for it. Indeed,
    // there will not be anything to color for LI in the end.
    if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
      return false;

    if (!PhysReg) {
      assert(LI->empty() &&
             "Only empty live-ranges do not require a register");
      LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                        << " succeeded. Empty LI.\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                      << " succeeded with: " << printReg(PhysReg, TRI) << '\n');

    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg());
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
                                   SmallVectorImpl<Register> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction().getContext();
  SmallVirtRegSet FixedRegisters;
  RecoloringStack RecolorStack;
  MCRegister Reg =
      selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise returns 0.
MCRegister RAGreedy::tryAssignCSRFirstTime(
    const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
    uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
  if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
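    // Both quantities are expressed in BlockFrequency units, so they are
    // directly comparable.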
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (ExtraInfo->getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false /*HasCompact*/, NewVRegs);
    return 0;
  }
  return PhysReg;
}

void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}

void RAGreedy::initializeCSRCost() {
  // We use the larger one out of the command-line option and the value
  // reported by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}

/// Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!Instr.isFullCopy())
      continue;
    // Look for the other end of the copy.
    Register OtherReg = Instr.getOperand(0).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(1).getReg();
      if (OtherReg == Reg)
        continue;
    }
    // Get the current assignment.
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    // Push the collected information.
    Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
                           OtherPhysReg));
  }
}

/// Using the given \p List, compute the cost of the broken hints if
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
                                           MCRegister PhysReg) {
  BlockFrequency Cost = 0;
  for (const HintInfo &Info : List) {
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq;
  }
  return Cost;
}

/// Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
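/// Copy-related live ranges are discovered through full copy instructions
/// (see collectHintInfo).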
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<Register, 4> Visited;
  SmallVector<unsigned, 2> RecoloringCandidates;
  HintsInfo Info;
  Register Reg = VirtReg.reg();
  MCRegister PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
                    << '(' << printReg(PhysReg, TRI) << ")\n");

  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical registers.
    if (Register::isPhysicalRegister(Reg))
      continue;

    // This may be a skipped register class.
    if (!VRM->hasPhys(Reg)) {
      assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
             "We have an unallocated variable which should have been handled");
      continue;
    }

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    MCRegister CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
                      << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      LLVM_DEBUG(dbgs() << "Checking profitability:\n");
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
                        << "\nNew Cost: " << NewCopiesCost.getFrequency()
                        << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the cost is either cheaper or equal. If it is
      // equal, we consider it profitable because it may expose
      // more recoloring opportunities.
      LLVM_DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}

/// Try to recolor broken hints.
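/// A hint is broken when a virtual register ends up assigned to a different
/// physical register than the one its copy hint suggested.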
/// Broken hints may be repaired by recoloring when an evicted variable
/// freed up a register for a larger live-range.
/// Consider the following example:
/// BB1:
///   a =
///   b =
/// BB2:
///   ...
///   = b
///   = a
/// Let us assume b gets split:
/// BB1:
///   a =
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   = a
/// Because of how the allocation works, b, c, and d may be assigned different
/// colors. Now, if a gets evicted later:
/// BB1:
///   a =
///   st a, SpillSlot
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   e = ld SpillSlot
///   = e
/// It is likely that we can assign the same register for b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
  for (const LiveInterval *LI : SetOfBrokenHints) {
    assert(Register::isVirtualRegister(LI->reg()) &&
           "Recoloring is possible only for virtual registers");
    // Some dead defs may be around (e.g., because of debug uses).
    // Ignore those.
    if (!VRM->hasPhys(LI->reg()))
      continue;
    tryHintRecoloring(*LI);
  }
}

MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       RecoloringStack &RecolorStack,
                                       unsigned Depth) {
  uint8_t CostPerUseLimit = uint8_t(~0u);
  // First try assigning a free register.
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  if (MCRegister PhysReg =
          tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register; go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() &&
        EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
      MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                                CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
  LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
                    << ExtraInfo->getCascade(VirtReg.reg()) << '\n');

  // Try to evict a less worthy live range, but only for ranges from the
  // primary queue. The RS_Split ranges already failed to do this, and they
  // should not get a second chance until they have been split.
  if (Stage != RS_Split)
    if (Register PhysReg =
            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
                     FixedRegisters)) {
      Register Hint = MRI->getSimpleHint(VirtReg.reg());
      // If VirtReg has a hint and that hint is broken, record this
      // virtual register as a recoloring candidate for broken hint.
      // Indeed, since we evicted a variable in its neighborhood it is
      // likely we can at least partially recolor some of the
      // copy-related live-ranges.
      if (Hint && Hint != PhysReg)
        SetOfBrokenHints.insert(&VirtReg);
      return PhysReg;
    }

  assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    ExtraInfo->setStage(VirtReg, RS_Split);
    LLVM_DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(VirtReg.reg());
    return 0;
  }

  if (Stage < RS_Spill) {
    // Try splitting VirtReg or interferences.
    unsigned NewVRegSizeBefore = NewVRegs.size();
    Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
    if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
      return PhysReg;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable()) {
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   RecolorStack, Depth);
  }

  // Finally spill VirtReg itself.
  if ((EnableDeferredSpilling ||
       TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
      ExtraInfo->getStage(VirtReg) < RS_Memory) {
    // TODO: This is experimental and in particular, we do not model
    // the live range splitting done by spilling correctly.
    // We would need a deep integration with the spiller to do the
    // right thing here. Anyway, that is still good for early testing.
    ExtraInfo->setStage(VirtReg, RS_Memory);
    LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
    NewVRegs.push_back(VirtReg.reg());
  } else {
    NamedRegionTimer T("spill", "Spiller", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
    spiller().spill(LRE);
    ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

    // Tell LiveDebugVariables about the new ranges. Ranges not covered by
    // the new regs are kept in LDV (still mapping to the old register),
    // until we rewrite spilled locations in LDV at a later stage.
    DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);

    if (VerifyEnabled)
      MF->verify(this, "After spilling");
  }

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
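  // The replacement ranges in NewVRegs are queued and allocated instead.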
  return 0;
}

void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
  using namespace ore;
  if (Spills) {
    R << NV("NumSpills", Spills) << " spills ";
    R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
  }
  if (FoldedSpills) {
    R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
    R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
      << " total folded spills cost ";
  }
  if (Reloads) {
    R << NV("NumReloads", Reloads) << " reloads ";
    R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
  }
  if (FoldedReloads) {
    R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
    R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
      << " total folded reloads cost ";
  }
  if (ZeroCostFoldedReloads)
    R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
      << " zero cost folded reloads ";
  if (Copies) {
    R << NV("NumVRCopies", Copies) << " virtual register copies ";
    R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
  }
}

RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
  RAGreedyStats Stats;
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  int FI;

  auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
    return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
        A->getPseudoValue())->getFrameIndex());
  };
  auto isPatchpointInstr = [](const MachineInstr &MI) {
    return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
           MI.getOpcode() == TargetOpcode::STACKMAP ||
           MI.getOpcode() == TargetOpcode::STATEPOINT;
  };
  for (MachineInstr &MI : MBB) {
    if (MI.isCopy()) {
      MachineOperand &Dest = MI.getOperand(0);
      MachineOperand &Src = MI.getOperand(1);
      if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() &&
          Src.getReg().isVirtual())
        ++Stats.Copies;
      continue;
    }

    SmallVector<const MachineMemOperand *, 2> Accesses;
    if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Reloads;
      continue;
    }
    if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Spills;
      continue;
    }
    if (TII->hasLoadFromStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      if (!isPatchpointInstr(MI)) {
        Stats.FoldedReloads += Accesses.size();
        continue;
      }
      // For statepoints there may be both folded and zero-cost folded stack
      // reloads.
      std::pair<unsigned, unsigned> NonZeroCostRange =
          TII->getPatchpointUnfoldableRange(MI);
      SmallSet<unsigned, 16> FoldedReloads;
      SmallSet<unsigned, 16> ZeroCostFoldedReloads;
      for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
        MachineOperand &MO = MI.getOperand(Idx);
        if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
          continue;
        if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
          FoldedReloads.insert(MO.getIndex());
        else
          ZeroCostFoldedReloads.insert(MO.getIndex());
      }
      // If a stack slot is used in a folded reload, it is not zero cost.
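      // A slot may be referenced both inside and outside the unfoldable
      // operand range; in that case the folded (non-zero cost) classification
      // wins.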
      for (unsigned Slot : FoldedReloads)
        ZeroCostFoldedReloads.erase(Slot);
      Stats.FoldedReloads += FoldedReloads.size();
      Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
      continue;
    }
    Accesses.clear();
    if (TII->hasStoreToStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      Stats.FoldedSpills += Accesses.size();
    }
  }
  // Weight the collected statistics by the relative frequency of this basic
  // block.
  float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
  Stats.ReloadsCost = RelFreq * Stats.Reloads;
  Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
  Stats.SpillsCost = RelFreq * Stats.Spills;
  Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
  Stats.CopiesCost = RelFreq * Stats.Copies;
  return Stats;
}

RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
  RAGreedyStats Stats;

  // Sum up the spills and reloads in subloops.
  for (MachineLoop *SubLoop : *L)
    Stats.add(reportStats(SubLoop));

  for (MachineBasicBlock *MBB : L->getBlocks())
    // Handle blocks that were not included in subloops.
    if (Loops->getLoopFor(MBB) == L)
      Stats.add(computeStats(*MBB));

  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
                                        L->getStartLoc(), L->getHeader());
      Stats.report(R);
      R << "generated in loop";
      return R;
    });
  }
  return Stats;
}

void RAGreedy::reportStats() {
  if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
    return;
  RAGreedyStats Stats;
  for (MachineLoop *L : *Loops)
    Stats.add(reportStats(L));
  // Process non-loop blocks.
  for (MachineBasicBlock &MBB : *MF)
    if (!Loops->getLoopFor(&MBB))
      Stats.add(computeStats(MBB));
  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      DebugLoc Loc;
      if (auto *SP = MF->getFunction().getSubprogram())
        Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
                                        &MF->front());
      Stats.report(R);
      R << "generated in function";
      return R;
    });
  }
}

bool RAGreedy::hasVirtRegAlloc() {
  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (MRI->reg_nodbg_empty(Reg))
      continue;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if (!RC)
      continue;
    if (ShouldAllocateClass(*TRI, *RC))
      return true;
  }

  return false;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
                    << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TII = MF->getSubtarget().getInstrInfo();

  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());

  // Early return if there is no virtual register to be allocated to a
  // physical register.
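  // hasVirtRegAlloc() scans the virtual registers for one whose register
  // class this allocator should handle.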
  if (!hasVirtRegAlloc())
    return false;

  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();

  initializeCSRCost();

  RegCosts = TRI->getRegisterCosts(*MF);
  RegClassPriorityTrumpsGlobalness =
      GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
          ? GreedyRegClassPriorityTrumpsGlobalness
          : TRI->regClassPriorityTrumpsGlobalness(*MF);

  ExtraInfo.emplace();
  EvictAdvisor =
      getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);

  VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));

  VRAI->calculateSpillWeightsAndHints();

  LLVM_DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));

  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32); // This will grow as needed.
  SetOfBrokenHints.clear();

  allocatePhysRegs();
  tryHintsRecoloring();

  if (VerifyEnabled)
    MF->verify(this, "Before post optimization");
  postOptimization();
  reportStats();

  releaseMemory();
  return true;
}