//===- RegAllocGreedy.cpp - greedy register allocator --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
    LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                                 cl::desc("Last chance recoloring max depth"),
                                 cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of interferences "
             "considered at a time"),
    cl::init(8));

static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::Hidden,
    cl::desc("Exhaustive search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"));

static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
    CSRFirstTimeCost("regalloc-csr-first-time-cost",
                     cl::desc("Cost for first time use of callee-saved register."),
                     cl::init(0), cl::Hidden);

static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);

static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
    "greedy-regclass-priority-trumps-globalness",
    cl::desc("Change the greedy register allocator's live range priority "
             "calculation to make the AllocationPriority of the register "
             "class more important than whether the range is global"),
    cl::Hidden);

static cl::opt<bool> GreedyReverseLocalAssignment(
    "greedy-reverse-local-assignment",
    cl::desc("Reverse allocation order of local live ranges, such that "
             "shorter local live ranges will tend to be allocated first"),
    cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                      "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                    "Greedy Register Allocator", false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
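// For example, in tryLocalSplit below an estimated split weight only counts
// as beating the interference when EstWeight * Hysteresis >= MaxGap, i.e. it
// must win by a roughly 2% margin.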
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegClassFilterFunc F):
  MachineFunctionPass(ID),
  RegAllocBase(F) {
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  RegAllocBase::enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned.
  // Reset the stage to RS_Assign; the clone inherits the same stage as the
  // parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");
  unsigned Prio;

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }
  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order in which they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
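    // (A range counts as "giant" when it spans more than roughly twice as
    // many instructions as there are allocatable registers in its class.)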
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = !ReverseLocalAssignment &&
                       (Size / SlotIndex::InstrDist) >
                           (2 * RegClassInfo.getNumAllocatableRegs(&RC));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }
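    // Assemble the priority word: bit 31 lifts these ranges above RS_Split,
    // bit 30 marks a register hint, and the next bits hold the globalness
    // marker and the class's AllocationPriority in an order chosen by
    // RegClassPriorityTrumpsGlobalness; the low bits keep the size- or
    // position-based priority computed above.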
    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Prio, ~Reg));
}

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }
      // Record the missed hint; we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}


//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
                                              Register PrevReg) const {
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    if ((*I).id() == PrevReg.id())
      continue;

    MCRegUnitIterator Units(*I, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      PhysReg = *I;
  }
  if (PhysReg)
    LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                      << printReg(PrevReg, TRI) << " to "
                      << printReg(PhysReg, TRI) << '\n');
  return PhysReg;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // We usually have the interfering VRegs cached, so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, since different SubRanges may have been
    // queried against it.
    ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

Optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return None;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start.
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias; it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start.
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(makeArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(makeArrayRef(BCS, B));
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
  return true;
}

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range
/// in SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

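    // Each mismatch between the actual live-in/out state and the placement's
    // register preference costs one copy or reload at the block boundary.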
    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
        if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
          continue;
        unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = CandIndex;
          WorstCount = Count;
        }
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands + 1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
               MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      LLVM_DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    if (!growRegion(Cand)) {
      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
      continue;
    }

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      LLVM_DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand, Order);
    LLVM_DEBUG({
      dbgs() << ", total = ";
      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
      for (int I : Cand.LiveBundles.set_bits())
        dbgs() << " EB#" << I;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses.
/// This creates a lot of local live ranges that will be split by tryLocalSplit
/// if they don't allocate.
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
//                         Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                       AllocationOrder &Order,
                                       SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.
  if (!RegClassInfo.isProperSubClass(CurRC))
    return 0;

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
      if (MI->isFullCopy() ||
          SuperRCNumAllocatableRegs ==
              getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                  TII, TRI, RegClassInfo)) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}

//===----------------------------------------------------------------------===//
//                             Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size() - 1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), *Units)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[*Units].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap + 1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap + 1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap + 1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap + 1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}
1376
1377 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1378 /// basic block.
1379 ///
tryLocalSplit(const LiveInterval & VirtReg,AllocationOrder & Order,SmallVectorImpl<Register> & NewVRegs)1380 unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
1381 AllocationOrder &Order,
1382 SmallVectorImpl<Register> &NewVRegs) {
1383 // TODO: the function currently only handles a single UseBlock; it should be
1384 // possible to generalize.
1385 if (SA->getUseBlocks().size() != 1)
1386 return 0;
1387
1388 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1389
1390 // Note that it is possible to have an interval that is live-in or live-out
1391 // while only covering a single block - A phi-def can use undef values from
1392 // predecessors, and the block could be a single-block loop.
1393 // We don't bother doing anything clever about such a case, we simply assume
1394 // that the interval is continuous from FirstInstr to LastInstr. We should
1395 // make sure that we don't do anything illegal to such an interval, though.
1396
1397 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1398 if (Uses.size() <= 2)
1399 return 0;
1400 const unsigned NumGaps = Uses.size()-1;
1401
1402 LLVM_DEBUG({
1403 dbgs() << "tryLocalSplit: ";
1404 for (const auto &Use : Uses)
1405 dbgs() << ' ' << Use;
1406 dbgs() << '\n';
1407 });
1408
1409 // If VirtReg is live across any register mask operands, compute a list of
1410 // gaps with register masks.
1411 SmallVector<unsigned, 8> RegMaskGaps;
1412 if (Matrix->checkRegMaskInterference(VirtReg)) {
1413 // Get regmask slots for the whole block.
1414 ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1415 LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1416 // Constrain to VirtReg's live range.
1417 unsigned RI =
1418 llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
1419 unsigned RE = RMS.size();
1420 for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
1421 // Look for Uses[I] <= RMS <= Uses[I + 1].
1422 assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
1423 if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
1424 continue;
1425 // Skip a regmask on the same instruction as the last use. It doesn't
1426 // overlap the live range.
1427 if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
1428 break;
1429 LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
1430 << Uses[I + 1]);
1431 RegMaskGaps.push_back(I);
1432 // Advance ri to the next gap. A regmask on one of the uses counts in
1433 // both gaps.
1434 while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
1435 ++RI;
1436 }
1437 LLVM_DEBUG(dbgs() << '\n');
1438 }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split into 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
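  // For example, a 3-instruction range at RS_Split2 may only produce pieces
  // with at most 2 instructions each, so a 2+3 split is rejected at that
  // stage. At earlier stages the 2+3 split is allowed once; the 3-instruction
  // result is then tagged RS_Split2 while the 2-instruction result stays
  // RS_New.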
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
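  // blockFreq is this block's execution frequency normalized to the function
  // entry: a block expected to run about 8 times per call has blockFreq ~8.0.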
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between Uses[I] and Uses[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
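        // The first argument models NewGaps + 1 instructions, each executed
        // once per block execution; the second is the new interval's size in
        // slots, extended by one instruction for each live-in/live-out end.
        // normalizeSpillWeight() turns this into a use density comparable
        // with MaxGap.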
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
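        // Hysteresis is slightly below 1.0: the estimated weight must beat
        // MaxGap by a small margin before the split is considered
        // allocatable, avoiding decisions based on near-ties.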
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
                    << Uses[BestAfter] << ", " << BestDiff << ", "
                    << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
      if (IntvMap[I] == 1) {
        ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
        LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
      }
    LLVM_DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return a physical register when VirtReg may be assigned, and/or new live
/// ranges in NewVRegs.
unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<Register> &NewVRegs,
                            const SmallVirtRegSet &FixedRegisters) {
  // Ranges must be Split2 or less.
  if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
                     TimerGroupDescription, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
    MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// Return true if \p reg has any tied def operand.
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
  for (const MachineOperand &MO : MRI->def_operands(reg))
    if (MO.isTied())
      return true;

  return false;
}

/// Return true if the existing assignment of \p Intf overlaps, but is not the
/// same, as \p PhysReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
                                         const VirtRegMap &VRM,
                                         MCRegister PhysReg,
                                         const LiveInterval &Intf) {
  MCRegister AssignedReg = VRM.getPhys(Intf.reg());
  if (PhysReg == AssignedReg)
    return false;
  return TRI.regsOverlap(PhysReg, AssignedReg);
}

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
    MCRegister PhysReg, const LiveInterval &VirtReg,
    SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
            LastChanceRecoloringMaxInterference &&
        !ExhaustiveSearch) {
      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg, it
      // would not be recolorable as it is in the same state as
      // VirtReg. However, there are at least two exceptions.
      //
      // If VirtReg has tied defs and Intf doesn't, then it is still worth
      // examining whether Intf can be recolored.
      //
      // Additionally, if the register class has overlapping tuple members, it
      // may still be recolorable using a different tuple. This is more likely
      // if the existing assignment aliases with the candidate.
      //
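      // For example, on a target with overlapping register tuples, an
      // interference assigned to a tuple that merely aliases PhysReg might be
      // movable to a non-overlapping tuple, so it remains a candidate.
      //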
      if (((ExtraInfo->getStage(*Intf) == RS_Done &&
            MRI->getRegClass(Intf->reg()) == CurRC &&
            !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
           !(hasTiedDef(MRI, VirtReg.reg()) &&
             !hasTiedDef(MRI, Intf->reg()))) ||
          FixedRegisters.count(Intf->reg())) {
        LLVM_DEBUG(
            dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}

/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC acts as if vA were evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA acts as if vB were evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
///
/// \p RecolorStack tracks the original assignments of successfully recolored
/// registers.
///
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<Register> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           RecoloringStack &RecolorStack,
                                           unsigned Depth) {
  if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
    return ~0u;

  LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');

  const ssize_t EntryStackSize = RecolorStack.size();

  // Ranges must be Done.
  assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with too large a search space
  // for targets with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of Live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;

  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  assert(!FixedRegisters.count(VirtReg.reg()));
  FixedRegisters.insert(VirtReg.reg());
  SmallVector<Register, 4> CurrentNewVRegs;

  for (MCRegister PhysReg : Order) {
    assert(PhysReg.isValid());
    LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    CurrentNewVRegs.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      LLVM_DEBUG(
          dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Early give up on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
    // recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (const LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      enqueue(RecoloringQueue, RC);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));

      // Unset the related struct.
      Matrix->unassign(*RC);
    }

    // Act as if VirtReg were assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
                                FixedRegisters, RecolorStack, Depth)) {
      // Push the queued vregs into the main queue.
      for (Register NewVReg : CurrentNewVRegs)
        NewVRegs.push_back(NewVReg);
      // Do not interfere with the global assignment process;
      // VirtReg must remain unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    // For a newly created vreg which is also in RecoloringCandidates,
    // don't add it to NewVRegs because its physical register will be restored
    // below. Other vregs in CurrentNewVRegs are created by calling
    // selectOrSplit and should be added into NewVRegs.
    for (Register &R : CurrentNewVRegs) {
      if (RecoloringCandidates.count(&LIS->getInterval(R)))
        continue;
      NewVRegs.push_back(R);
    }

    // Roll back our unsuccessful recoloring. Also roll back any successful
    // recolorings in any recursive recoloring attempts, since it's possible
    // they would have introduced conflicts with assignments we will be
    // restoring further up the stack. Perform all unassignments prior to
    // reassigning, since sub-recolorings may have conflicted with the
    // registers we are going to restore to their original assignments.
    for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];

      if (VRM->hasPhys(LI->reg()))
        Matrix->unassign(*LI);
    }

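    // Restore the original assignments. Intervals that have become empty or
    // whose register no longer has any non-debug uses have nothing left to
    // assign and are skipped.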
    for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
      const LiveInterval *LI;
      MCRegister PhysReg;
      std::tie(LI, PhysReg) = RecolorStack[I];
      if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
        Matrix->assign(*LI, PhysReg);
    }

    // Pop the stack of recoloring attempts.
    RecolorStack.resize(EntryStackSize);
  }

  // Last chance recoloring did not work either, give up.
  return ~0u;
}

/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
/// \p NewRegs will contain any new virtual register created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       RecoloringStack &RecolorStack,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    const LiveInterval *LI = dequeue(RecoloringQueue);
    LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
                                           RecolorStack, Depth + 1);
    // When splitting happens, the live-range may actually be empty.
    // In that case, it is okay to continue the recoloring even
    // if we did not find an alternative color for it. Indeed,
    // there will not be anything to color for LI in the end.
    if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
      return false;

    if (!PhysReg) {
      assert(LI->empty() &&
             "Only an empty live-range does not require a register");
      LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                        << " succeeded. Empty LI.\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                      << " succeeded with: " << printReg(PhysReg, TRI) << '\n');

    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg());
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
                                   SmallVectorImpl<Register> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction().getContext();
  SmallVirtRegSet FixedRegisters;
  RecoloringStack RecolorStack;
  MCRegister Reg =
      selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range, or spilling the range in its cold path, can have a lower cost than
/// using the CSR for the first time. Returns the physical register if we
/// decide to use the CSR; otherwise returns 0.
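/// In short: at the RS_Spill stage, compare the spill cost against CSRCost;
/// before the split stage, compare the best region split cost against
/// CSRCost; otherwise, simply use the CSR.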
MCRegister RAGreedy::tryAssignCSRFirstTime(
    const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
    uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
  if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (ExtraInfo->getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
    return 0;
  }
  return PhysReg;
}

void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}

void RAGreedy::initializeCSRCost() {
  // We use the larger of the command-line option and the value reported
  // by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
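  // For example, if the actual entry frequency is 2^13 (half the fixed 2^14
  // scale), CSRCost is multiplied by BranchProbability(2^13, 2^14) = 1/2, so
  // the cost stays comparable with this function's block frequencies.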
}

/// Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!Instr.isFullCopy())
      continue;
    // Look for the other end of the copy.
    Register OtherReg = Instr.getOperand(0).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(1).getReg();
      if (OtherReg == Reg)
        continue;
    }
    // Get the current assignment.
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    // Push the collected information.
    Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
                           OtherPhysReg));
  }
}

/// Using the given \p List, compute the cost of the broken hints if
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
                                           MCRegister PhysReg) {
  BlockFrequency Cost = 0;
  for (const HintInfo &Info : List) {
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq;
  }
  return Cost;
}

/// Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<Register, 4> Visited;
  SmallVector<unsigned, 2> RecoloringCandidates;
  HintsInfo Info;
  Register Reg = VirtReg.reg();
  MCRegister PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
                    << '(' << printReg(PhysReg, TRI) << ")\n");

  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical registers.
    if (Register::isPhysicalRegister(Reg))
      continue;

    // This may be a skipped register class.
    if (!VRM->hasPhys(Reg)) {
      assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
             "We have an unallocated variable which should have been handled");
      continue;
    }

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    MCRegister CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
                      << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      LLVM_DEBUG(dbgs() << "Checking profitability:\n");
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
                        << "\nNew Cost: " << NewCopiesCost.getFrequency()
                        << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the new cost is either lower or equal. If it is
      // equal, we consider it profitable anyway because the recoloring may
      // expose more recoloring opportunities.
      LLVM_DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}

/// Try to recolor broken hints.
/// Broken hints may be repaired by recoloring when an evicted variable
/// freed up a register for a larger live-range.
/// Consider the following example:
/// BB1:
///   a =
///   b =
/// BB2:
///   ...
///   = b
///   = a
/// Let us assume b gets split:
/// BB1:
///   a =
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   = a
/// Because of how the allocation works, b, c, and d may be assigned different
/// colors. Now, if a gets evicted later:
/// BB1:
///   a =
///   st a, SpillSlot
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   e = ld SpillSlot
///   = e
/// It is likely that we can assign the same register to b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
  for (const LiveInterval *LI : SetOfBrokenHints) {
    assert(Register::isVirtualRegister(LI->reg()) &&
           "Recoloring is possible only for virtual registers");
    // Some dead defs may be around (e.g., because of debug uses).
    // Ignore those.
    if (!VRM->hasPhys(LI->reg()))
      continue;
    tryHintRecoloring(*LI);
  }
}

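/// selectOrSplitImpl - Drive \p VirtReg through the allocation cascade: first
/// try to assign a free register (possibly trading it for an unused CSR),
/// then try eviction, then splitting, and finally spilling; last chance
/// recoloring is attempted when the range can no longer be spilled.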
MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       RecoloringStack &RecolorStack,
                                       unsigned Depth) {
  uint8_t CostPerUseLimit = uint8_t(~0u);
  // First try assigning a free register.
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  if (MCRegister PhysReg =
          tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register; go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() &&
        EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
      MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                                CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
  LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
                    << ExtraInfo->getCascade(VirtReg.reg()) << '\n');

  // Try to evict a less worthy live range, but only for ranges from the
  // primary queue. The RS_Split ranges already failed to do this, and they
  // should not get a second chance until they have been split.
  if (Stage != RS_Split)
    if (Register PhysReg =
            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
                     FixedRegisters)) {
      Register Hint = MRI->getSimpleHint(VirtReg.reg());
      // If VirtReg has a hint and that hint is broken, record this
      // virtual register as a recoloring candidate for broken hints.
      // Indeed, since we evicted a variable in its neighborhood it is
      // likely we can at least partially recolor some of the
      // copy-related live-ranges.
      if (Hint && Hint != PhysReg)
        SetOfBrokenHints.insert(&VirtReg);
      return PhysReg;
    }

  assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    ExtraInfo->setStage(VirtReg, RS_Split);
    LLVM_DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(VirtReg.reg());
    return 0;
  }

  if (Stage < RS_Spill) {
    // Try splitting VirtReg or interferences.
    unsigned NewVRegSizeBefore = NewVRegs.size();
    Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
    if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
      return PhysReg;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable()) {
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   RecolorStack, Depth);
  }

  // Finally spill VirtReg itself.
  if ((EnableDeferredSpilling ||
       TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
      ExtraInfo->getStage(VirtReg) < RS_Memory) {
    // TODO: This is experimental and in particular, we do not model
    // the live range splitting done by spilling correctly.
    // We would need a deep integration with the spiller to do the
    // right thing here. Anyway, that is still good for early testing.
    ExtraInfo->setStage(VirtReg, RS_Memory);
    LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
    NewVRegs.push_back(VirtReg.reg());
  } else {
    NamedRegionTimer T("spill", "Spiller", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
    spiller().spill(LRE);
    ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

    // Tell LiveDebugVariables about the new ranges. Ranges not covered by
    // the new regs are kept in LDV (still mapping to the old register), until
    // we rewrite spilled locations in LDV at a later stage.
    DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);

    if (VerifyEnabled)
      MF->verify(this, "After spilling");
  }

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
  using namespace ore;
  if (Spills) {
    R << NV("NumSpills", Spills) << " spills ";
    R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
  }
  if (FoldedSpills) {
    R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
    R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
      << " total folded spills cost ";
  }
  if (Reloads) {
    R << NV("NumReloads", Reloads) << " reloads ";
    R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
  }
  if (FoldedReloads) {
    R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
    R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
      << " total folded reloads cost ";
  }
  if (ZeroCostFoldedReloads)
    R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
      << " zero cost folded reloads ";
  if (Copies) {
    R << NV("NumVRCopies", Copies) << " virtual register copies ";
    R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
  }
}

RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
  RAGreedyStats Stats;
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  int FI;

  auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
    return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
        A->getPseudoValue())->getFrameIndex());
  };
  auto isPatchpointInstr = [](const MachineInstr &MI) {
    return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
           MI.getOpcode() == TargetOpcode::STACKMAP ||
           MI.getOpcode() == TargetOpcode::STATEPOINT;
  };
  for (MachineInstr &MI : MBB) {
    if (MI.isCopy()) {
      MachineOperand &Dest = MI.getOperand(0);
      MachineOperand &Src = MI.getOperand(1);
      if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() &&
          Src.getReg().isVirtual())
        ++Stats.Copies;
      continue;
    }

    SmallVector<const MachineMemOperand *, 2> Accesses;
    if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Reloads;
      continue;
    }
    if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
      ++Stats.Spills;
      continue;
    }
    if (TII->hasLoadFromStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      if (!isPatchpointInstr(MI)) {
        Stats.FoldedReloads += Accesses.size();
        continue;
      }
      // For statepoints, there may be both folded and zero cost folded stack
      // reloads.
      std::pair<unsigned, unsigned> NonZeroCostRange =
          TII->getPatchpointUnfoldableRange(MI);
      SmallSet<unsigned, 16> FoldedReloads;
      SmallSet<unsigned, 16> ZeroCostFoldedReloads;
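      // Operand indexes inside the unfoldable range are real folded reloads;
      // frame indexes appearing only outside it (e.g. statepoint deopt or GC
      // operands) can be lowered without a load and count as zero cost.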
      for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
        MachineOperand &MO = MI.getOperand(Idx);
        if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
          continue;
        if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
          FoldedReloads.insert(MO.getIndex());
        else
          ZeroCostFoldedReloads.insert(MO.getIndex());
      }
      // If a stack slot is also used in a folded reload, it is not zero cost.
      for (unsigned Slot : FoldedReloads)
        ZeroCostFoldedReloads.erase(Slot);
      Stats.FoldedReloads += FoldedReloads.size();
      Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
      continue;
    }
    Accesses.clear();
    if (TII->hasStoreToStackSlot(MI, Accesses) &&
        llvm::any_of(Accesses, isSpillSlotAccess)) {
      Stats.FoldedSpills += Accesses.size();
    }
  }
  // Weight the collected statistics by this basic block's frequency relative
  // to the entry block.
  float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
  Stats.ReloadsCost = RelFreq * Stats.Reloads;
  Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
  Stats.SpillsCost = RelFreq * Stats.Spills;
  Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
  Stats.CopiesCost = RelFreq * Stats.Copies;
  return Stats;
}

RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
  RAGreedyStats Stats;

  // Sum up the spills and reloads in subloops.
  for (MachineLoop *SubLoop : *L)
    Stats.add(reportStats(SubLoop));

  for (MachineBasicBlock *MBB : L->getBlocks())
    // Handle blocks that were not included in subloops.
    if (Loops->getLoopFor(MBB) == L)
      Stats.add(computeStats(*MBB));

  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
                                        L->getStartLoc(), L->getHeader());
      Stats.report(R);
      R << "generated in loop";
      return R;
    });
  }
  return Stats;
}

void RAGreedy::reportStats() {
  if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
    return;
  RAGreedyStats Stats;
  for (MachineLoop *L : *Loops)
    Stats.add(reportStats(L));
  // Process non-loop blocks.
  for (MachineBasicBlock &MBB : *MF)
    if (!Loops->getLoopFor(&MBB))
      Stats.add(computeStats(MBB));
  if (!Stats.isEmpty()) {
    using namespace ore;

    ORE->emit([&]() {
      DebugLoc Loc;
      if (auto *SP = MF->getFunction().getSubprogram())
        Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
                                        &MF->front());
      Stats.report(R);
      R << "generated in function";
      return R;
    });
  }
}

bool RAGreedy::hasVirtRegAlloc() {
  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (MRI->reg_nodbg_empty(Reg))
      continue;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if (!RC)
      continue;
    if (ShouldAllocateClass(*TRI, *RC))
      return true;
  }

  return false;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
                    << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TII = MF->getSubtarget().getInstrInfo();

  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());

  // Early return if there is no virtual register to be allocated to a
  // physical register.
  if (!hasVirtRegAlloc())
    return false;

  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();

  initializeCSRCost();

  RegCosts = TRI->getRegisterCosts(*MF);
  RegClassPriorityTrumpsGlobalness =
      GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
          ? GreedyRegClassPriorityTrumpsGlobalness
          : TRI->regClassPriorityTrumpsGlobalness(*MF);

  ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
                               ? GreedyReverseLocalAssignment
                               : TRI->reverseLocalAssignment();

  ExtraInfo.emplace();
  EvictAdvisor =
      getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);

  VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));

  VRAI->calculateSpillWeightsAndHints();

  LLVM_DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));

  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32); // This will grow as needed.
  SetOfBrokenHints.clear();

  allocatePhysRegs();
  tryHintsRecoloring();

  if (VerifyEnabled)
    MF->verify(this, "Before post optimization");
  postOptimization();
  reportStats();

  releaseMemory();
  return true;
}