1 //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the RAGreedy function pass for register allocation in
10 // optimized builds.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RegAllocGreedy.h"
15 #include "AllocationOrder.h"
16 #include "InterferenceCache.h"
17 #include "LiveDebugVariables.h"
18 #include "RegAllocBase.h"
19 #include "RegAllocEvictionAdvisor.h"
20 #include "SpillPlacement.h"
21 #include "SplitKit.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/IndexedMap.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/ADT/StringRef.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/CodeGen/CalcSpillWeights.h"
34 #include "llvm/CodeGen/EdgeBundles.h"
35 #include "llvm/CodeGen/LiveInterval.h"
36 #include "llvm/CodeGen/LiveIntervalUnion.h"
37 #include "llvm/CodeGen/LiveIntervals.h"
38 #include "llvm/CodeGen/LiveRangeEdit.h"
39 #include "llvm/CodeGen/LiveRegMatrix.h"
40 #include "llvm/CodeGen/LiveStacks.h"
41 #include "llvm/CodeGen/MachineBasicBlock.h"
42 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
43 #include "llvm/CodeGen/MachineDominators.h"
44 #include "llvm/CodeGen/MachineFrameInfo.h"
45 #include "llvm/CodeGen/MachineFunction.h"
46 #include "llvm/CodeGen/MachineFunctionPass.h"
47 #include "llvm/CodeGen/MachineInstr.h"
48 #include "llvm/CodeGen/MachineLoopInfo.h"
49 #include "llvm/CodeGen/MachineOperand.h"
50 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
51 #include "llvm/CodeGen/MachineRegisterInfo.h"
52 #include "llvm/CodeGen/RegAllocRegistry.h"
53 #include "llvm/CodeGen/RegisterClassInfo.h"
54 #include "llvm/CodeGen/SlotIndexes.h"
55 #include "llvm/CodeGen/Spiller.h"
56 #include "llvm/CodeGen/TargetInstrInfo.h"
57 #include "llvm/CodeGen/TargetRegisterInfo.h"
58 #include "llvm/CodeGen/TargetSubtargetInfo.h"
59 #include "llvm/CodeGen/VirtRegMap.h"
60 #include "llvm/IR/DebugInfoMetadata.h"
61 #include "llvm/IR/Function.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/InitializePasses.h"
64 #include "llvm/MC/MCRegisterInfo.h"
65 #include "llvm/Pass.h"
66 #include "llvm/Support/BlockFrequency.h"
67 #include "llvm/Support/BranchProbability.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Debug.h"
70 #include "llvm/Support/MathExtras.h"
71 #include "llvm/Support/Timer.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstdint>
76 #include <utility>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "regalloc"
81 
82 STATISTIC(NumGlobalSplits, "Number of split global live ranges");
83 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
84 STATISTIC(NumEvicted,      "Number of interferences evicted");
85 
86 static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
87     "split-spill-mode", cl::Hidden,
88     cl::desc("Spill mode for splitting live ranges"),
89     cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
90                clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
91                clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
92     cl::init(SplitEditor::SM_Speed));
93 
94 static cl::opt<unsigned>
95 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
96                              cl::desc("Last chance recoloring max depth"),
97                              cl::init(5));
98 
99 static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
100     "lcr-max-interf", cl::Hidden,
101     cl::desc("Last chance recoloring maximum number of considered"
102              " interference at a time"),
103     cl::init(8));
104 
105 static cl::opt<bool> ExhaustiveSearch(
106     "exhaustive-register-search", cl::NotHidden,
107     cl::desc("Exhaustive Search for registers bypassing the depth "
108              "and interference cutoffs of last chance recoloring"),
109     cl::Hidden);
110 
111 static cl::opt<bool> EnableDeferredSpilling(
112     "enable-deferred-spilling", cl::Hidden,
113     cl::desc("Instead of spilling a variable right away, defer the actual "
114              "code insertion to the end of the allocation. That way the "
115              "allocator might still find a suitable coloring for this "
116              "variable because of other evicted variables."),
117     cl::init(false));
118 
119 // FIXME: Find a good default for this flag and remove the flag.
120 static cl::opt<unsigned>
121 CSRFirstTimeCost("regalloc-csr-first-time-cost",
122               cl::desc("Cost for first time use of callee-saved register."),
123               cl::init(0), cl::Hidden);
124 
125 static cl::opt<unsigned long> GrowRegionComplexityBudget(
126     "grow-region-complexity-budget",
127     cl::desc("growRegion() does not scale with the number of BB edges, so "
128              "limit its budget and bail out once we reach the limit."),
129     cl::init(10000), cl::Hidden);
130 
131 static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
132     "greedy-regclass-priority-trumps-globalness",
133     cl::desc("Change the greedy register allocator's live range priority "
134              "calculation to make the AllocationPriority of the register class "
135              "more important then whether the range is global"),
136     cl::Hidden);
137 
138 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
139                                        createGreedyRegisterAllocator);
140 
141 char RAGreedy::ID = 0;
142 char &llvm::RAGreedyID = RAGreedy::ID;
143 
144 INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
145                 "Greedy Register Allocator", false, false)
146 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
147 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
148 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
149 INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
150 INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
151 INITIALIZE_PASS_DEPENDENCY(LiveStacks)
152 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
153 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
154 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
155 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
156 INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
157 INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
158 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
159 INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
160 INITIALIZE_PASS_END(RAGreedy, "greedy",
161                 "Greedy Register Allocator", false, false)
162 
163 #ifndef NDEBUG
164 const char *const RAGreedy::StageName[] = {
165     "RS_New",
166     "RS_Assign",
167     "RS_Split",
168     "RS_Split2",
169     "RS_Spill",
170     "RS_Memory",
171     "RS_Done"
172 };
173 #endif
174 
175 // Hysteresis to use when comparing floats.
176 // This helps stabilize decisions based on float comparisons.
177 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
178 
179 FunctionPass* llvm::createGreedyRegisterAllocator() {
180   return new RAGreedy();
181 }
182 
183 namespace llvm {
184 FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc Ftor);
185 }
186 
187 FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
188   return new RAGreedy(Ftor);
189 }
190 
191 RAGreedy::RAGreedy(RegClassFilterFunc F):
192   MachineFunctionPass(ID),
193   RegAllocBase(F) {
194 }
195 
196 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
197   AU.setPreservesCFG();
198   AU.addRequired<MachineBlockFrequencyInfo>();
199   AU.addPreserved<MachineBlockFrequencyInfo>();
200   AU.addRequired<AAResultsWrapperPass>();
201   AU.addPreserved<AAResultsWrapperPass>();
202   AU.addRequired<LiveIntervals>();
203   AU.addPreserved<LiveIntervals>();
204   AU.addRequired<SlotIndexes>();
205   AU.addPreserved<SlotIndexes>();
206   AU.addRequired<LiveDebugVariables>();
207   AU.addPreserved<LiveDebugVariables>();
208   AU.addRequired<LiveStacks>();
209   AU.addPreserved<LiveStacks>();
210   AU.addRequired<MachineDominatorTree>();
211   AU.addPreserved<MachineDominatorTree>();
212   AU.addRequired<MachineLoopInfo>();
213   AU.addPreserved<MachineLoopInfo>();
214   AU.addRequired<VirtRegMap>();
215   AU.addPreserved<VirtRegMap>();
216   AU.addRequired<LiveRegMatrix>();
217   AU.addPreserved<LiveRegMatrix>();
218   AU.addRequired<EdgeBundles>();
219   AU.addRequired<SpillPlacement>();
220   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
221   AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
222   MachineFunctionPass::getAnalysisUsage(AU);
223 }
224 
225 //===----------------------------------------------------------------------===//
226 //                     LiveRangeEdit delegate methods
227 //===----------------------------------------------------------------------===//
228 
229 bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
230   LiveInterval &LI = LIS->getInterval(VirtReg);
231   if (VRM->hasPhys(VirtReg)) {
232     Matrix->unassign(LI);
233     aboutToRemoveInterval(LI);
234     return true;
235   }
236   // Unassigned virtreg is probably in the priority queue.
237   // RegAllocBase will erase it after dequeueing.
238   // Nonetheless, clear the live-range so that the debug
239   // dump will show the right state for that VirtReg.
240   LI.clear();
241   return false;
242 }
243 
244 void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
245   if (!VRM->hasPhys(VirtReg))
246     return;
247 
248   // Register is assigned, put it back on the queue for reassignment.
249   LiveInterval &LI = LIS->getInterval(VirtReg);
250   Matrix->unassign(LI);
251   RegAllocBase::enqueue(&LI);
252 }
253 
254 void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
255   ExtraInfo->LRE_DidCloneVirtReg(New, Old);
256 }
257 
258 void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
259   // Cloning a register we haven't even heard about yet?  Just ignore it.
260   if (!Info.inBounds(Old))
261     return;
262 
263   // LRE may clone a virtual register because dead code elimination causes it to
264   // be split into connected components. The new components are much smaller
265   // than the original, so they should get a new chance at being assigned.
266   // same stage as the parent.
267   Info[Old].Stage = RS_Assign;
268   Info.grow(New.id());
269   Info[New] = Info[Old];
270 }
271 
272 void RAGreedy::releaseMemory() {
273   SpillerInstance.reset();
274   GlobalCand.clear();
275 }
276 
277 void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
278 
279 void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
280   // Prioritize live ranges by size, assigning larger ranges first.
281   // The queue holds (size, reg) pairs.
282   const unsigned Size = LI->getSize();
283   const Register Reg = LI->reg();
284   assert(Reg.isVirtual() && "Can only enqueue virtual registers");
285   unsigned Prio;
286 
287   auto Stage = ExtraInfo->getOrInitStage(Reg);
288   if (Stage == RS_New) {
289     Stage = RS_Assign;
290     ExtraInfo->setStage(Reg, Stage);
291   }
292   if (Stage == RS_Split) {
293     // Unsplit ranges that couldn't be allocated immediately are deferred until
294     // everything else has been allocated.
295     Prio = Size;
296   } else if (Stage == RS_Memory) {
297     // Memory operand should be considered last.
298     // Change the priority such that Memory operand are assigned in
299     // the reverse order that they came in.
300     // TODO: Make this a member variable and probably do something about hints.
301     static unsigned MemOp = 0;
302     Prio = MemOp++;
303   } else {
304     // Giant live ranges fall back to the global assignment heuristic, which
305     // prevents excessive spilling in pathological cases.
306     bool ReverseLocal = TRI->reverseLocalAssignment();
307     const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
308     bool ForceGlobal =
309         !ReverseLocal && (Size / SlotIndex::InstrDist) >
310                              (2 * RegClassInfo.getNumAllocatableRegs(&RC));
311     unsigned GlobalBit = 0;
312 
313     if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
314         LIS->intervalIsInOneMBB(*LI)) {
315       // Allocate original local ranges in linear instruction order. Since they
316       // are singly defined, this produces optimal coloring in the absence of
317       // global interference and other constraints.
318       if (!ReverseLocal)
319         Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
320       else {
321         // Allocating bottom up may allow many short LRGs to be assigned first
322         // to one of the cheap registers. This could be much faster for very
323         // large blocks on targets with many physical registers.
324         Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
325       }
326     } else {
327       // Allocate global and split ranges in long->short order. Long ranges that
328       // don't fit should be spilled (or split) ASAP so they don't create
329       // interference.  Mark a bit to prioritize global above local ranges.
330       Prio = Size;
331       GlobalBit = 1;
332     }
333     if (RegClassPriorityTrumpsGlobalness)
334       Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
335     else
336       Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
337 
338     // Mark a higher bit to prioritize global and local above RS_Split.
339     Prio |= (1u << 31);
340 
341     // Boost ranges that have a physical register hint.
342     if (VRM->hasKnownPreference(Reg))
343       Prio |= (1u << 30);
344   }
345   // The virtual register number is a tie breaker for same-sized ranges.
346   // Give lower vreg numbers higher priority to assign them first.
347   CurQueue.push(std::make_pair(Prio, ~Reg));
348 }
349 
350 const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
351 
352 const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
353   if (CurQueue.empty())
354     return nullptr;
355   LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
356   CurQueue.pop();
357   return LI;
358 }
359 
360 //===----------------------------------------------------------------------===//
361 //                            Direct Assignment
362 //===----------------------------------------------------------------------===//
363 
364 /// tryAssign - Try to assign VirtReg to an available register.
365 MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
366                                AllocationOrder &Order,
367                                SmallVectorImpl<Register> &NewVRegs,
368                                const SmallVirtRegSet &FixedRegisters) {
369   MCRegister PhysReg;
370   for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
371     assert(*I);
372     if (!Matrix->checkInterference(VirtReg, *I)) {
373       if (I.isHint())
374         return *I;
375       else
376         PhysReg = *I;
377     }
378   }
379   if (!PhysReg.isValid())
380     return PhysReg;
381 
382   // PhysReg is available, but there may be a better choice.
383 
384   // If we missed a simple hint, try to cheaply evict interference from the
385   // preferred register.
386   if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
387     if (Order.isHint(Hint)) {
388       MCRegister PhysHint = Hint.asMCReg();
389       LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
390 
391       if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
392                                                  FixedRegisters)) {
393         evictInterference(VirtReg, PhysHint, NewVRegs);
394         return PhysHint;
395       }
396       // Record the missed hint, we may be able to recover
397       // at the end if the surrounding allocation changed.
398       SetOfBrokenHints.insert(&VirtReg);
399     }
400 
401   // Try to evict interference from a cheaper alternative.
402   uint8_t Cost = RegCosts[PhysReg];
403 
404   // Most registers have 0 additional cost.
405   if (!Cost)
406     return PhysReg;
407 
408   LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
409                     << (unsigned)Cost << '\n');
410   MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
411   return CheapReg ? CheapReg : PhysReg;
412 }
413 
414 //===----------------------------------------------------------------------===//
415 //                         Interference eviction
416 //===----------------------------------------------------------------------===//
417 
418 Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
419                                               Register PrevReg) const {
420   auto Order =
421       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
422   MCRegister PhysReg;
423   for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
424     if ((*I).id() == PrevReg.id())
425       continue;
426 
427     MCRegUnitIterator Units(*I, TRI);
428     for (; Units.isValid(); ++Units) {
429       // Instantiate a "subquery", not to be confused with the Queries array.
430       LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
431       if (subQ.checkInterference())
432         break;
433     }
434     // If no units have interference, break out with the current PhysReg.
435     if (!Units.isValid())
436       PhysReg = *I;
437   }
438   if (PhysReg)
439     LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
440                       << printReg(PrevReg, TRI) << " to "
441                       << printReg(PhysReg, TRI) << '\n');
442   return PhysReg;
443 }
444 
445 /// evictInterference - Evict any interferring registers that prevent VirtReg
446 /// from being assigned to Physreg. This assumes that canEvictInterference
447 /// returned true.
448 void RAGreedy::evictInterference(const LiveInterval &VirtReg,
449                                  MCRegister PhysReg,
450                                  SmallVectorImpl<Register> &NewVRegs) {
451   // Make sure that VirtReg has a cascade number, and assign that cascade
452   // number to every evicted register. These live ranges than then only be
453   // evicted by a newer cascade, preventing infinite loops.
454   unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
455 
456   LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
457                     << " interference: Cascade " << Cascade << '\n');
458 
459   // Collect all interfering virtregs first.
460   SmallVector<const LiveInterval *, 8> Intfs;
461   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
462     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
463     // We usually have the interfering VRegs cached so collectInterferingVRegs()
464     // should be fast, we may need to recalculate if when different physregs
465     // overlap the same register unit so we had different SubRanges queried
466     // against it.
467     ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
468     Intfs.append(IVR.begin(), IVR.end());
469   }
470 
471   // Evict them second. This will invalidate the queries.
472   for (const LiveInterval *Intf : Intfs) {
473     // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
474     if (!VRM->hasPhys(Intf->reg()))
475       continue;
476 
477     Matrix->unassign(*Intf);
478     assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
479             VirtReg.isSpillable() < Intf->isSpillable()) &&
480            "Cannot decrease cascade number, illegal eviction");
481     ExtraInfo->setCascade(Intf->reg(), Cascade);
482     ++NumEvicted;
483     NewVRegs.push_back(Intf->reg());
484   }
485 }
486 
487 /// Returns true if the given \p PhysReg is a callee saved register and has not
488 /// been used for allocation yet.
489 bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
490   MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
491   if (!CSR)
492     return false;
493 
494   return !Matrix->isPhysRegUsed(PhysReg);
495 }
496 
497 Optional<unsigned>
498 RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
499                                        const AllocationOrder &Order,
500                                        unsigned CostPerUseLimit) const {
501   unsigned OrderLimit = Order.getOrder().size();
502 
503   if (CostPerUseLimit < uint8_t(~0u)) {
504     // Check of any registers in RC are below CostPerUseLimit.
505     const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
506     uint8_t MinCost = RegClassInfo.getMinCost(RC);
507     if (MinCost >= CostPerUseLimit) {
508       LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
509                         << MinCost << ", no cheaper registers to be found.\n");
510       return None;
511     }
512 
513     // It is normal for register classes to have a long tail of registers with
514     // the same cost. We don't need to look at them if they're too expensive.
515     if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
516       OrderLimit = RegClassInfo.getLastCostChange(RC);
517       LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
518                         << " regs.\n");
519     }
520   }
521   return OrderLimit;
522 }
523 
524 bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
525                                                  MCRegister PhysReg) const {
526   if (RegCosts[PhysReg] >= CostPerUseLimit)
527     return false;
528   // The first use of a callee-saved register in a function has cost 1.
529   // Don't start using a CSR when the CostPerUseLimit is low.
530   if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
531     LLVM_DEBUG(
532         dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
533                << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
534                << '\n');
535     return false;
536   }
537   return true;
538 }
539 
540 /// tryEvict - Try to evict all interferences for a physreg.
541 /// @param  VirtReg Currently unassigned virtual register.
542 /// @param  Order   Physregs to try.
543 /// @return         Physreg to assign VirtReg, or 0.
544 MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
545                               AllocationOrder &Order,
546                               SmallVectorImpl<Register> &NewVRegs,
547                               uint8_t CostPerUseLimit,
548                               const SmallVirtRegSet &FixedRegisters) {
549   NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
550                      TimePassesIsEnabled);
551 
552   MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
553       VirtReg, Order, CostPerUseLimit, FixedRegisters);
554   if (BestPhys.isValid())
555     evictInterference(VirtReg, BestPhys, NewVRegs);
556   return BestPhys;
557 }
558 
559 //===----------------------------------------------------------------------===//
560 //                              Region Splitting
561 //===----------------------------------------------------------------------===//
562 
563 /// addSplitConstraints - Fill out the SplitConstraints vector based on the
564 /// interference pattern in Physreg and its aliases. Add the constraints to
565 /// SpillPlacement and return the static cost of this split in Cost, assuming
566 /// that all preferences in SplitConstraints are met.
567 /// Return false if there are no bundles with positive bias.
568 bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
569                                    BlockFrequency &Cost) {
570   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
571 
572   // Reset interference dependent info.
573   SplitConstraints.resize(UseBlocks.size());
574   BlockFrequency StaticCost = 0;
575   for (unsigned I = 0; I != UseBlocks.size(); ++I) {
576     const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
577     SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
578 
579     BC.Number = BI.MBB->getNumber();
580     Intf.moveToBlock(BC.Number);
581     BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
582     BC.Exit = (BI.LiveOut &&
583                !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
584                   ? SpillPlacement::PrefReg
585                   : SpillPlacement::DontCare;
586     BC.ChangesValue = BI.FirstDef.isValid();
587 
588     if (!Intf.hasInterference())
589       continue;
590 
591     // Number of spill code instructions to insert.
592     unsigned Ins = 0;
593 
594     // Interference for the live-in value.
595     if (BI.LiveIn) {
596       if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
597         BC.Entry = SpillPlacement::MustSpill;
598         ++Ins;
599       } else if (Intf.first() < BI.FirstInstr) {
600         BC.Entry = SpillPlacement::PrefSpill;
601         ++Ins;
602       } else if (Intf.first() < BI.LastInstr) {
603         ++Ins;
604       }
605 
606       // Abort if the spill cannot be inserted at the MBB' start
607       if (((BC.Entry == SpillPlacement::MustSpill) ||
608            (BC.Entry == SpillPlacement::PrefSpill)) &&
609           SlotIndex::isEarlierInstr(BI.FirstInstr,
610                                     SA->getFirstSplitPoint(BC.Number)))
611         return false;
612     }
613 
614     // Interference for the live-out value.
615     if (BI.LiveOut) {
616       if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
617         BC.Exit = SpillPlacement::MustSpill;
618         ++Ins;
619       } else if (Intf.last() > BI.LastInstr) {
620         BC.Exit = SpillPlacement::PrefSpill;
621         ++Ins;
622       } else if (Intf.last() > BI.FirstInstr) {
623         ++Ins;
624       }
625     }
626 
627     // Accumulate the total frequency of inserted spill code.
628     while (Ins--)
629       StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
630   }
631   Cost = StaticCost;
632 
633   // Add constraints for use-blocks. Note that these are the only constraints
634   // that may add a positive bias, it is downhill from here.
635   SpillPlacer->addConstraints(SplitConstraints);
636   return SpillPlacer->scanActiveBundles();
637 }
638 
639 /// addThroughConstraints - Add constraints and links to SpillPlacer from the
640 /// live-through blocks in Blocks.
641 bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
642                                      ArrayRef<unsigned> Blocks) {
643   const unsigned GroupSize = 8;
644   SpillPlacement::BlockConstraint BCS[GroupSize];
645   unsigned TBS[GroupSize];
646   unsigned B = 0, T = 0;
647 
648   for (unsigned Number : Blocks) {
649     Intf.moveToBlock(Number);
650 
651     if (!Intf.hasInterference()) {
652       assert(T < GroupSize && "Array overflow");
653       TBS[T] = Number;
654       if (++T == GroupSize) {
655         SpillPlacer->addLinks(makeArrayRef(TBS, T));
656         T = 0;
657       }
658       continue;
659     }
660 
661     assert(B < GroupSize && "Array overflow");
662     BCS[B].Number = Number;
663 
664     // Abort if the spill cannot be inserted at the MBB' start
665     MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
666     auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
667     if (FirstNonDebugInstr != MBB->end() &&
668         SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
669                                   SA->getFirstSplitPoint(Number)))
670       return false;
671     // Interference for the live-in value.
672     if (Intf.first() <= Indexes->getMBBStartIdx(Number))
673       BCS[B].Entry = SpillPlacement::MustSpill;
674     else
675       BCS[B].Entry = SpillPlacement::PrefSpill;
676 
677     // Interference for the live-out value.
678     if (Intf.last() >= SA->getLastSplitPoint(Number))
679       BCS[B].Exit = SpillPlacement::MustSpill;
680     else
681       BCS[B].Exit = SpillPlacement::PrefSpill;
682 
683     if (++B == GroupSize) {
684       SpillPlacer->addConstraints(makeArrayRef(BCS, B));
685       B = 0;
686     }
687   }
688 
689   SpillPlacer->addConstraints(makeArrayRef(BCS, B));
690   SpillPlacer->addLinks(makeArrayRef(TBS, T));
691   return true;
692 }
693 
694 bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
695   // Keep track of through blocks that have not been added to SpillPlacer.
696   BitVector Todo = SA->getThroughBlocks();
697   SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
698   unsigned AddedTo = 0;
699 #ifndef NDEBUG
700   unsigned Visited = 0;
701 #endif
702 
703   unsigned long Budget = GrowRegionComplexityBudget;
704   while (true) {
705     ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
706     // Find new through blocks in the periphery of PrefRegBundles.
707     for (unsigned Bundle : NewBundles) {
708       // Look at all blocks connected to Bundle in the full graph.
709       ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
710       // Limit compilation time by bailing out after we use all our budget.
711       if (Blocks.size() >= Budget)
712         return false;
713       Budget -= Blocks.size();
714       for (unsigned Block : Blocks) {
715         if (!Todo.test(Block))
716           continue;
717         Todo.reset(Block);
718         // This is a new through block. Add it to SpillPlacer later.
719         ActiveBlocks.push_back(Block);
720 #ifndef NDEBUG
721         ++Visited;
722 #endif
723       }
724     }
725     // Any new blocks to add?
726     if (ActiveBlocks.size() == AddedTo)
727       break;
728 
729     // Compute through constraints from the interference, or assume that all
730     // through blocks prefer spilling when forming compact regions.
731     auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
732     if (Cand.PhysReg) {
733       if (!addThroughConstraints(Cand.Intf, NewBlocks))
734         return false;
735     } else
736       // Provide a strong negative bias on through blocks to prevent unwanted
737       // liveness on loop backedges.
738       SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
739     AddedTo = ActiveBlocks.size();
740 
741     // Perhaps iterating can enable more bundles?
742     SpillPlacer->iterate();
743   }
744   LLVM_DEBUG(dbgs() << ", v=" << Visited);
745   return true;
746 }
747 
748 /// calcCompactRegion - Compute the set of edge bundles that should be live
749 /// when splitting the current live range into compact regions.  Compact
750 /// regions can be computed without looking at interference.  They are the
751 /// regions formed by removing all the live-through blocks from the live range.
752 ///
753 /// Returns false if the current live range is already compact, or if the
754 /// compact regions would form single block regions anyway.
755 bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
756   // Without any through blocks, the live range is already compact.
757   if (!SA->getNumThroughBlocks())
758     return false;
759 
760   // Compact regions don't correspond to any physreg.
761   Cand.reset(IntfCache, MCRegister::NoRegister);
762 
763   LLVM_DEBUG(dbgs() << "Compact region bundles");
764 
765   // Use the spill placer to determine the live bundles. GrowRegion pretends
766   // that all the through blocks have interference when PhysReg is unset.
767   SpillPlacer->prepare(Cand.LiveBundles);
768 
769   // The static split cost will be zero since Cand.Intf reports no interference.
770   BlockFrequency Cost;
771   if (!addSplitConstraints(Cand.Intf, Cost)) {
772     LLVM_DEBUG(dbgs() << ", none.\n");
773     return false;
774   }
775 
776   if (!growRegion(Cand)) {
777     LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
778     return false;
779   }
780 
781   SpillPlacer->finish();
782 
783   if (!Cand.LiveBundles.any()) {
784     LLVM_DEBUG(dbgs() << ", none.\n");
785     return false;
786   }
787 
788   LLVM_DEBUG({
789     for (int I : Cand.LiveBundles.set_bits())
790       dbgs() << " EB#" << I;
791     dbgs() << ".\n";
792   });
793   return true;
794 }
795 
796 /// calcSpillCost - Compute how expensive it would be to split the live range in
797 /// SA around all use blocks instead of forming bundle regions.
798 BlockFrequency RAGreedy::calcSpillCost() {
799   BlockFrequency Cost = 0;
800   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
801   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
802     unsigned Number = BI.MBB->getNumber();
803     // We normally only need one spill instruction - a load or a store.
804     Cost += SpillPlacer->getBlockFrequency(Number);
805 
806     // Unless the value is redefined in the block.
807     if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
808       Cost += SpillPlacer->getBlockFrequency(Number);
809   }
810   return Cost;
811 }
812 
813 /// calcGlobalSplitCost - Return the global split cost of following the split
814 /// pattern in LiveBundles. This cost should be added to the local cost of the
815 /// interference pattern in SplitConstraints.
816 ///
817 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
818                                              const AllocationOrder &Order) {
819   BlockFrequency GlobalCost = 0;
820   const BitVector &LiveBundles = Cand.LiveBundles;
821   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
822   for (unsigned I = 0; I != UseBlocks.size(); ++I) {
823     const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
824     SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
825     bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, false)];
826     bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
827     unsigned Ins = 0;
828 
829     Cand.Intf.moveToBlock(BC.Number);
830 
831     if (BI.LiveIn)
832       Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
833     if (BI.LiveOut)
834       Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
835     while (Ins--)
836       GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
837   }
838 
839   for (unsigned Number : Cand.ActiveBlocks) {
840     bool RegIn  = LiveBundles[Bundles->getBundle(Number, false)];
841     bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
842     if (!RegIn && !RegOut)
843       continue;
844     if (RegIn && RegOut) {
845       // We need double spill code if this block has interference.
846       Cand.Intf.moveToBlock(Number);
847       if (Cand.Intf.hasInterference()) {
848         GlobalCost += SpillPlacer->getBlockFrequency(Number);
849         GlobalCost += SpillPlacer->getBlockFrequency(Number);
850       }
851       continue;
852     }
853     // live-in / stack-out or stack-in live-out.
854     GlobalCost += SpillPlacer->getBlockFrequency(Number);
855   }
856   return GlobalCost;
857 }
858 
859 /// splitAroundRegion - Split the current live range around the regions
860 /// determined by BundleCand and GlobalCand.
861 ///
862 /// Before calling this function, GlobalCand and BundleCand must be initialized
863 /// so each bundle is assigned to a valid candidate, or NoCand for the
864 /// stack-bound bundles.  The shared SA/SE SplitAnalysis and SplitEditor
865 /// objects must be initialized for the current live range, and intervals
866 /// created for the used candidates.
867 ///
868 /// @param LREdit    The LiveRangeEdit object handling the current split.
869 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value
870 ///                  must appear in this list.
871 void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
872                                  ArrayRef<unsigned> UsedCands) {
873   // These are the intervals created for new global ranges. We may create more
874   // intervals for local ranges.
875   const unsigned NumGlobalIntvs = LREdit.size();
876   LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
877                     << " globals.\n");
878   assert(NumGlobalIntvs && "No global intervals configured");
879 
880   // Isolate even single instructions when dealing with a proper sub-class.
881   // That guarantees register class inflation for the stack interval because it
882   // is all copies.
883   Register Reg = SA->getParent().reg();
884   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
885 
886   // First handle all the blocks with uses.
887   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
888   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
889     unsigned Number = BI.MBB->getNumber();
890     unsigned IntvIn = 0, IntvOut = 0;
891     SlotIndex IntfIn, IntfOut;
892     if (BI.LiveIn) {
893       unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
894       if (CandIn != NoCand) {
895         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
896         IntvIn = Cand.IntvIdx;
897         Cand.Intf.moveToBlock(Number);
898         IntfIn = Cand.Intf.first();
899       }
900     }
901     if (BI.LiveOut) {
902       unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
903       if (CandOut != NoCand) {
904         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
905         IntvOut = Cand.IntvIdx;
906         Cand.Intf.moveToBlock(Number);
907         IntfOut = Cand.Intf.last();
908       }
909     }
910 
911     // Create separate intervals for isolated blocks with multiple uses.
912     if (!IntvIn && !IntvOut) {
913       LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
914       if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
915         SE->splitSingleBlock(BI);
916       continue;
917     }
918 
919     if (IntvIn && IntvOut)
920       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
921     else if (IntvIn)
922       SE->splitRegInBlock(BI, IntvIn, IntfIn);
923     else
924       SE->splitRegOutBlock(BI, IntvOut, IntfOut);
925   }
926 
927   // Handle live-through blocks. The relevant live-through blocks are stored in
928   // the ActiveBlocks list with each candidate. We need to filter out
929   // duplicates.
930   BitVector Todo = SA->getThroughBlocks();
931   for (unsigned UsedCand : UsedCands) {
932     ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
933     for (unsigned Number : Blocks) {
934       if (!Todo.test(Number))
935         continue;
936       Todo.reset(Number);
937 
938       unsigned IntvIn = 0, IntvOut = 0;
939       SlotIndex IntfIn, IntfOut;
940 
941       unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
942       if (CandIn != NoCand) {
943         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
944         IntvIn = Cand.IntvIdx;
945         Cand.Intf.moveToBlock(Number);
946         IntfIn = Cand.Intf.first();
947       }
948 
949       unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
950       if (CandOut != NoCand) {
951         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
952         IntvOut = Cand.IntvIdx;
953         Cand.Intf.moveToBlock(Number);
954         IntfOut = Cand.Intf.last();
955       }
956       if (!IntvIn && !IntvOut)
957         continue;
958       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
959     }
960   }
961 
962   ++NumGlobalSplits;
963 
964   SmallVector<unsigned, 8> IntvMap;
965   SE->finish(&IntvMap);
966   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
967 
968   unsigned OrigBlocks = SA->getNumLiveBlocks();
969 
970   // Sort out the new intervals created by splitting. We get four kinds:
971   // - Remainder intervals should not be split again.
972   // - Candidate intervals can be assigned to Cand.PhysReg.
973   // - Block-local splits are candidates for local splitting.
974   // - DCE leftovers should go back on the queue.
975   for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
976     const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
977 
978     // Ignore old intervals from DCE.
979     if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
980       continue;
981 
982     // Remainder interval. Don't try splitting again, spill if it doesn't
983     // allocate.
984     if (IntvMap[I] == 0) {
985       ExtraInfo->setStage(Reg, RS_Spill);
986       continue;
987     }
988 
989     // Global intervals. Allow repeated splitting as long as the number of live
990     // blocks is strictly decreasing.
991     if (IntvMap[I] < NumGlobalIntvs) {
992       if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
993         LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
994                           << " blocks as original.\n");
995         // Don't allow repeated splitting as a safe guard against looping.
996         ExtraInfo->setStage(Reg, RS_Split2);
997       }
998       continue;
999     }
1000 
1001     // Other intervals are treated as new. This includes local intervals created
1002     // for blocks with multiple uses, and anything created by DCE.
1003   }
1004 
1005   if (VerifyEnabled)
1006     MF->verify(this, "After splitting live range around region");
1007 }
1008 
1009 MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
1010                                     AllocationOrder &Order,
1011                                     SmallVectorImpl<Register> &NewVRegs) {
1012   if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
1013     return MCRegister::NoRegister;
1014   unsigned NumCands = 0;
1015   BlockFrequency SpillCost = calcSpillCost();
1016   BlockFrequency BestCost;
1017 
1018   // Check if we can split this live range around a compact region.
1019   bool HasCompact = calcCompactRegion(GlobalCand.front());
1020   if (HasCompact) {
1021     // Yes, keep GlobalCand[0] as the compact region candidate.
1022     NumCands = 1;
1023     BestCost = BlockFrequency::getMaxFrequency();
1024   } else {
1025     // No benefit from the compact region, our fallback will be per-block
1026     // splitting. Make sure we find a solution that is cheaper than spilling.
1027     BestCost = SpillCost;
1028     LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
1029                MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
1030   }
1031 
1032   unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
1033                                                NumCands, false /*IgnoreCSR*/);
1034 
1035   // No solutions found, fall back to single block splitting.
1036   if (!HasCompact && BestCand == NoCand)
1037     return MCRegister::NoRegister;
1038 
1039   return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1040 }
1041 
1042 unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
1043                                             AllocationOrder &Order,
1044                                             BlockFrequency &BestCost,
1045                                             unsigned &NumCands,
1046                                             bool IgnoreCSR) {
1047   unsigned BestCand = NoCand;
1048   for (MCPhysReg PhysReg : Order) {
1049     assert(PhysReg);
1050     if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
1051       continue;
1052 
1053     // Discard bad candidates before we run out of interference cache cursors.
1054     // This will only affect register classes with a lot of registers (>32).
1055     if (NumCands == IntfCache.getMaxCursors()) {
1056       unsigned WorstCount = ~0u;
1057       unsigned Worst = 0;
1058       for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
1059         if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
1060           continue;
1061         unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
1062         if (Count < WorstCount) {
1063           Worst = CandIndex;
1064           WorstCount = Count;
1065         }
1066       }
1067       --NumCands;
1068       GlobalCand[Worst] = GlobalCand[NumCands];
1069       if (BestCand == NumCands)
1070         BestCand = Worst;
1071     }
1072 
1073     if (GlobalCand.size() <= NumCands)
1074       GlobalCand.resize(NumCands+1);
1075     GlobalSplitCandidate &Cand = GlobalCand[NumCands];
1076     Cand.reset(IntfCache, PhysReg);
1077 
1078     SpillPlacer->prepare(Cand.LiveBundles);
1079     BlockFrequency Cost;
1080     if (!addSplitConstraints(Cand.Intf, Cost)) {
1081       LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
1082       continue;
1083     }
1084     LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
1085                MBFI->printBlockFreq(dbgs(), Cost));
1086     if (Cost >= BestCost) {
1087       LLVM_DEBUG({
1088         if (BestCand == NoCand)
1089           dbgs() << " worse than no bundles\n";
1090         else
1091           dbgs() << " worse than "
1092                  << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
1093       });
1094       continue;
1095     }
1096     if (!growRegion(Cand)) {
1097       LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
1098       continue;
1099     }
1100 
1101     SpillPlacer->finish();
1102 
1103     // No live bundles, defer to splitSingleBlocks().
1104     if (!Cand.LiveBundles.any()) {
1105       LLVM_DEBUG(dbgs() << " no bundles.\n");
1106       continue;
1107     }
1108 
1109     Cost += calcGlobalSplitCost(Cand, Order);
1110     LLVM_DEBUG({
1111       dbgs() << ", total = ";
1112       MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
1113       for (int I : Cand.LiveBundles.set_bits())
1114         dbgs() << " EB#" << I;
1115       dbgs() << ".\n";
1116     });
1117     if (Cost < BestCost) {
1118       BestCand = NumCands;
1119       BestCost = Cost;
1120     }
1121     ++NumCands;
1122   }
1123 
1124   return BestCand;
1125 }
1126 
1127 unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
1128                                  bool HasCompact,
1129                                  SmallVectorImpl<Register> &NewVRegs) {
1130   SmallVector<unsigned, 8> UsedCands;
1131   // Prepare split editor.
1132   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1133   SE->reset(LREdit, SplitSpillMode);
1134 
1135   // Assign all edge bundles to the preferred candidate, or NoCand.
1136   BundleCand.assign(Bundles->getNumBundles(), NoCand);
1137 
1138   // Assign bundles for the best candidate region.
1139   if (BestCand != NoCand) {
1140     GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1141     if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1142       UsedCands.push_back(BestCand);
1143       Cand.IntvIdx = SE->openIntv();
1144       LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
1145                         << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1146       (void)B;
1147     }
1148   }
1149 
1150   // Assign bundles for the compact region.
1151   if (HasCompact) {
1152     GlobalSplitCandidate &Cand = GlobalCand.front();
1153     assert(!Cand.PhysReg && "Compact region has no physreg");
1154     if (unsigned B = Cand.getBundles(BundleCand, 0)) {
1155       UsedCands.push_back(0);
1156       Cand.IntvIdx = SE->openIntv();
1157       LLVM_DEBUG(dbgs() << "Split for compact region in " << B
1158                         << " bundles, intv " << Cand.IntvIdx << ".\n");
1159       (void)B;
1160     }
1161   }
1162 
1163   splitAroundRegion(LREdit, UsedCands);
1164   return 0;
1165 }
1166 
1167 //===----------------------------------------------------------------------===//
1168 //                            Per-Block Splitting
1169 //===----------------------------------------------------------------------===//
1170 
1171 /// tryBlockSplit - Split a global live range around every block with uses. This
1172 /// creates a lot of local live ranges, that will be split by tryLocalSplit if
1173 /// they don't allocate.
1174 unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
1175                                  AllocationOrder &Order,
1176                                  SmallVectorImpl<Register> &NewVRegs) {
1177   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1178   Register Reg = VirtReg.reg();
1179   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1180   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1181   SE->reset(LREdit, SplitSpillMode);
1182   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1183   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
1184     if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1185       SE->splitSingleBlock(BI);
1186   }
1187   // No blocks were split.
1188   if (LREdit.empty())
1189     return 0;
1190 
1191   // We did split for some blocks.
1192   SmallVector<unsigned, 8> IntvMap;
1193   SE->finish(&IntvMap);
1194 
1195   // Tell LiveDebugVariables about the new ranges.
1196   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1197 
1198   // Sort out the new intervals created by splitting. The remainder interval
1199   // goes straight to spilling, the new local ranges get to stay RS_New.
1200   for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1201     const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
1202     if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
1203       ExtraInfo->setStage(LI, RS_Spill);
1204   }
1205 
1206   if (VerifyEnabled)
1207     MF->verify(this, "After splitting live range around basic blocks");
1208   return 0;
1209 }
1210 
1211 //===----------------------------------------------------------------------===//
1212 //                         Per-Instruction Splitting
1213 //===----------------------------------------------------------------------===//
1214 
1215 /// Get the number of allocatable registers that match the constraints of \p Reg
1216 /// on \p MI and that are also in \p SuperRC.
1217 static unsigned getNumAllocatableRegsForConstraints(
1218     const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
1219     const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
1220     const RegisterClassInfo &RCI) {
1221   assert(SuperRC && "Invalid register class");
1222 
1223   const TargetRegisterClass *ConstrainedRC =
1224       MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1225                                              /* ExploreBundle */ true);
1226   if (!ConstrainedRC)
1227     return 0;
1228   return RCI.getNumAllocatableRegs(ConstrainedRC);
1229 }
1230 
1231 /// tryInstructionSplit - Split a live range around individual instructions.
1232 /// This is normally not worthwhile since the spiller is doing essentially the
1233 /// same thing. However, when the live range is in a constrained register
1234 /// class, it may help to insert copies such that parts of the live range can
1235 /// be moved to a larger register class.
1236 ///
1237 /// This is similar to spilling to a larger register class.
1238 unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
1239                                        AllocationOrder &Order,
1240                                        SmallVectorImpl<Register> &NewVRegs) {
1241   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1242   // There is no point to this if there are no larger sub-classes.
1243   if (!RegClassInfo.isProperSubClass(CurRC))
1244     return 0;
1245 
1246   // Always enable split spill mode, since we're effectively spilling to a
1247   // register.
1248   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1249   SE->reset(LREdit, SplitEditor::SM_Size);
1250 
1251   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1252   if (Uses.size() <= 1)
1253     return 0;
1254 
1255   LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
1256                     << " individual instrs.\n");
1257 
1258   const TargetRegisterClass *SuperRC =
1259       TRI->getLargestLegalSuperClass(CurRC, *MF);
1260   unsigned SuperRCNumAllocatableRegs =
1261       RegClassInfo.getNumAllocatableRegs(SuperRC);
1262   // Split around every non-copy instruction if this split will relax
1263   // the constraints on the virtual register.
1264   // Otherwise, splitting just inserts uncoalescable copies that do not help
1265   // the allocation.
1266   for (const SlotIndex Use : Uses) {
1267     if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
1268       if (MI->isFullCopy() ||
1269           SuperRCNumAllocatableRegs ==
1270               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
1271                                                   TII, TRI, RegClassInfo)) {
1272         LLVM_DEBUG(dbgs() << "    skip:\t" << Use << '\t' << *MI);
1273         continue;
1274       }
1275     SE->openIntv();
1276     SlotIndex SegStart = SE->enterIntvBefore(Use);
1277     SlotIndex SegStop = SE->leaveIntvAfter(Use);
1278     SE->useIntv(SegStart, SegStop);
1279   }
1280 
1281   if (LREdit.empty()) {
1282     LLVM_DEBUG(dbgs() << "All uses were copies.\n");
1283     return 0;
1284   }
1285 
1286   SmallVector<unsigned, 8> IntvMap;
1287   SE->finish(&IntvMap);
1288   DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1289   // Assign all new registers to RS_Spill. This was the last chance.
1290   ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
1291   return 0;
1292 }
1293 
1294 //===----------------------------------------------------------------------===//
1295 //                             Local Splitting
1296 //===----------------------------------------------------------------------===//
1297 
1298 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted
1299 /// in order to use PhysReg between two entries in SA->UseSlots.
1300 ///
1301 /// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
1302 ///
1303 void RAGreedy::calcGapWeights(MCRegister PhysReg,
1304                               SmallVectorImpl<float> &GapWeight) {
1305   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1306   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1307   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1308   const unsigned NumGaps = Uses.size()-1;
1309 
1310   // Start and end points for the interference check.
1311   SlotIndex StartIdx =
1312     BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
1313   SlotIndex StopIdx =
1314     BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
1315 
1316   GapWeight.assign(NumGaps, 0.0f);
1317 
1318   // Add interference from each overlapping register.
1319   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1320     if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
1321           .checkInterference())
1322       continue;
1323 
1324     // We know that VirtReg is a continuous interval from FirstInstr to
1325     // LastInstr, so we don't need InterferenceQuery.
1326     //
1327     // Interference that overlaps an instruction is counted in both gaps
1328     // surrounding the instruction. The exception is interference before
1329     // StartIdx and after StopIdx.
1330     //
1331     LiveIntervalUnion::SegmentIter IntI =
1332       Matrix->getLiveUnions()[*Units] .find(StartIdx);
1333     for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1334       // Skip the gaps before IntI.
1335       while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1336         if (++Gap == NumGaps)
1337           break;
1338       if (Gap == NumGaps)
1339         break;
1340 
1341       // Update the gaps covered by IntI.
1342       const float weight = IntI.value()->weight();
1343       for (; Gap != NumGaps; ++Gap) {
1344         GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1345         if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1346           break;
1347       }
1348       if (Gap == NumGaps)
1349         break;
1350     }
1351   }
1352 
1353   // Add fixed interference.
1354   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1355     const LiveRange &LR = LIS->getRegUnit(*Units);
1356     LiveRange::const_iterator I = LR.find(StartIdx);
1357     LiveRange::const_iterator E = LR.end();
1358 
1359     // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
1360     for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1361       while (Uses[Gap+1].getBoundaryIndex() < I->start)
1362         if (++Gap == NumGaps)
1363           break;
1364       if (Gap == NumGaps)
1365         break;
1366 
1367       for (; Gap != NumGaps; ++Gap) {
1368         GapWeight[Gap] = huge_valf;
1369         if (Uses[Gap+1].getBaseIndex() >= I->end)
1370           break;
1371       }
1372       if (Gap == NumGaps)
1373         break;
1374     }
1375   }
1376 }
1377 
1378 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1379 /// basic block.
1380 ///
1381 unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
1382                                  AllocationOrder &Order,
1383                                  SmallVectorImpl<Register> &NewVRegs) {
1384   // TODO: the function currently only handles a single UseBlock; it should be
1385   // possible to generalize.
1386   if (SA->getUseBlocks().size() != 1)
1387     return 0;
1388 
1389   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1390 
1391   // Note that it is possible to have an interval that is live-in or live-out
1392   // while only covering a single block - A phi-def can use undef values from
1393   // predecessors, and the block could be a single-block loop.
1394   // We don't bother doing anything clever about such a case, we simply assume
1395   // that the interval is continuous from FirstInstr to LastInstr. We should
1396   // make sure that we don't do anything illegal to such an interval, though.
1397 
1398   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1399   if (Uses.size() <= 2)
1400     return 0;
1401   const unsigned NumGaps = Uses.size()-1;
1402 
1403   LLVM_DEBUG({
1404     dbgs() << "tryLocalSplit: ";
1405     for (const auto &Use : Uses)
1406       dbgs() << ' ' << Use;
1407     dbgs() << '\n';
1408   });
1409 
1410   // If VirtReg is live across any register mask operands, compute a list of
1411   // gaps with register masks.
1412   SmallVector<unsigned, 8> RegMaskGaps;
1413   if (Matrix->checkRegMaskInterference(VirtReg)) {
1414     // Get regmask slots for the whole block.
1415     ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1416     LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1417     // Constrain to VirtReg's live range.
1418     unsigned RI =
1419         llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
1420     unsigned RE = RMS.size();
1421     for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
1422       // Look for Uses[I] <= RMS <= Uses[I + 1].
1423       assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
1424       if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
1425         continue;
1426       // Skip a regmask on the same instruction as the last use. It doesn't
1427       // overlap the live range.
1428       if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
1429         break;
1430       LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
1431                         << Uses[I + 1]);
1432       RegMaskGaps.push_back(I);
1433       // Advance ri to the next gap. A regmask on one of the uses counts in
1434       // both gaps.
1435       while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
1436         ++RI;
1437     }
1438     LLVM_DEBUG(dbgs() << '\n');
1439   }
1440 
1441   // Since we allow local split results to be split again, there is a risk of
1442   // creating infinite loops. It is tempting to require that the new live
1443   // ranges have less instructions than the original. That would guarantee
1444   // convergence, but it is too strict. A live range with 3 instructions can be
1445   // split 2+3 (including the COPY), and we want to allow that.
1446   //
1447   // Instead we use these rules:
1448   //
1449   // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1450   //    noop split, of course).
1451   // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1452   //    the new ranges must have fewer instructions than before the split.
1453   // 3. New ranges with the same number of instructions are marked RS_Split2,
1454   //    smaller ranges are marked RS_New.
1455   //
1456   // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1457   // excessive splitting and infinite loops.
1458   //
1459   bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
1460 
1461   // Best split candidate.
1462   unsigned BestBefore = NumGaps;
1463   unsigned BestAfter = 0;
1464   float BestDiff = 0;
1465 
1466   const float blockFreq =
1467     SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1468     (1.0f / MBFI->getEntryFreq());
1469   SmallVector<float, 8> GapWeight;
1470 
1471   for (MCPhysReg PhysReg : Order) {
1472     assert(PhysReg);
1473     // Keep track of the largest spill weight that would need to be evicted in
1474     // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
1475     calcGapWeights(PhysReg, GapWeight);
1476 
1477     // Remove any gaps with regmask clobbers.
1478     if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1479       for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
1480         GapWeight[RegMaskGaps[I]] = huge_valf;
1481 
1482     // Try to find the best sequence of gaps to close.
1483     // The new spill weight must be larger than any gap interference.
1484 
1485     // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1486     unsigned SplitBefore = 0, SplitAfter = 1;
1487 
1488     // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1489     // It is the spill weight that needs to be evicted.
1490     float MaxGap = GapWeight[0];
1491 
1492     while (true) {
1493       // Live before/after split?
1494       const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1495       const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1496 
1497       LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
1498                         << '-' << Uses[SplitAfter] << " I=" << MaxGap);
1499 
1500       // Stop before the interval gets so big we wouldn't be making progress.
1501       if (!LiveBefore && !LiveAfter) {
1502         LLVM_DEBUG(dbgs() << " all\n");
1503         break;
1504       }
1505       // Should the interval be extended or shrunk?
1506       bool Shrink = true;
1507 
1508       // How many gaps would the new range have?
1509       unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1510 
1511       // Legally, without causing looping?
1512       bool Legal = !ProgressRequired || NewGaps < NumGaps;
1513 
1514       if (Legal && MaxGap < huge_valf) {
1515         // Estimate the new spill weight. Each instruction reads or writes the
1516         // register. Conservatively assume there are no read-modify-write
1517         // instructions.
1518         //
1519         // Try to guess the size of the new interval.
1520         const float EstWeight = normalizeSpillWeight(
1521             blockFreq * (NewGaps + 1),
1522             Uses[SplitBefore].distance(Uses[SplitAfter]) +
1523                 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1524             1);
1525         // Would this split be possible to allocate?
1526         // Never allocate all gaps, we wouldn't be making progress.
1527         LLVM_DEBUG(dbgs() << " w=" << EstWeight);
1528         if (EstWeight * Hysteresis >= MaxGap) {
1529           Shrink = false;
1530           float Diff = EstWeight - MaxGap;
1531           if (Diff > BestDiff) {
1532             LLVM_DEBUG(dbgs() << " (best)");
1533             BestDiff = Hysteresis * Diff;
1534             BestBefore = SplitBefore;
1535             BestAfter = SplitAfter;
1536           }
1537         }
1538       }
1539 
1540       // Try to shrink.
1541       if (Shrink) {
1542         if (++SplitBefore < SplitAfter) {
1543           LLVM_DEBUG(dbgs() << " shrink\n");
1544           // Recompute the max when necessary.
1545           if (GapWeight[SplitBefore - 1] >= MaxGap) {
1546             MaxGap = GapWeight[SplitBefore];
1547             for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
1548               MaxGap = std::max(MaxGap, GapWeight[I]);
1549           }
1550           continue;
1551         }
1552         MaxGap = 0;
1553       }
1554 
1555       // Try to extend the interval.
1556       if (SplitAfter >= NumGaps) {
1557         LLVM_DEBUG(dbgs() << " end\n");
1558         break;
1559       }
1560 
1561       LLVM_DEBUG(dbgs() << " extend\n");
1562       MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1563     }
1564   }
1565 
1566   // Didn't find any candidates?
1567   if (BestBefore == NumGaps)
1568     return 0;
1569 
1570   LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1571                     << Uses[BestAfter] << ", " << BestDiff << ", "
1572                     << (BestAfter - BestBefore + 1) << " instrs\n");
1573 
1574   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1575   SE->reset(LREdit);
1576 
1577   SE->openIntv();
1578   SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1579   SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
1580   SE->useIntv(SegStart, SegStop);
1581   SmallVector<unsigned, 8> IntvMap;
1582   SE->finish(&IntvMap);
1583   DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1584   // If the new range has the same number of instructions as before, mark it as
1585   // RS_Split2 so the next split will be forced to make progress. Otherwise,
1586   // leave the new intervals as RS_New so they can compete.
1587   bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1588   bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1589   unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1590   if (NewGaps >= NumGaps) {
1591     LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1592     assert(!ProgressRequired && "Didn't make progress when it was required.");
1593     for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1594       if (IntvMap[I] == 1) {
1595         ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1596         LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1597       }
1598     LLVM_DEBUG(dbgs() << '\n');
1599   }
1600   ++NumLocalSplits;
1601 
1602   return 0;
1603 }
1604 
1605 //===----------------------------------------------------------------------===//
1606 //                          Live Range Splitting
1607 //===----------------------------------------------------------------------===//
1608 
1609 /// trySplit - Try to split VirtReg or one of its interferences, making it
1610 /// assignable.
1611 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
1612 unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
1613                             SmallVectorImpl<Register> &NewVRegs,
1614                             const SmallVirtRegSet &FixedRegisters) {
1615   // Ranges must be Split2 or less.
1616   if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1617     return 0;
1618 
1619   // Local intervals are handled separately.
1620   if (LIS->intervalIsInOneMBB(VirtReg)) {
1621     NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1622                        TimerGroupDescription, TimePassesIsEnabled);
1623     SA->analyze(&VirtReg);
1624     Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1625     if (PhysReg || !NewVRegs.empty())
1626       return PhysReg;
1627     return tryInstructionSplit(VirtReg, Order, NewVRegs);
1628   }
1629 
1630   NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1631                      TimerGroupDescription, TimePassesIsEnabled);
1632 
1633   SA->analyze(&VirtReg);
1634 
1635   // First try to split around a region spanning multiple blocks. RS_Split2
1636   // ranges already made dubious progress with region splitting, so they go
1637   // straight to single block splitting.
1638   if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1639     MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1640     if (PhysReg || !NewVRegs.empty())
1641       return PhysReg;
1642   }
1643 
1644   // Then isolate blocks.
1645   return tryBlockSplit(VirtReg, Order, NewVRegs);
1646 }
1647 
1648 //===----------------------------------------------------------------------===//
1649 //                          Last Chance Recoloring
1650 //===----------------------------------------------------------------------===//
1651 
1652 /// Return true if \p reg has any tied def operand.
1653 static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1654   for (const MachineOperand &MO : MRI->def_operands(reg))
1655     if (MO.isTied())
1656       return true;
1657 
1658   return false;
1659 }
1660 
1661 /// Return true if the existing assignment of \p Intf overlaps, but is not the
1662 /// same, as \p PhysReg.
1663 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1664                                          const VirtRegMap &VRM,
1665                                          MCRegister PhysReg,
1666                                          const LiveInterval &Intf) {
1667   MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1668   if (PhysReg == AssignedReg)
1669     return false;
1670   return TRI.regsOverlap(PhysReg, AssignedReg);
1671 }
1672 
1673 /// mayRecolorAllInterferences - Check if the virtual registers that
1674 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1675 /// recolored to free \p PhysReg.
1676 /// When true is returned, \p RecoloringCandidates has been augmented with all
1677 /// the live intervals that need to be recolored in order to free \p PhysReg
1678 /// for \p VirtReg.
1679 /// \p FixedRegisters contains all the virtual registers that cannot be
1680 /// recolored.
1681 bool RAGreedy::mayRecolorAllInterferences(
1682     MCRegister PhysReg, const LiveInterval &VirtReg,
1683     SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1684   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1685 
1686   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1687     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
1688     // If there is LastChanceRecoloringMaxInterference or more interferences,
1689     // chances are one would not be recolorable.
1690     if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
1691             LastChanceRecoloringMaxInterference &&
1692         !ExhaustiveSearch) {
1693       LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1694       CutOffInfo |= CO_Interf;
1695       return false;
1696     }
1697     for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1698       // If Intf is done and sits on the same register class as VirtReg, it
1699       // would not be recolorable as it is in the same state as
1700       // VirtReg. However there are at least two exceptions.
1701       //
1702       // If VirtReg has tied defs and Intf doesn't, then
1703       // there is still a point in examining if it can be recolorable.
1704       //
1705       // Additionally, if the register class has overlapping tuple members, it
1706       // may still be recolorable using a different tuple. This is more likely
1707       // if the existing assignment aliases with the candidate.
1708       //
1709       if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1710             MRI->getRegClass(Intf->reg()) == CurRC &&
1711             !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1712            !(hasTiedDef(MRI, VirtReg.reg()) &&
1713              !hasTiedDef(MRI, Intf->reg()))) ||
1714           FixedRegisters.count(Intf->reg())) {
1715         LLVM_DEBUG(
1716             dbgs() << "Early abort: the interference is not recolorable.\n");
1717         return false;
1718       }
1719       RecoloringCandidates.insert(Intf);
1720     }
1721   }
1722   return true;
1723 }
1724 
1725 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1726 /// its interferences.
1727 /// Last chance recoloring chooses a color for \p VirtReg and recolors every
1728 /// virtual register that was using it. The recoloring process may recursively
1729 /// use the last chance recoloring. Therefore, when a virtual register has been
1730 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1731 /// be last-chance-recolored again during this recoloring "session".
1732 /// E.g.,
1733 /// Let
1734 /// vA can use {R1, R2    }
1735 /// vB can use {    R2, R3}
1736 /// vC can use {R1        }
1737 /// Where vA, vB, and vC cannot be split anymore (they are reloads for
1738 /// instance) and they all interfere.
1739 ///
1740 /// vA is assigned R1
1741 /// vB is assigned R2
1742 /// vC tries to evict vA but vA is already done.
1743 /// Regular register allocation fails.
1744 ///
1745 /// Last chance recoloring kicks in:
1746 /// vC does as if vA was evicted => vC uses R1.
1747 /// vC is marked as fixed.
1748 /// vA needs to find a color.
1749 /// None are available.
1750 /// vA cannot evict vC: vC is a fixed virtual register now.
1751 /// vA does as if vB was evicted => vA uses R2.
1752 /// vB needs to find a color.
1753 /// R3 is available.
1754 /// Recoloring => vC = R1, vA = R2, vB = R3
1755 ///
1756 /// \p Order defines the preferred allocation order for \p VirtReg.
1757 /// \p NewRegs will contain any new virtual register that have been created
1758 /// (split, spill) during the process and that must be assigned.
1759 /// \p FixedRegisters contains all the virtual registers that cannot be
1760 /// recolored.
1761 ///
1762 /// \p RecolorStack tracks the original assignments of successfully recolored
1763 /// registers.
1764 ///
1765 /// \p Depth gives the current depth of the last chance recoloring.
1766 /// \return a physical register that can be used for VirtReg or ~0u if none
1767 /// exists.
1768 unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1769                                            AllocationOrder &Order,
1770                                            SmallVectorImpl<Register> &NewVRegs,
1771                                            SmallVirtRegSet &FixedRegisters,
1772                                            RecoloringStack &RecolorStack,
1773                                            unsigned Depth) {
1774   if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1775     return ~0u;
1776 
1777   LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1778 
1779   const ssize_t EntryStackSize = RecolorStack.size();
1780 
1781   // Ranges must be Done.
1782   assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1783          "Last chance recoloring should really be last chance");
1784   // Set the max depth to LastChanceRecoloringMaxDepth.
1785   // We may want to reconsider that if we end up with a too large search space
1786   // for target with hundreds of registers.
1787   // Indeed, in that case we may want to cut the search space earlier.
1788   if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
1789     LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1790     CutOffInfo |= CO_Depth;
1791     return ~0u;
1792   }
1793 
1794   // Set of Live intervals that will need to be recolored.
1795   SmallLISet RecoloringCandidates;
1796 
1797   // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
1798   // this recoloring "session".
1799   assert(!FixedRegisters.count(VirtReg.reg()));
1800   FixedRegisters.insert(VirtReg.reg());
1801   SmallVector<Register, 4> CurrentNewVRegs;
1802 
1803   for (MCRegister PhysReg : Order) {
1804     assert(PhysReg.isValid());
1805     LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1806                       << printReg(PhysReg, TRI) << '\n');
1807     RecoloringCandidates.clear();
1808     CurrentNewVRegs.clear();
1809 
1810     // It is only possible to recolor virtual register interference.
1811     if (Matrix->checkInterference(VirtReg, PhysReg) >
1812         LiveRegMatrix::IK_VirtReg) {
1813       LLVM_DEBUG(
1814           dbgs() << "Some interferences are not with virtual registers.\n");
1815 
1816       continue;
1817     }
1818 
1819     // Early give up on this PhysReg if it is obvious we cannot recolor all
1820     // the interferences.
1821     if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
1822                                     FixedRegisters)) {
1823       LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
1824       continue;
1825     }
1826 
1827     // RecoloringCandidates contains all the virtual registers that interfere
1828     // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
1829     // recoloring and perform the actual recoloring.
1830     PQueue RecoloringQueue;
1831     for (const LiveInterval *RC : RecoloringCandidates) {
1832       Register ItVirtReg = RC->reg();
1833       enqueue(RecoloringQueue, RC);
1834       assert(VRM->hasPhys(ItVirtReg) &&
1835              "Interferences are supposed to be with allocated variables");
1836 
1837       // Record the current allocation.
1838       RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
1839 
1840       // unset the related struct.
1841       Matrix->unassign(*RC);
1842     }
1843 
1844     // Do as if VirtReg was assigned to PhysReg so that the underlying
1845     // recoloring has the right information about the interferes and
1846     // available colors.
1847     Matrix->assign(VirtReg, PhysReg);
1848 
1849     // Save the current recoloring state.
1850     // If we cannot recolor all the interferences, we will have to start again
1851     // at this point for the next physical register.
1852     SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
1853     if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
1854                                 FixedRegisters, RecolorStack, Depth)) {
1855       // Push the queued vregs into the main queue.
1856       for (Register NewVReg : CurrentNewVRegs)
1857         NewVRegs.push_back(NewVReg);
1858       // Do not mess up with the global assignment process.
1859       // I.e., VirtReg must be unassigned.
1860       Matrix->unassign(VirtReg);
1861       return PhysReg;
1862     }
1863 
1864     LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
1865                       << printReg(PhysReg, TRI) << '\n');
1866 
1867     // The recoloring attempt failed, undo the changes.
1868     FixedRegisters = SaveFixedRegisters;
1869     Matrix->unassign(VirtReg);
1870 
1871     // For a newly created vreg which is also in RecoloringCandidates,
1872     // don't add it to NewVRegs because its physical register will be restored
1873     // below. Other vregs in CurrentNewVRegs are created by calling
1874     // selectOrSplit and should be added into NewVRegs.
1875     for (Register &R : CurrentNewVRegs) {
1876       if (RecoloringCandidates.count(&LIS->getInterval(R)))
1877         continue;
1878       NewVRegs.push_back(R);
1879     }
1880 
1881     // Roll back our unsuccessful recoloring. Also roll back any successful
1882     // recolorings in any recursive recoloring attempts, since it's possible
1883     // they would have introduced conflicts with assignments we will be
1884     // restoring further up the stack. Perform all unassignments prior to
1885     // reassigning, since sub-recolorings may have conflicted with the registers
1886     // we are going to restore to their original assignments.
1887     for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
1888       const LiveInterval *LI;
1889       MCRegister PhysReg;
1890       std::tie(LI, PhysReg) = RecolorStack[I];
1891 
1892       if (VRM->hasPhys(LI->reg()))
1893         Matrix->unassign(*LI);
1894     }
1895 
1896     for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
1897       const LiveInterval *LI;
1898       MCRegister PhysReg;
1899       std::tie(LI, PhysReg) = RecolorStack[I];
1900       if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
1901         Matrix->assign(*LI, PhysReg);
1902     }
1903 
1904     // Pop the stack of recoloring attempts.
1905     RecolorStack.resize(EntryStackSize);
1906   }
1907 
1908   // Last chance recoloring did not worked either, give up.
1909   return ~0u;
1910 }
1911 
1912 /// tryRecoloringCandidates - Try to assign a new color to every register
1913 /// in \RecoloringQueue.
1914 /// \p NewRegs will contain any new virtual register created during the
1915 /// recoloring process.
1916 /// \p FixedRegisters[in/out] contains all the registers that have been
1917 /// recolored.
1918 /// \return true if all virtual registers in RecoloringQueue were successfully
1919 /// recolored, false otherwise.
1920 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
1921                                        SmallVectorImpl<Register> &NewVRegs,
1922                                        SmallVirtRegSet &FixedRegisters,
1923                                        RecoloringStack &RecolorStack,
1924                                        unsigned Depth) {
1925   while (!RecoloringQueue.empty()) {
1926     const LiveInterval *LI = dequeue(RecoloringQueue);
1927     LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
1928     MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
1929                                            RecolorStack, Depth + 1);
1930     // When splitting happens, the live-range may actually be empty.
1931     // In that case, this is okay to continue the recoloring even
1932     // if we did not find an alternative color for it. Indeed,
1933     // there will not be anything to color for LI in the end.
1934     if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
1935       return false;
1936 
1937     if (!PhysReg) {
1938       assert(LI->empty() && "Only empty live-range do not require a register");
1939       LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
1940                         << " succeeded. Empty LI.\n");
1941       continue;
1942     }
1943     LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
1944                       << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
1945 
1946     Matrix->assign(*LI, PhysReg);
1947     FixedRegisters.insert(LI->reg());
1948   }
1949   return true;
1950 }
1951 
1952 //===----------------------------------------------------------------------===//
1953 //                            Main Entry Point
1954 //===----------------------------------------------------------------------===//
1955 
1956 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
1957                                    SmallVectorImpl<Register> &NewVRegs) {
1958   CutOffInfo = CO_None;
1959   LLVMContext &Ctx = MF->getFunction().getContext();
1960   SmallVirtRegSet FixedRegisters;
1961   RecoloringStack RecolorStack;
1962   MCRegister Reg =
1963       selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
1964   if (Reg == ~0U && (CutOffInfo != CO_None)) {
1965     uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
1966     if (CutOffEncountered == CO_Depth)
1967       Ctx.emitError("register allocation failed: maximum depth for recoloring "
1968                     "reached. Use -fexhaustive-register-search to skip "
1969                     "cutoffs");
1970     else if (CutOffEncountered == CO_Interf)
1971       Ctx.emitError("register allocation failed: maximum interference for "
1972                     "recoloring reached. Use -fexhaustive-register-search "
1973                     "to skip cutoffs");
1974     else if (CutOffEncountered == (CO_Depth | CO_Interf))
1975       Ctx.emitError("register allocation failed: maximum interference and "
1976                     "depth for recoloring reached. Use "
1977                     "-fexhaustive-register-search to skip cutoffs");
1978   }
1979   return Reg;
1980 }
1981 
1982 /// Using a CSR for the first time has a cost because it causes push|pop
1983 /// to be added to prologue|epilogue. Splitting a cold section of the live
1984 /// range can have lower cost than using the CSR for the first time;
1985 /// Spilling a live range in the cold path can have lower cost than using
1986 /// the CSR for the first time. Returns the physical register if we decide
1987 /// to use the CSR; otherwise return 0.
1988 MCRegister RAGreedy::tryAssignCSRFirstTime(
1989     const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
1990     uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
1991   if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
1992     // We choose spill over using the CSR for the first time if the spill cost
1993     // is lower than CSRCost.
1994     SA->analyze(&VirtReg);
1995     if (calcSpillCost() >= CSRCost)
1996       return PhysReg;
1997 
1998     // We are going to spill, set CostPerUseLimit to 1 to make sure that
1999     // we will not use a callee-saved register in tryEvict.
2000     CostPerUseLimit = 1;
2001     return 0;
2002   }
2003   if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2004     // We choose pre-splitting over using the CSR for the first time if
2005     // the cost of splitting is lower than CSRCost.
2006     SA->analyze(&VirtReg);
2007     unsigned NumCands = 0;
2008     BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2009     unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2010                                                  NumCands, true /*IgnoreCSR*/);
2011     if (BestCand == NoCand)
2012       // Use the CSR if we can't find a region split below CSRCost.
2013       return PhysReg;
2014 
2015     // Perform the actual pre-splitting.
2016     doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2017     return 0;
2018   }
2019   return PhysReg;
2020 }
2021 
2022 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2023   // Do not keep invalid information around.
2024   SetOfBrokenHints.remove(&LI);
2025 }
2026 
2027 void RAGreedy::initializeCSRCost() {
2028   // We use the larger one out of the command-line option and the value report
2029   // by TRI.
2030   CSRCost = BlockFrequency(
2031       std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2032   if (!CSRCost.getFrequency())
2033     return;
2034 
2035   // Raw cost is relative to Entry == 2^14; scale it appropriately.
2036   uint64_t ActualEntry = MBFI->getEntryFreq();
2037   if (!ActualEntry) {
2038     CSRCost = 0;
2039     return;
2040   }
2041   uint64_t FixedEntry = 1 << 14;
2042   if (ActualEntry < FixedEntry)
2043     CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2044   else if (ActualEntry <= UINT32_MAX)
2045     // Invert the fraction and divide.
2046     CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2047   else
2048     // Can't use BranchProbability in general, since it takes 32-bit numbers.
2049     CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
2050 }
2051 
2052 /// Collect the hint info for \p Reg.
2053 /// The results are stored into \p Out.
2054 /// \p Out is not cleared before being populated.
2055 void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2056   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2057     if (!Instr.isFullCopy())
2058       continue;
2059     // Look for the other end of the copy.
2060     Register OtherReg = Instr.getOperand(0).getReg();
2061     if (OtherReg == Reg) {
2062       OtherReg = Instr.getOperand(1).getReg();
2063       if (OtherReg == Reg)
2064         continue;
2065     }
2066     // Get the current assignment.
2067     MCRegister OtherPhysReg =
2068         OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2069     // Push the collected information.
2070     Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2071                            OtherPhysReg));
2072   }
2073 }
2074 
2075 /// Using the given \p List, compute the cost of the broken hints if
2076 /// \p PhysReg was used.
2077 /// \return The cost of \p List for \p PhysReg.
2078 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2079                                            MCRegister PhysReg) {
2080   BlockFrequency Cost = 0;
2081   for (const HintInfo &Info : List) {
2082     if (Info.PhysReg != PhysReg)
2083       Cost += Info.Freq;
2084   }
2085   return Cost;
2086 }
2087 
2088 /// Using the register assigned to \p VirtReg, try to recolor
2089 /// all the live ranges that are copy-related with \p VirtReg.
2090 /// The recoloring is then propagated to all the live-ranges that have
2091 /// been recolored and so on, until no more copies can be coalesced or
2092 /// it is not profitable.
2093 /// For a given live range, profitability is determined by the sum of the
2094 /// frequencies of the non-identity copies it would introduce with the old
2095 /// and new register.
2096 void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2097   // We have a broken hint, check if it is possible to fix it by
2098   // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2099   // some register and PhysReg may be available for the other live-ranges.
2100   SmallSet<Register, 4> Visited;
2101   SmallVector<unsigned, 2> RecoloringCandidates;
2102   HintsInfo Info;
2103   Register Reg = VirtReg.reg();
2104   MCRegister PhysReg = VRM->getPhys(Reg);
2105   // Start the recoloring algorithm from the input live-interval, then
2106   // it will propagate to the ones that are copy-related with it.
2107   Visited.insert(Reg);
2108   RecoloringCandidates.push_back(Reg);
2109 
2110   LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2111                     << '(' << printReg(PhysReg, TRI) << ")\n");
2112 
2113   do {
2114     Reg = RecoloringCandidates.pop_back_val();
2115 
2116     // We cannot recolor physical register.
2117     if (Register::isPhysicalRegister(Reg))
2118       continue;
2119 
2120     // This may be a skipped class
2121     if (!VRM->hasPhys(Reg)) {
2122       assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
2123              "We have an unallocated variable which should have been handled");
2124       continue;
2125     }
2126 
2127     // Get the live interval mapped with this virtual register to be able
2128     // to check for the interference with the new color.
2129     LiveInterval &LI = LIS->getInterval(Reg);
2130     MCRegister CurrPhys = VRM->getPhys(Reg);
2131     // Check that the new color matches the register class constraints and
2132     // that it is free for this live range.
2133     if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2134                                 Matrix->checkInterference(LI, PhysReg)))
2135       continue;
2136 
2137     LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2138                       << ") is recolorable.\n");
2139 
2140     // Gather the hint info.
2141     Info.clear();
2142     collectHintInfo(Reg, Info);
2143     // Check if recoloring the live-range will increase the cost of the
2144     // non-identity copies.
2145     if (CurrPhys != PhysReg) {
2146       LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2147       BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2148       BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2149       LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
2150                         << "\nNew Cost: " << NewCopiesCost.getFrequency()
2151                         << '\n');
2152       if (OldCopiesCost < NewCopiesCost) {
2153         LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2154         continue;
2155       }
2156       // At this point, the cost is either cheaper or equal. If it is
2157       // equal, we consider this is profitable because it may expose
2158       // more recoloring opportunities.
2159       LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2160       // Recolor the live-range.
2161       Matrix->unassign(LI);
2162       Matrix->assign(LI, PhysReg);
2163     }
2164     // Push all copy-related live-ranges to keep reconciling the broken
2165     // hints.
2166     for (const HintInfo &HI : Info) {
2167       if (Visited.insert(HI.Reg).second)
2168         RecoloringCandidates.push_back(HI.Reg);
2169     }
2170   } while (!RecoloringCandidates.empty());
2171 }
2172 
2173 /// Try to recolor broken hints.
2174 /// Broken hints may be repaired by recoloring when an evicted variable
2175 /// freed up a register for a larger live-range.
2176 /// Consider the following example:
2177 /// BB1:
2178 ///   a =
2179 ///   b =
2180 /// BB2:
2181 ///   ...
2182 ///   = b
2183 ///   = a
2184 /// Let us assume b gets split:
2185 /// BB1:
2186 ///   a =
2187 ///   b =
2188 /// BB2:
2189 ///   c = b
2190 ///   ...
2191 ///   d = c
2192 ///   = d
2193 ///   = a
2194 /// Because of how the allocation work, b, c, and d may be assigned different
2195 /// colors. Now, if a gets evicted later:
2196 /// BB1:
2197 ///   a =
2198 ///   st a, SpillSlot
2199 ///   b =
2200 /// BB2:
2201 ///   c = b
2202 ///   ...
2203 ///   d = c
2204 ///   = d
2205 ///   e = ld SpillSlot
2206 ///   = e
2207 /// This is likely that we can assign the same register for b, c, and d,
2208 /// getting rid of 2 copies.
2209 void RAGreedy::tryHintsRecoloring() {
2210   for (const LiveInterval *LI : SetOfBrokenHints) {
2211     assert(Register::isVirtualRegister(LI->reg()) &&
2212            "Recoloring is possible only for virtual registers");
2213     // Some dead defs may be around (e.g., because of debug uses).
2214     // Ignore those.
2215     if (!VRM->hasPhys(LI->reg()))
2216       continue;
2217     tryHintRecoloring(*LI);
2218   }
2219 }
2220 
2221 MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2222                                        SmallVectorImpl<Register> &NewVRegs,
2223                                        SmallVirtRegSet &FixedRegisters,
2224                                        RecoloringStack &RecolorStack,
2225                                        unsigned Depth) {
2226   uint8_t CostPerUseLimit = uint8_t(~0u);
2227   // First try assigning a free register.
2228   auto Order =
2229       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2230   if (MCRegister PhysReg =
2231           tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2232     // When NewVRegs is not empty, we may have made decisions such as evicting
2233     // a virtual register, go with the earlier decisions and use the physical
2234     // register.
2235     if (CSRCost.getFrequency() &&
2236         EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2237       MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2238                                                 CostPerUseLimit, NewVRegs);
2239       if (CSRReg || !NewVRegs.empty())
2240         // Return now if we decide to use a CSR or create new vregs due to
2241         // pre-splitting.
2242         return CSRReg;
2243     } else
2244       return PhysReg;
2245   }
2246 
2247   LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2248   LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2249                     << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2250 
2251   // Try to evict a less worthy live range, but only for ranges from the primary
2252   // queue. The RS_Split ranges already failed to do this, and they should not
2253   // get a second chance until they have been split.
2254   if (Stage != RS_Split)
2255     if (Register PhysReg =
2256             tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2257                      FixedRegisters)) {
2258       Register Hint = MRI->getSimpleHint(VirtReg.reg());
2259       // If VirtReg has a hint and that hint is broken record this
2260       // virtual register as a recoloring candidate for broken hint.
2261       // Indeed, since we evicted a variable in its neighborhood it is
2262       // likely we can at least partially recolor some of the
2263       // copy-related live-ranges.
2264       if (Hint && Hint != PhysReg)
2265         SetOfBrokenHints.insert(&VirtReg);
2266       return PhysReg;
2267     }
2268 
2269   assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2270 
2271   // The first time we see a live range, don't try to split or spill.
2272   // Wait until the second time, when all smaller ranges have been allocated.
2273   // This gives a better picture of the interference to split around.
2274   if (Stage < RS_Split) {
2275     ExtraInfo->setStage(VirtReg, RS_Split);
2276     LLVM_DEBUG(dbgs() << "wait for second round\n");
2277     NewVRegs.push_back(VirtReg.reg());
2278     return 0;
2279   }
2280 
2281   if (Stage < RS_Spill) {
2282     // Try splitting VirtReg or interferences.
2283     unsigned NewVRegSizeBefore = NewVRegs.size();
2284     Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2285     if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2286       return PhysReg;
2287   }
2288 
2289   // If we couldn't allocate a register from spilling, there is probably some
2290   // invalid inline assembly. The base class will report it.
2291   if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2292     return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2293                                    RecolorStack, Depth);
2294   }
2295 
2296   // Finally spill VirtReg itself.
2297   if ((EnableDeferredSpilling ||
2298        TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2299       ExtraInfo->getStage(VirtReg) < RS_Memory) {
2300     // TODO: This is experimental and in particular, we do not model
2301     // the live range splitting done by spilling correctly.
2302     // We would need a deep integration with the spiller to do the
2303     // right thing here. Anyway, that is still good for early testing.
2304     ExtraInfo->setStage(VirtReg, RS_Memory);
2305     LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2306     NewVRegs.push_back(VirtReg.reg());
2307   } else {
2308     NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2309                        TimerGroupDescription, TimePassesIsEnabled);
2310     LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2311     spiller().spill(LRE);
2312     ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2313 
2314     // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
2315     // the new regs are kept in LDV (still mapping to the old register), until
2316     // we rewrite spilled locations in LDV at a later stage.
2317     DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
2318 
2319     if (VerifyEnabled)
2320       MF->verify(this, "After spilling");
2321   }
2322 
2323   // The live virtual register requesting allocation was spilled, so tell
2324   // the caller not to allocate anything during this round.
2325   return 0;
2326 }
2327 
2328 void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2329   using namespace ore;
2330   if (Spills) {
2331     R << NV("NumSpills", Spills) << " spills ";
2332     R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2333   }
2334   if (FoldedSpills) {
2335     R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2336     R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2337       << " total folded spills cost ";
2338   }
2339   if (Reloads) {
2340     R << NV("NumReloads", Reloads) << " reloads ";
2341     R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2342   }
2343   if (FoldedReloads) {
2344     R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2345     R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2346       << " total folded reloads cost ";
2347   }
2348   if (ZeroCostFoldedReloads)
2349     R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2350       << " zero cost folded reloads ";
2351   if (Copies) {
2352     R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2353     R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2354   }
2355 }
2356 
2357 RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2358   RAGreedyStats Stats;
2359   const MachineFrameInfo &MFI = MF->getFrameInfo();
2360   int FI;
2361 
2362   auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2363     return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2364         A->getPseudoValue())->getFrameIndex());
2365   };
2366   auto isPatchpointInstr = [](const MachineInstr &MI) {
2367     return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2368            MI.getOpcode() == TargetOpcode::STACKMAP ||
2369            MI.getOpcode() == TargetOpcode::STATEPOINT;
2370   };
2371   for (MachineInstr &MI : MBB) {
2372     if (MI.isCopy()) {
2373       MachineOperand &Dest = MI.getOperand(0);
2374       MachineOperand &Src = MI.getOperand(1);
2375       if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() &&
2376           Src.getReg().isVirtual())
2377         ++Stats.Copies;
2378       continue;
2379     }
2380 
2381     SmallVector<const MachineMemOperand *, 2> Accesses;
2382     if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2383       ++Stats.Reloads;
2384       continue;
2385     }
2386     if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2387       ++Stats.Spills;
2388       continue;
2389     }
2390     if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2391         llvm::any_of(Accesses, isSpillSlotAccess)) {
2392       if (!isPatchpointInstr(MI)) {
2393         Stats.FoldedReloads += Accesses.size();
2394         continue;
2395       }
2396       // For statepoint there may be folded and zero cost folded stack reloads.
2397       std::pair<unsigned, unsigned> NonZeroCostRange =
2398           TII->getPatchpointUnfoldableRange(MI);
2399       SmallSet<unsigned, 16> FoldedReloads;
2400       SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2401       for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2402         MachineOperand &MO = MI.getOperand(Idx);
2403         if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2404           continue;
2405         if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2406           FoldedReloads.insert(MO.getIndex());
2407         else
2408           ZeroCostFoldedReloads.insert(MO.getIndex());
2409       }
2410       // If stack slot is used in folded reload it is not zero cost then.
2411       for (unsigned Slot : FoldedReloads)
2412         ZeroCostFoldedReloads.erase(Slot);
2413       Stats.FoldedReloads += FoldedReloads.size();
2414       Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2415       continue;
2416     }
2417     Accesses.clear();
2418     if (TII->hasStoreToStackSlot(MI, Accesses) &&
2419         llvm::any_of(Accesses, isSpillSlotAccess)) {
2420       Stats.FoldedSpills += Accesses.size();
2421     }
2422   }
2423   // Set cost of collected statistic by multiplication to relative frequency of
2424   // this basic block.
2425   float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2426   Stats.ReloadsCost = RelFreq * Stats.Reloads;
2427   Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2428   Stats.SpillsCost = RelFreq * Stats.Spills;
2429   Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2430   Stats.CopiesCost = RelFreq * Stats.Copies;
2431   return Stats;
2432 }
2433 
2434 RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2435   RAGreedyStats Stats;
2436 
2437   // Sum up the spill and reloads in subloops.
2438   for (MachineLoop *SubLoop : *L)
2439     Stats.add(reportStats(SubLoop));
2440 
2441   for (MachineBasicBlock *MBB : L->getBlocks())
2442     // Handle blocks that were not included in subloops.
2443     if (Loops->getLoopFor(MBB) == L)
2444       Stats.add(computeStats(*MBB));
2445 
2446   if (!Stats.isEmpty()) {
2447     using namespace ore;
2448 
2449     ORE->emit([&]() {
2450       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2451                                         L->getStartLoc(), L->getHeader());
2452       Stats.report(R);
2453       R << "generated in loop";
2454       return R;
2455     });
2456   }
2457   return Stats;
2458 }
2459 
2460 void RAGreedy::reportStats() {
2461   if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2462     return;
2463   RAGreedyStats Stats;
2464   for (MachineLoop *L : *Loops)
2465     Stats.add(reportStats(L));
2466   // Process non-loop blocks.
2467   for (MachineBasicBlock &MBB : *MF)
2468     if (!Loops->getLoopFor(&MBB))
2469       Stats.add(computeStats(MBB));
2470   if (!Stats.isEmpty()) {
2471     using namespace ore;
2472 
2473     ORE->emit([&]() {
2474       DebugLoc Loc;
2475       if (auto *SP = MF->getFunction().getSubprogram())
2476         Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2477       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2478                                         &MF->front());
2479       Stats.report(R);
2480       R << "generated in function";
2481       return R;
2482     });
2483   }
2484 }
2485 
2486 bool RAGreedy::hasVirtRegAlloc() {
2487   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2488     Register Reg = Register::index2VirtReg(I);
2489     if (MRI->reg_nodbg_empty(Reg))
2490       continue;
2491     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2492     if (!RC)
2493       continue;
2494     if (ShouldAllocateClass(*TRI, *RC))
2495       return true;
2496   }
2497 
2498   return false;
2499 }
2500 
2501 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2502   LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2503                     << "********** Function: " << mf.getName() << '\n');
2504 
2505   MF = &mf;
2506   TII = MF->getSubtarget().getInstrInfo();
2507 
2508   if (VerifyEnabled)
2509     MF->verify(this, "Before greedy register allocator");
2510 
2511   RegAllocBase::init(getAnalysis<VirtRegMap>(),
2512                      getAnalysis<LiveIntervals>(),
2513                      getAnalysis<LiveRegMatrix>());
2514 
2515   // Early return if there is no virtual register to be allocated to a
2516   // physical register.
2517   if (!hasVirtRegAlloc())
2518     return false;
2519 
2520   Indexes = &getAnalysis<SlotIndexes>();
2521   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2522   DomTree = &getAnalysis<MachineDominatorTree>();
2523   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2524   Loops = &getAnalysis<MachineLoopInfo>();
2525   Bundles = &getAnalysis<EdgeBundles>();
2526   SpillPlacer = &getAnalysis<SpillPlacement>();
2527   DebugVars = &getAnalysis<LiveDebugVariables>();
2528   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2529 
2530   initializeCSRCost();
2531 
2532   RegCosts = TRI->getRegisterCosts(*MF);
2533   RegClassPriorityTrumpsGlobalness =
2534       GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2535           ? GreedyRegClassPriorityTrumpsGlobalness
2536           : TRI->regClassPriorityTrumpsGlobalness(*MF);
2537 
2538   ExtraInfo.emplace();
2539   EvictAdvisor =
2540       getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2541 
2542   VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2543   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
2544 
2545   VRAI->calculateSpillWeightsAndHints();
2546 
2547   LLVM_DEBUG(LIS->dump());
2548 
2549   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2550   SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2551 
2552   IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2553   GlobalCand.resize(32);  // This will grow as needed.
2554   SetOfBrokenHints.clear();
2555 
2556   allocatePhysRegs();
2557   tryHintsRecoloring();
2558 
2559   if (VerifyEnabled)
2560     MF->verify(this, "Before post optimization");
2561   postOptimization();
2562   reportStats();
2563 
2564   releaseMemory();
2565   return true;
2566 }
2567