1 //===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the RAGreedy function pass for register allocation in
11 // optimized builds.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AllocationOrder.h"
16 #include "InterferenceCache.h"
17 #include "LiveDebugVariables.h"
18 #include "RegAllocBase.h"
19 #include "SpillPlacement.h"
20 #include "Spiller.h"
21 #include "SplitKit.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/CodeGen/CalcSpillWeights.h"
25 #include "llvm/CodeGen/EdgeBundles.h"
26 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
27 #include "llvm/CodeGen/LiveRangeEdit.h"
28 #include "llvm/CodeGen/LiveRegMatrix.h"
29 #include "llvm/CodeGen/LiveStackAnalysis.h"
30 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
31 #include "llvm/CodeGen/MachineDominators.h"
32 #include "llvm/CodeGen/MachineFunctionPass.h"
33 #include "llvm/CodeGen/MachineLoopInfo.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/CodeGen/RegAllocRegistry.h"
37 #include "llvm/CodeGen/RegisterClassInfo.h"
38 #include "llvm/CodeGen/VirtRegMap.h"
39 #include "llvm/IR/LLVMContext.h"
40 #include "llvm/PassAnalysisSupport.h"
41 #include "llvm/Support/BranchProbability.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/Timer.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include "llvm/Target/TargetInstrInfo.h"
48 #include "llvm/Target/TargetSubtargetInfo.h"
49 #include <queue>
50 
51 using namespace llvm;
52 
53 #define DEBUG_TYPE "regalloc"
54 
55 STATISTIC(NumGlobalSplits, "Number of split global live ranges");
56 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
57 STATISTIC(NumEvicted,      "Number of interferences evicted");
58 
59 static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
60     "split-spill-mode", cl::Hidden,
61     cl::desc("Spill mode for splitting live ranges"),
62     cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
63                clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
64                clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
65                clEnumValEnd),
66     cl::init(SplitEditor::SM_Speed));
67 
68 static cl::opt<unsigned>
69 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
70                              cl::desc("Last chance recoloring max depth"),
71                              cl::init(5));
72 
73 static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
74     "lcr-max-interf", cl::Hidden,
75     cl::desc("Last chance recoloring maximum number of considered"
76              " interference at a time"),
77     cl::init(8));
78 
79 static cl::opt<bool>
80 ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
81                  cl::desc("Exhaustive Search for registers bypassing the depth "
82                           "and interference cutoffs of last chance recoloring"));
83 
84 static cl::opt<bool> EnableLocalReassignment(
85     "enable-local-reassign", cl::Hidden,
86     cl::desc("Local reassignment can yield better allocation decisions, but "
87              "may be compile time intensive"),
88     cl::init(false));
89 
90 static cl::opt<bool> EnableDeferredSpilling(
91     "enable-deferred-spilling", cl::Hidden,
92     cl::desc("Instead of spilling a variable right away, defer the actual "
93              "code insertion to the end of the allocation. That way the "
94              "allocator might still find a suitable coloring for this "
95              "variable because of other evicted variables."),
96     cl::init(false));
97 
98 // FIXME: Find a good default for this flag and remove the flag.
99 static cl::opt<unsigned>
100 CSRFirstTimeCost("regalloc-csr-first-time-cost",
101               cl::desc("Cost for first time use of callee-saved register."),
102               cl::init(0), cl::Hidden);
103 
104 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
105                                        createGreedyRegisterAllocator);
106 
107 namespace {
108 class RAGreedy : public MachineFunctionPass,
109                  public RegAllocBase,
110                  private LiveRangeEdit::Delegate {
111   // Convenient shortcuts.
112   typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
113   typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
114   typedef SmallSet<unsigned, 16> SmallVirtRegSet;
115 
116   // context
117   MachineFunction *MF;
118 
119   // Shortcuts to some useful interface.
120   const TargetInstrInfo *TII;
121   const TargetRegisterInfo *TRI;
122   RegisterClassInfo RCI;
123 
124   // analyses
125   SlotIndexes *Indexes;
126   MachineBlockFrequencyInfo *MBFI;
127   MachineDominatorTree *DomTree;
128   MachineLoopInfo *Loops;
129   EdgeBundles *Bundles;
130   SpillPlacement *SpillPlacer;
131   LiveDebugVariables *DebugVars;
132 
133   // state
134   std::unique_ptr<Spiller> SpillerInstance;
135   PQueue Queue;
136   unsigned NextCascade;
137 
138   // Live ranges pass through a number of stages as we try to allocate them.
139   // Some of the stages may also create new live ranges:
140   //
141   // - Region splitting.
142   // - Per-block splitting.
143   // - Local splitting.
144   // - Spilling.
145   //
146   // Ranges produced by one of the stages skip the previous stages when they are
147   // dequeued. This improves performance because we can skip interference checks
148   // that are unlikely to give any results. It also guarantees that the live
149   // range splitting algorithm terminates, something that is otherwise hard to
150   // ensure.
151   enum LiveRangeStage {
152     /// Newly created live range that has never been queued.
153     RS_New,
154 
155     /// Only attempt assignment and eviction. Then requeue as RS_Split.
156     RS_Assign,
157 
158     /// Attempt live range splitting if assignment is impossible.
159     RS_Split,
160 
161     /// Attempt more aggressive live range splitting that is guaranteed to make
162     /// progress.  This is used for split products that may not be making
163     /// progress.
164     RS_Split2,
165 
166     /// Live range will be spilled.  No more splitting will be attempted.
167     RS_Spill,
168 
169 
170     /// Live range is in memory. Because of other evictions, it might get moved
171     /// in a register in the end.
172     RS_Memory,
173 
174     /// There is nothing more we can do to this live range.  Abort compilation
175     /// if it can't be assigned.
176     RS_Done
177   };
178 
179   // Enum CutOffStage to keep a track whether the register allocation failed
180   // because of the cutoffs encountered in last chance recoloring.
181   // Note: This is used as bitmask. New value should be next power of 2.
182   enum CutOffStage {
183     // No cutoffs encountered
184     CO_None = 0,
185 
186     // lcr-max-depth cutoff encountered
187     CO_Depth = 1,
188 
189     // lcr-max-interf cutoff encountered
190     CO_Interf = 2
191   };
192 
193   uint8_t CutOffInfo;
194 
195 #ifndef NDEBUG
196   static const char *const StageName[];
197 #endif
198 
199   // RegInfo - Keep additional information about each live range.
200   struct RegInfo {
201     LiveRangeStage Stage;
202 
203     // Cascade - Eviction loop prevention. See canEvictInterference().
204     unsigned Cascade;
205 
206     RegInfo() : Stage(RS_New), Cascade(0) {}
207   };
208 
209   IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
210 
211   LiveRangeStage getStage(const LiveInterval &VirtReg) const {
212     return ExtraRegInfo[VirtReg.reg].Stage;
213   }
214 
215   void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
216     ExtraRegInfo.resize(MRI->getNumVirtRegs());
217     ExtraRegInfo[VirtReg.reg].Stage = Stage;
218   }
219 
220   template<typename Iterator>
221   void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
222     ExtraRegInfo.resize(MRI->getNumVirtRegs());
223     for (;Begin != End; ++Begin) {
224       unsigned Reg = *Begin;
225       if (ExtraRegInfo[Reg].Stage == RS_New)
226         ExtraRegInfo[Reg].Stage = NewStage;
227     }
228   }
229 
230   /// Cost of evicting interference.
231   struct EvictionCost {
232     unsigned BrokenHints; ///< Total number of broken hints.
233     float MaxWeight;      ///< Maximum spill weight evicted.
234 
235     EvictionCost(): BrokenHints(0), MaxWeight(0) {}
236 
237     bool isMax() const { return BrokenHints == ~0u; }
238 
239     void setMax() { BrokenHints = ~0u; }
240 
241     void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
242 
243     bool operator<(const EvictionCost &O) const {
244       return std::tie(BrokenHints, MaxWeight) <
245              std::tie(O.BrokenHints, O.MaxWeight);
246     }
247   };
248 
249   // splitting state.
250   std::unique_ptr<SplitAnalysis> SA;
251   std::unique_ptr<SplitEditor> SE;
252 
253   /// Cached per-block interference maps
254   InterferenceCache IntfCache;
255 
256   /// All basic blocks where the current register has uses.
257   SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
258 
259   /// Global live range splitting candidate info.
260   struct GlobalSplitCandidate {
261     // Register intended for assignment, or 0.
262     unsigned PhysReg;
263 
264     // SplitKit interval index for this candidate.
265     unsigned IntvIdx;
266 
267     // Interference for PhysReg.
268     InterferenceCache::Cursor Intf;
269 
270     // Bundles where this candidate should be live.
271     BitVector LiveBundles;
272     SmallVector<unsigned, 8> ActiveBlocks;
273 
274     void reset(InterferenceCache &Cache, unsigned Reg) {
275       PhysReg = Reg;
276       IntvIdx = 0;
277       Intf.setPhysReg(Cache, Reg);
278       LiveBundles.clear();
279       ActiveBlocks.clear();
280     }
281 
282     // Set B[i] = C for every live bundle where B[i] was NoCand.
283     unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
284       unsigned Count = 0;
285       for (int i = LiveBundles.find_first(); i >= 0;
286            i = LiveBundles.find_next(i))
287         if (B[i] == NoCand) {
288           B[i] = C;
289           Count++;
290         }
291       return Count;
292     }
293   };
294 
295   /// Candidate info for each PhysReg in AllocationOrder.
296   /// This vector never shrinks, but grows to the size of the largest register
297   /// class.
298   SmallVector<GlobalSplitCandidate, 32> GlobalCand;
299 
300   enum : unsigned { NoCand = ~0u };
301 
302   /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
303   /// NoCand which indicates the stack interval.
304   SmallVector<unsigned, 32> BundleCand;
305 
306   /// Callee-save register cost, calculated once per machine function.
307   BlockFrequency CSRCost;
308 
309   /// Run or not the local reassignment heuristic. This information is
310   /// obtained from the TargetSubtargetInfo.
311   bool EnableLocalReassign;
312 
313   /// Set of broken hints that may be reconciled later because of eviction.
314   SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
315 
316 public:
317   RAGreedy();
318 
319   /// Return the pass name.
320   const char* getPassName() const override {
321     return "Greedy Register Allocator";
322   }
323 
324   /// RAGreedy analysis usage.
325   void getAnalysisUsage(AnalysisUsage &AU) const override;
326   void releaseMemory() override;
327   Spiller &spiller() override { return *SpillerInstance; }
328   void enqueue(LiveInterval *LI) override;
329   LiveInterval *dequeue() override;
330   unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
331   void aboutToRemoveInterval(LiveInterval &) override;
332 
333   /// Perform register allocation.
334   bool runOnMachineFunction(MachineFunction &mf) override;
335 
336   static char ID;
337 
338 private:
339   unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
340                              SmallVirtRegSet &, unsigned = 0);
341 
342   bool LRE_CanEraseVirtReg(unsigned) override;
343   void LRE_WillShrinkVirtReg(unsigned) override;
344   void LRE_DidCloneVirtReg(unsigned, unsigned) override;
345   void enqueue(PQueue &CurQueue, LiveInterval *LI);
346   LiveInterval *dequeue(PQueue &CurQueue);
347 
348   BlockFrequency calcSpillCost();
349   bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
350   void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
351   void growRegion(GlobalSplitCandidate &Cand);
352   BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
353   bool calcCompactRegion(GlobalSplitCandidate&);
354   void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
355   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
356   unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
357   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
358   bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
359   void evictInterference(LiveInterval&, unsigned,
360                          SmallVectorImpl<unsigned>&);
361   bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
362                                   SmallLISet &RecoloringCandidates,
363                                   const SmallVirtRegSet &FixedRegisters);
364 
365   unsigned tryAssign(LiveInterval&, AllocationOrder&,
366                      SmallVectorImpl<unsigned>&);
367   unsigned tryEvict(LiveInterval&, AllocationOrder&,
368                     SmallVectorImpl<unsigned>&, unsigned = ~0u);
369   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
370                           SmallVectorImpl<unsigned>&);
371   /// Calculate cost of region splitting.
372   unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
373                                     AllocationOrder &Order,
374                                     BlockFrequency &BestCost,
375                                     unsigned &NumCands, bool IgnoreCSR);
376   /// Perform region splitting.
377   unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
378                          bool HasCompact,
379                          SmallVectorImpl<unsigned> &NewVRegs);
380   /// Check other options before using a callee-saved register for the first
381   /// time.
382   unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
383                                  unsigned PhysReg, unsigned &CostPerUseLimit,
384                                  SmallVectorImpl<unsigned> &NewVRegs);
385   void initializeCSRCost();
386   unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
387                          SmallVectorImpl<unsigned>&);
388   unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
389                                SmallVectorImpl<unsigned>&);
390   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
391     SmallVectorImpl<unsigned>&);
392   unsigned trySplit(LiveInterval&, AllocationOrder&,
393                     SmallVectorImpl<unsigned>&);
394   unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
395                                    SmallVectorImpl<unsigned> &,
396                                    SmallVirtRegSet &, unsigned);
397   bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
398                                SmallVirtRegSet &, unsigned);
399   void tryHintRecoloring(LiveInterval &);
400   void tryHintsRecoloring();
401 
402   /// Model the information carried by one end of a copy.
403   struct HintInfo {
404     /// The frequency of the copy.
405     BlockFrequency Freq;
406     /// The virtual register or physical register.
407     unsigned Reg;
408     /// Its currently assigned register.
409     /// In case of a physical register Reg == PhysReg.
410     unsigned PhysReg;
411     HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
412         : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
413   };
414   typedef SmallVector<HintInfo, 4> HintsInfo;
415   BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
416   void collectHintInfo(unsigned, HintsInfo &);
417 
418   bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
419 };
420 } // end anonymous namespace
421 
422 char RAGreedy::ID = 0;
423 
424 #ifndef NDEBUG
425 const char *const RAGreedy::StageName[] = {
426     "RS_New",
427     "RS_Assign",
428     "RS_Split",
429     "RS_Split2",
430     "RS_Spill",
431     "RS_Memory",
432     "RS_Done"
433 };
434 #endif
435 
436 // Hysteresis to use when comparing floats.
437 // This helps stabilize decisions based on float comparisons.
438 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
439 
440 
441 FunctionPass* llvm::createGreedyRegisterAllocator() {
442   return new RAGreedy();
443 }
444 
445 RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
446   initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
447   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
448   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
449   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
450   initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
451   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
452   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
453   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
454   initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
455   initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
456   initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
457   initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
458   initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
459 }
460 
461 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
462   AU.setPreservesCFG();
463   AU.addRequired<MachineBlockFrequencyInfo>();
464   AU.addPreserved<MachineBlockFrequencyInfo>();
465   AU.addRequired<AAResultsWrapperPass>();
466   AU.addPreserved<AAResultsWrapperPass>();
467   AU.addRequired<LiveIntervals>();
468   AU.addPreserved<LiveIntervals>();
469   AU.addRequired<SlotIndexes>();
470   AU.addPreserved<SlotIndexes>();
471   AU.addRequired<LiveDebugVariables>();
472   AU.addPreserved<LiveDebugVariables>();
473   AU.addRequired<LiveStacks>();
474   AU.addPreserved<LiveStacks>();
475   AU.addRequired<MachineDominatorTree>();
476   AU.addPreserved<MachineDominatorTree>();
477   AU.addRequired<MachineLoopInfo>();
478   AU.addPreserved<MachineLoopInfo>();
479   AU.addRequired<VirtRegMap>();
480   AU.addPreserved<VirtRegMap>();
481   AU.addRequired<LiveRegMatrix>();
482   AU.addPreserved<LiveRegMatrix>();
483   AU.addRequired<EdgeBundles>();
484   AU.addRequired<SpillPlacement>();
485   MachineFunctionPass::getAnalysisUsage(AU);
486 }
487 
488 
489 //===----------------------------------------------------------------------===//
490 //                     LiveRangeEdit delegate methods
491 //===----------------------------------------------------------------------===//
492 
493 bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
494   if (VRM->hasPhys(VirtReg)) {
495     LiveInterval &LI = LIS->getInterval(VirtReg);
496     Matrix->unassign(LI);
497     aboutToRemoveInterval(LI);
498     return true;
499   }
500   // Unassigned virtreg is probably in the priority queue.
501   // RegAllocBase will erase it after dequeueing.
502   return false;
503 }
504 
505 void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
506   if (!VRM->hasPhys(VirtReg))
507     return;
508 
509   // Register is assigned, put it back on the queue for reassignment.
510   LiveInterval &LI = LIS->getInterval(VirtReg);
511   Matrix->unassign(LI);
512   enqueue(&LI);
513 }
514 
515 void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
516   // Cloning a register we haven't even heard about yet?  Just ignore it.
517   if (!ExtraRegInfo.inBounds(Old))
518     return;
519 
520   // LRE may clone a virtual register because dead code elimination causes it to
521   // be split into connected components. The new components are much smaller
522   // than the original, so they should get a new chance at being assigned.
523   // same stage as the parent.
524   ExtraRegInfo[Old].Stage = RS_Assign;
525   ExtraRegInfo.grow(New);
526   ExtraRegInfo[New] = ExtraRegInfo[Old];
527 }
528 
529 void RAGreedy::releaseMemory() {
530   SpillerInstance.reset();
531   ExtraRegInfo.clear();
532   GlobalCand.clear();
533 }
534 
535 void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }
536 
537 void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
538   // Prioritize live ranges by size, assigning larger ranges first.
539   // The queue holds (size, reg) pairs.
540   const unsigned Size = LI->getSize();
541   const unsigned Reg = LI->reg;
542   assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
543          "Can only enqueue virtual registers");
544   unsigned Prio;
545 
546   ExtraRegInfo.grow(Reg);
547   if (ExtraRegInfo[Reg].Stage == RS_New)
548     ExtraRegInfo[Reg].Stage = RS_Assign;
549 
550   if (ExtraRegInfo[Reg].Stage == RS_Split) {
551     // Unsplit ranges that couldn't be allocated immediately are deferred until
552     // everything else has been allocated.
553     Prio = Size;
554   } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
555     // Memory operand should be considered last.
556     // Change the priority such that Memory operand are assigned in
557     // the reverse order that they came in.
558     // TODO: Make this a member variable and probably do something about hints.
559     static unsigned MemOp = 0;
560     Prio = MemOp++;
561   } else {
562     // Giant live ranges fall back to the global assignment heuristic, which
563     // prevents excessive spilling in pathological cases.
564     bool ReverseLocal = TRI->reverseLocalAssignment();
565     const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
566     bool ForceGlobal = !ReverseLocal &&
567       (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
568 
569     if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
570         LIS->intervalIsInOneMBB(*LI)) {
571       // Allocate original local ranges in linear instruction order. Since they
572       // are singly defined, this produces optimal coloring in the absence of
573       // global interference and other constraints.
574       if (!ReverseLocal)
575         Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
576       else {
577         // Allocating bottom up may allow many short LRGs to be assigned first
578         // to one of the cheap registers. This could be much faster for very
579         // large blocks on targets with many physical registers.
580         Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
581       }
582       Prio |= RC.AllocationPriority << 24;
583     } else {
584       // Allocate global and split ranges in long->short order. Long ranges that
585       // don't fit should be spilled (or split) ASAP so they don't create
586       // interference.  Mark a bit to prioritize global above local ranges.
587       Prio = (1u << 29) + Size;
588     }
589     // Mark a higher bit to prioritize global and local above RS_Split.
590     Prio |= (1u << 31);
591 
592     // Boost ranges that have a physical register hint.
593     if (VRM->hasKnownPreference(Reg))
594       Prio |= (1u << 30);
595   }
596   // The virtual register number is a tie breaker for same-sized ranges.
597   // Give lower vreg numbers higher priority to assign them first.
598   CurQueue.push(std::make_pair(Prio, ~Reg));
599 }
600 
601 LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
602 
603 LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
604   if (CurQueue.empty())
605     return nullptr;
606   LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
607   CurQueue.pop();
608   return LI;
609 }
610 
611 
612 //===----------------------------------------------------------------------===//
613 //                            Direct Assignment
614 //===----------------------------------------------------------------------===//
615 
616 /// tryAssign - Try to assign VirtReg to an available register.
617 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
618                              AllocationOrder &Order,
619                              SmallVectorImpl<unsigned> &NewVRegs) {
620   Order.rewind();
621   unsigned PhysReg;
622   while ((PhysReg = Order.next()))
623     if (!Matrix->checkInterference(VirtReg, PhysReg))
624       break;
625   if (!PhysReg || Order.isHint())
626     return PhysReg;
627 
628   // PhysReg is available, but there may be a better choice.
629 
630   // If we missed a simple hint, try to cheaply evict interference from the
631   // preferred register.
632   if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
633     if (Order.isHint(Hint)) {
634       DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
635       EvictionCost MaxCost;
636       MaxCost.setBrokenHints(1);
637       if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
638         evictInterference(VirtReg, Hint, NewVRegs);
639         return Hint;
640       }
641     }
642 
643   // Try to evict interference from a cheaper alternative.
644   unsigned Cost = TRI->getCostPerUse(PhysReg);
645 
646   // Most registers have 0 additional cost.
647   if (!Cost)
648     return PhysReg;
649 
650   DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
651                << '\n');
652   unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
653   return CheapReg ? CheapReg : PhysReg;
654 }
655 
656 
657 //===----------------------------------------------------------------------===//
658 //                         Interference eviction
659 //===----------------------------------------------------------------------===//
660 
661 unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
662   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
663   unsigned PhysReg;
664   while ((PhysReg = Order.next())) {
665     if (PhysReg == PrevReg)
666       continue;
667 
668     MCRegUnitIterator Units(PhysReg, TRI);
669     for (; Units.isValid(); ++Units) {
670       // Instantiate a "subquery", not to be confused with the Queries array.
671       LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
672       if (subQ.checkInterference())
673         break;
674     }
675     // If no units have interference, break out with the current PhysReg.
676     if (!Units.isValid())
677       break;
678   }
679   if (PhysReg)
680     DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
681           << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
682           << '\n');
683   return PhysReg;
684 }
685 
686 /// shouldEvict - determine if A should evict the assigned live range B. The
687 /// eviction policy defined by this function together with the allocation order
688 /// defined by enqueue() decides which registers ultimately end up being split
689 /// and spilled.
690 ///
691 /// Cascade numbers are used to prevent infinite loops if this function is a
692 /// cyclic relation.
693 ///
694 /// @param A          The live range to be assigned.
695 /// @param IsHint     True when A is about to be assigned to its preferred
696 ///                   register.
697 /// @param B          The live range to be evicted.
698 /// @param BreaksHint True when B is already assigned to its preferred register.
699 bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
700                            LiveInterval &B, bool BreaksHint) {
701   bool CanSplit = getStage(B) < RS_Spill;
702 
703   // Be fairly aggressive about following hints as long as the evictee can be
704   // split.
705   if (CanSplit && IsHint && !BreaksHint)
706     return true;
707 
708   if (A.weight > B.weight) {
709     DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
710     return true;
711   }
712   return false;
713 }
714 
715 /// canEvictInterference - Return true if all interferences between VirtReg and
716 /// PhysReg can be evicted.
717 ///
718 /// @param VirtReg Live range that is about to be assigned.
719 /// @param PhysReg Desired register for assignment.
720 /// @param IsHint  True when PhysReg is VirtReg's preferred register.
721 /// @param MaxCost Only look for cheaper candidates and update with new cost
722 ///                when returning true.
723 /// @returns True when interference can be evicted cheaper than MaxCost.
724 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
725                                     bool IsHint, EvictionCost &MaxCost) {
726   // It is only possible to evict virtual register interference.
727   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
728     return false;
729 
730   bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
731 
732   // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
733   // involved in an eviction before. If a cascade number was assigned, deny
734   // evicting anything with the same or a newer cascade number. This prevents
735   // infinite eviction loops.
736   //
737   // This works out so a register without a cascade number is allowed to evict
738   // anything, and it can be evicted by anything.
739   unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
740   if (!Cascade)
741     Cascade = NextCascade;
742 
743   EvictionCost Cost;
744   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
745     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
746     // If there is 10 or more interferences, chances are one is heavier.
747     if (Q.collectInterferingVRegs(10) >= 10)
748       return false;
749 
750     // Check if any interfering live range is heavier than MaxWeight.
751     for (unsigned i = Q.interferingVRegs().size(); i; --i) {
752       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
753       assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
754              "Only expecting virtual register interference from query");
755       // Never evict spill products. They cannot split or spill.
756       if (getStage(*Intf) == RS_Done)
757         return false;
758       // Once a live range becomes small enough, it is urgent that we find a
759       // register for it. This is indicated by an infinite spill weight. These
760       // urgent live ranges get to evict almost anything.
761       //
762       // Also allow urgent evictions of unspillable ranges from a strictly
763       // larger allocation order.
764       bool Urgent = !VirtReg.isSpillable() &&
765         (Intf->isSpillable() ||
766          RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
767          RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
768       // Only evict older cascades or live ranges without a cascade.
769       unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
770       if (Cascade <= IntfCascade) {
771         if (!Urgent)
772           return false;
773         // We permit breaking cascades for urgent evictions. It should be the
774         // last resort, though, so make it really expensive.
775         Cost.BrokenHints += 10;
776       }
777       // Would this break a satisfied hint?
778       bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
779       // Update eviction cost.
780       Cost.BrokenHints += BreaksHint;
781       Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
782       // Abort if this would be too expensive.
783       if (!(Cost < MaxCost))
784         return false;
785       if (Urgent)
786         continue;
787       // Apply the eviction policy for non-urgent evictions.
788       if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
789         return false;
790       // If !MaxCost.isMax(), then we're just looking for a cheap register.
791       // Evicting another local live range in this case could lead to suboptimal
792       // coloring.
793       if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
794           (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
795         return false;
796       }
797     }
798   }
799   MaxCost = Cost;
800   return true;
801 }
802 
803 /// evictInterference - Evict any interferring registers that prevent VirtReg
804 /// from being assigned to Physreg. This assumes that canEvictInterference
805 /// returned true.
806 void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
807                                  SmallVectorImpl<unsigned> &NewVRegs) {
808   // Make sure that VirtReg has a cascade number, and assign that cascade
809   // number to every evicted register. These live ranges than then only be
810   // evicted by a newer cascade, preventing infinite loops.
811   unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
812   if (!Cascade)
813     Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
814 
815   DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
816                << " interference: Cascade " << Cascade << '\n');
817 
818   // Collect all interfering virtregs first.
819   SmallVector<LiveInterval*, 8> Intfs;
820   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
821     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
822     assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
823     ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
824     Intfs.append(IVR.begin(), IVR.end());
825   }
826 
827   // Evict them second. This will invalidate the queries.
828   for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
829     LiveInterval *Intf = Intfs[i];
830     // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
831     if (!VRM->hasPhys(Intf->reg))
832       continue;
833     Matrix->unassign(*Intf);
834     assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
835             VirtReg.isSpillable() < Intf->isSpillable()) &&
836            "Cannot decrease cascade number, illegal eviction");
837     ExtraRegInfo[Intf->reg].Cascade = Cascade;
838     ++NumEvicted;
839     NewVRegs.push_back(Intf->reg);
840   }
841 }
842 
843 /// Returns true if the given \p PhysReg is a callee saved register and has not
844 /// been used for allocation yet.
845 bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
846   unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
847   if (CSR == 0)
848     return false;
849 
850   return !Matrix->isPhysRegUsed(PhysReg);
851 }
852 
853 /// tryEvict - Try to evict all interferences for a physreg.
854 /// @param  VirtReg Currently unassigned virtual register.
855 /// @param  Order   Physregs to try.
856 /// @return         Physreg to assign VirtReg, or 0.
857 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
858                             AllocationOrder &Order,
859                             SmallVectorImpl<unsigned> &NewVRegs,
860                             unsigned CostPerUseLimit) {
861   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
862 
863   // Keep track of the cheapest interference seen so far.
864   EvictionCost BestCost;
865   BestCost.setMax();
866   unsigned BestPhys = 0;
867   unsigned OrderLimit = Order.getOrder().size();
868 
869   // When we are just looking for a reduced cost per use, don't break any
870   // hints, and only evict smaller spill weights.
871   if (CostPerUseLimit < ~0u) {
872     BestCost.BrokenHints = 0;
873     BestCost.MaxWeight = VirtReg.weight;
874 
875     // Check of any registers in RC are below CostPerUseLimit.
876     const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
877     unsigned MinCost = RegClassInfo.getMinCost(RC);
878     if (MinCost >= CostPerUseLimit) {
879       DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
880                    << ", no cheaper registers to be found.\n");
881       return 0;
882     }
883 
884     // It is normal for register classes to have a long tail of registers with
885     // the same cost. We don't need to look at them if they're too expensive.
886     if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
887       OrderLimit = RegClassInfo.getLastCostChange(RC);
888       DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
889     }
890   }
891 
892   Order.rewind();
893   while (unsigned PhysReg = Order.next(OrderLimit)) {
894     if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
895       continue;
896     // The first use of a callee-saved register in a function has cost 1.
897     // Don't start using a CSR when the CostPerUseLimit is low.
898     if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
899       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
900             << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
901             << '\n');
902       continue;
903     }
904 
905     if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
906       continue;
907 
908     // Best so far.
909     BestPhys = PhysReg;
910 
911     // Stop if the hint can be used.
912     if (Order.isHint())
913       break;
914   }
915 
916   if (!BestPhys)
917     return 0;
918 
919   evictInterference(VirtReg, BestPhys, NewVRegs);
920   return BestPhys;
921 }
922 
923 
924 //===----------------------------------------------------------------------===//
925 //                              Region Splitting
926 //===----------------------------------------------------------------------===//
927 
928 /// addSplitConstraints - Fill out the SplitConstraints vector based on the
929 /// interference pattern in Physreg and its aliases. Add the constraints to
930 /// SpillPlacement and return the static cost of this split in Cost, assuming
931 /// that all preferences in SplitConstraints are met.
932 /// Return false if there are no bundles with positive bias.
933 bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
934                                    BlockFrequency &Cost) {
935   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
936 
937   // Reset interference dependent info.
938   SplitConstraints.resize(UseBlocks.size());
939   BlockFrequency StaticCost = 0;
940   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
941     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
942     SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
943 
944     BC.Number = BI.MBB->getNumber();
945     Intf.moveToBlock(BC.Number);
946     BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
947     BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
948     BC.ChangesValue = BI.FirstDef.isValid();
949 
950     if (!Intf.hasInterference())
951       continue;
952 
953     // Number of spill code instructions to insert.
954     unsigned Ins = 0;
955 
956     // Interference for the live-in value.
957     if (BI.LiveIn) {
958       if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
959         BC.Entry = SpillPlacement::MustSpill;
960         ++Ins;
961       } else if (Intf.first() < BI.FirstInstr) {
962         BC.Entry = SpillPlacement::PrefSpill;
963         ++Ins;
964       } else if (Intf.first() < BI.LastInstr) {
965         ++Ins;
966       }
967     }
968 
969     // Interference for the live-out value.
970     if (BI.LiveOut) {
971       if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
972         BC.Exit = SpillPlacement::MustSpill;
973         ++Ins;
974       } else if (Intf.last() > BI.LastInstr) {
975         BC.Exit = SpillPlacement::PrefSpill;
976         ++Ins;
977       } else if (Intf.last() > BI.FirstInstr) {
978         ++Ins;
979       }
980     }
981 
982     // Accumulate the total frequency of inserted spill code.
983     while (Ins--)
984       StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
985   }
986   Cost = StaticCost;
987 
988   // Add constraints for use-blocks. Note that these are the only constraints
989   // that may add a positive bias, it is downhill from here.
990   SpillPlacer->addConstraints(SplitConstraints);
991   return SpillPlacer->scanActiveBundles();
992 }
993 
994 
995 /// addThroughConstraints - Add constraints and links to SpillPlacer from the
996 /// live-through blocks in Blocks.
997 void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
998                                      ArrayRef<unsigned> Blocks) {
999   const unsigned GroupSize = 8;
1000   SpillPlacement::BlockConstraint BCS[GroupSize];
1001   unsigned TBS[GroupSize];
1002   unsigned B = 0, T = 0;
1003 
1004   for (unsigned i = 0; i != Blocks.size(); ++i) {
1005     unsigned Number = Blocks[i];
1006     Intf.moveToBlock(Number);
1007 
1008     if (!Intf.hasInterference()) {
1009       assert(T < GroupSize && "Array overflow");
1010       TBS[T] = Number;
1011       if (++T == GroupSize) {
1012         SpillPlacer->addLinks(makeArrayRef(TBS, T));
1013         T = 0;
1014       }
1015       continue;
1016     }
1017 
1018     assert(B < GroupSize && "Array overflow");
1019     BCS[B].Number = Number;
1020 
1021     // Interference for the live-in value.
1022     if (Intf.first() <= Indexes->getMBBStartIdx(Number))
1023       BCS[B].Entry = SpillPlacement::MustSpill;
1024     else
1025       BCS[B].Entry = SpillPlacement::PrefSpill;
1026 
1027     // Interference for the live-out value.
1028     if (Intf.last() >= SA->getLastSplitPoint(Number))
1029       BCS[B].Exit = SpillPlacement::MustSpill;
1030     else
1031       BCS[B].Exit = SpillPlacement::PrefSpill;
1032 
1033     if (++B == GroupSize) {
1034       SpillPlacer->addConstraints(makeArrayRef(BCS, B));
1035       B = 0;
1036     }
1037   }
1038 
1039   SpillPlacer->addConstraints(makeArrayRef(BCS, B));
1040   SpillPlacer->addLinks(makeArrayRef(TBS, T));
1041 }
1042 
1043 void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
1044   // Keep track of through blocks that have not been added to SpillPlacer.
1045   BitVector Todo = SA->getThroughBlocks();
1046   SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
1047   unsigned AddedTo = 0;
1048 #ifndef NDEBUG
1049   unsigned Visited = 0;
1050 #endif
1051 
1052   for (;;) {
1053     ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
1054     // Find new through blocks in the periphery of PrefRegBundles.
1055     for (int i = 0, e = NewBundles.size(); i != e; ++i) {
1056       unsigned Bundle = NewBundles[i];
1057       // Look at all blocks connected to Bundle in the full graph.
1058       ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
1059       for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
1060            I != E; ++I) {
1061         unsigned Block = *I;
1062         if (!Todo.test(Block))
1063           continue;
1064         Todo.reset(Block);
1065         // This is a new through block. Add it to SpillPlacer later.
1066         ActiveBlocks.push_back(Block);
1067 #ifndef NDEBUG
1068         ++Visited;
1069 #endif
1070       }
1071     }
1072     // Any new blocks to add?
1073     if (ActiveBlocks.size() == AddedTo)
1074       break;
1075 
1076     // Compute through constraints from the interference, or assume that all
1077     // through blocks prefer spilling when forming compact regions.
1078     auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
1079     if (Cand.PhysReg)
1080       addThroughConstraints(Cand.Intf, NewBlocks);
1081     else
1082       // Provide a strong negative bias on through blocks to prevent unwanted
1083       // liveness on loop backedges.
1084       SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
1085     AddedTo = ActiveBlocks.size();
1086 
1087     // Perhaps iterating can enable more bundles?
1088     SpillPlacer->iterate();
1089   }
1090   DEBUG(dbgs() << ", v=" << Visited);
1091 }
1092 
1093 /// calcCompactRegion - Compute the set of edge bundles that should be live
1094 /// when splitting the current live range into compact regions.  Compact
1095 /// regions can be computed without looking at interference.  They are the
1096 /// regions formed by removing all the live-through blocks from the live range.
1097 ///
1098 /// Returns false if the current live range is already compact, or if the
1099 /// compact regions would form single block regions anyway.
1100 bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
1101   // Without any through blocks, the live range is already compact.
1102   if (!SA->getNumThroughBlocks())
1103     return false;
1104 
1105   // Compact regions don't correspond to any physreg.
1106   Cand.reset(IntfCache, 0);
1107 
1108   DEBUG(dbgs() << "Compact region bundles");
1109 
1110   // Use the spill placer to determine the live bundles. GrowRegion pretends
1111   // that all the through blocks have interference when PhysReg is unset.
1112   SpillPlacer->prepare(Cand.LiveBundles);
1113 
1114   // The static split cost will be zero since Cand.Intf reports no interference.
1115   BlockFrequency Cost;
1116   if (!addSplitConstraints(Cand.Intf, Cost)) {
1117     DEBUG(dbgs() << ", none.\n");
1118     return false;
1119   }
1120 
1121   growRegion(Cand);
1122   SpillPlacer->finish();
1123 
1124   if (!Cand.LiveBundles.any()) {
1125     DEBUG(dbgs() << ", none.\n");
1126     return false;
1127   }
1128 
1129   DEBUG({
1130     for (int i = Cand.LiveBundles.find_first(); i>=0;
1131          i = Cand.LiveBundles.find_next(i))
1132     dbgs() << " EB#" << i;
1133     dbgs() << ".\n";
1134   });
1135   return true;
1136 }
1137 
1138 /// calcSpillCost - Compute how expensive it would be to split the live range in
1139 /// SA around all use blocks instead of forming bundle regions.
1140 BlockFrequency RAGreedy::calcSpillCost() {
1141   BlockFrequency Cost = 0;
1142   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1143   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1144     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1145     unsigned Number = BI.MBB->getNumber();
1146     // We normally only need one spill instruction - a load or a store.
1147     Cost += SpillPlacer->getBlockFrequency(Number);
1148 
1149     // Unless the value is redefined in the block.
1150     if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
1151       Cost += SpillPlacer->getBlockFrequency(Number);
1152   }
1153   return Cost;
1154 }
1155 
1156 /// calcGlobalSplitCost - Return the global split cost of following the split
1157 /// pattern in LiveBundles. This cost should be added to the local cost of the
1158 /// interference pattern in SplitConstraints.
1159 ///
1160 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
1161   BlockFrequency GlobalCost = 0;
1162   const BitVector &LiveBundles = Cand.LiveBundles;
1163   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1164   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1165     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1166     SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
1167     bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
1168     bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
1169     unsigned Ins = 0;
1170 
1171     if (BI.LiveIn)
1172       Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
1173     if (BI.LiveOut)
1174       Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
1175     while (Ins--)
1176       GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
1177   }
1178 
1179   for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
1180     unsigned Number = Cand.ActiveBlocks[i];
1181     bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
1182     bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
1183     if (!RegIn && !RegOut)
1184       continue;
1185     if (RegIn && RegOut) {
1186       // We need double spill code if this block has interference.
1187       Cand.Intf.moveToBlock(Number);
1188       if (Cand.Intf.hasInterference()) {
1189         GlobalCost += SpillPlacer->getBlockFrequency(Number);
1190         GlobalCost += SpillPlacer->getBlockFrequency(Number);
1191       }
1192       continue;
1193     }
1194     // live-in / stack-out or stack-in live-out.
1195     GlobalCost += SpillPlacer->getBlockFrequency(Number);
1196   }
1197   return GlobalCost;
1198 }
1199 
1200 /// splitAroundRegion - Split the current live range around the regions
1201 /// determined by BundleCand and GlobalCand.
1202 ///
1203 /// Before calling this function, GlobalCand and BundleCand must be initialized
1204 /// so each bundle is assigned to a valid candidate, or NoCand for the
1205 /// stack-bound bundles.  The shared SA/SE SplitAnalysis and SplitEditor
1206 /// objects must be initialized for the current live range, and intervals
1207 /// created for the used candidates.
1208 ///
1209 /// @param LREdit    The LiveRangeEdit object handling the current split.
1210 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value
1211 ///                  must appear in this list.
1212 void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
1213                                  ArrayRef<unsigned> UsedCands) {
1214   // These are the intervals created for new global ranges. We may create more
1215   // intervals for local ranges.
1216   const unsigned NumGlobalIntvs = LREdit.size();
1217   DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
1218   assert(NumGlobalIntvs && "No global intervals configured");
1219 
1220   // Isolate even single instructions when dealing with a proper sub-class.
1221   // That guarantees register class inflation for the stack interval because it
1222   // is all copies.
1223   unsigned Reg = SA->getParent().reg;
1224   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1225 
1226   // First handle all the blocks with uses.
1227   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1228   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1229     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1230     unsigned Number = BI.MBB->getNumber();
1231     unsigned IntvIn = 0, IntvOut = 0;
1232     SlotIndex IntfIn, IntfOut;
1233     if (BI.LiveIn) {
1234       unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
1235       if (CandIn != NoCand) {
1236         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
1237         IntvIn = Cand.IntvIdx;
1238         Cand.Intf.moveToBlock(Number);
1239         IntfIn = Cand.Intf.first();
1240       }
1241     }
1242     if (BI.LiveOut) {
1243       unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
1244       if (CandOut != NoCand) {
1245         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
1246         IntvOut = Cand.IntvIdx;
1247         Cand.Intf.moveToBlock(Number);
1248         IntfOut = Cand.Intf.last();
1249       }
1250     }
1251 
1252     // Create separate intervals for isolated blocks with multiple uses.
1253     if (!IntvIn && !IntvOut) {
1254       DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
1255       if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1256         SE->splitSingleBlock(BI);
1257       continue;
1258     }
1259 
1260     if (IntvIn && IntvOut)
1261       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
1262     else if (IntvIn)
1263       SE->splitRegInBlock(BI, IntvIn, IntfIn);
1264     else
1265       SE->splitRegOutBlock(BI, IntvOut, IntfOut);
1266   }
1267 
1268   // Handle live-through blocks. The relevant live-through blocks are stored in
1269   // the ActiveBlocks list with each candidate. We need to filter out
1270   // duplicates.
1271   BitVector Todo = SA->getThroughBlocks();
1272   for (unsigned c = 0; c != UsedCands.size(); ++c) {
1273     ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
1274     for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1275       unsigned Number = Blocks[i];
1276       if (!Todo.test(Number))
1277         continue;
1278       Todo.reset(Number);
1279 
1280       unsigned IntvIn = 0, IntvOut = 0;
1281       SlotIndex IntfIn, IntfOut;
1282 
1283       unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
1284       if (CandIn != NoCand) {
1285         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
1286         IntvIn = Cand.IntvIdx;
1287         Cand.Intf.moveToBlock(Number);
1288         IntfIn = Cand.Intf.first();
1289       }
1290 
1291       unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
1292       if (CandOut != NoCand) {
1293         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
1294         IntvOut = Cand.IntvIdx;
1295         Cand.Intf.moveToBlock(Number);
1296         IntfOut = Cand.Intf.last();
1297       }
1298       if (!IntvIn && !IntvOut)
1299         continue;
1300       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
1301     }
1302   }
1303 
1304   ++NumGlobalSplits;
1305 
1306   SmallVector<unsigned, 8> IntvMap;
1307   SE->finish(&IntvMap);
1308   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1309 
1310   ExtraRegInfo.resize(MRI->getNumVirtRegs());
1311   unsigned OrigBlocks = SA->getNumLiveBlocks();
1312 
1313   // Sort out the new intervals created by splitting. We get four kinds:
1314   // - Remainder intervals should not be split again.
1315   // - Candidate intervals can be assigned to Cand.PhysReg.
1316   // - Block-local splits are candidates for local splitting.
1317   // - DCE leftovers should go back on the queue.
1318   for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
1319     LiveInterval &Reg = LIS->getInterval(LREdit.get(i));
1320 
1321     // Ignore old intervals from DCE.
1322     if (getStage(Reg) != RS_New)
1323       continue;
1324 
1325     // Remainder interval. Don't try splitting again, spill if it doesn't
1326     // allocate.
1327     if (IntvMap[i] == 0) {
1328       setStage(Reg, RS_Spill);
1329       continue;
1330     }
1331 
1332     // Global intervals. Allow repeated splitting as long as the number of live
1333     // blocks is strictly decreasing.
1334     if (IntvMap[i] < NumGlobalIntvs) {
1335       if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
1336         DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
1337                      << " blocks as original.\n");
1338         // Don't allow repeated splitting as a safe guard against looping.
1339         setStage(Reg, RS_Split2);
1340       }
1341       continue;
1342     }
1343 
1344     // Other intervals are treated as new. This includes local intervals created
1345     // for blocks with multiple uses, and anything created by DCE.
1346   }
1347 
1348   if (VerifyEnabled)
1349     MF->verify(this, "After splitting live range around region");
1350 }
1351 
1352 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1353                                   SmallVectorImpl<unsigned> &NewVRegs) {
1354   unsigned NumCands = 0;
1355   BlockFrequency BestCost;
1356 
1357   // Check if we can split this live range around a compact region.
1358   bool HasCompact = calcCompactRegion(GlobalCand.front());
1359   if (HasCompact) {
1360     // Yes, keep GlobalCand[0] as the compact region candidate.
1361     NumCands = 1;
1362     BestCost = BlockFrequency::getMaxFrequency();
1363   } else {
1364     // No benefit from the compact region, our fallback will be per-block
1365     // splitting. Make sure we find a solution that is cheaper than spilling.
1366     BestCost = calcSpillCost();
1367     DEBUG(dbgs() << "Cost of isolating all blocks = ";
1368                  MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
1369   }
1370 
1371   unsigned BestCand =
1372       calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
1373                                false/*IgnoreCSR*/);
1374 
1375   // No solutions found, fall back to single block splitting.
1376   if (!HasCompact && BestCand == NoCand)
1377     return 0;
1378 
1379   return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1380 }
1381 
1382 unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
1383                                             AllocationOrder &Order,
1384                                             BlockFrequency &BestCost,
1385                                             unsigned &NumCands,
1386                                             bool IgnoreCSR) {
1387   unsigned BestCand = NoCand;
1388   Order.rewind();
1389   while (unsigned PhysReg = Order.next()) {
1390     if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
1391       continue;
1392 
1393     // Discard bad candidates before we run out of interference cache cursors.
1394     // This will only affect register classes with a lot of registers (>32).
1395     if (NumCands == IntfCache.getMaxCursors()) {
1396       unsigned WorstCount = ~0u;
1397       unsigned Worst = 0;
1398       for (unsigned i = 0; i != NumCands; ++i) {
1399         if (i == BestCand || !GlobalCand[i].PhysReg)
1400           continue;
1401         unsigned Count = GlobalCand[i].LiveBundles.count();
1402         if (Count < WorstCount) {
1403           Worst = i;
1404           WorstCount = Count;
1405         }
1406       }
1407       --NumCands;
1408       GlobalCand[Worst] = GlobalCand[NumCands];
1409       if (BestCand == NumCands)
1410         BestCand = Worst;
1411     }
1412 
1413     if (GlobalCand.size() <= NumCands)
1414       GlobalCand.resize(NumCands+1);
1415     GlobalSplitCandidate &Cand = GlobalCand[NumCands];
1416     Cand.reset(IntfCache, PhysReg);
1417 
1418     SpillPlacer->prepare(Cand.LiveBundles);
1419     BlockFrequency Cost;
1420     if (!addSplitConstraints(Cand.Intf, Cost)) {
1421       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
1422       continue;
1423     }
1424     DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
1425                  MBFI->printBlockFreq(dbgs(), Cost));
1426     if (Cost >= BestCost) {
1427       DEBUG({
1428         if (BestCand == NoCand)
1429           dbgs() << " worse than no bundles\n";
1430         else
1431           dbgs() << " worse than "
1432                  << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
1433       });
1434       continue;
1435     }
1436     growRegion(Cand);
1437 
1438     SpillPlacer->finish();
1439 
1440     // No live bundles, defer to splitSingleBlocks().
1441     if (!Cand.LiveBundles.any()) {
1442       DEBUG(dbgs() << " no bundles.\n");
1443       continue;
1444     }
1445 
1446     Cost += calcGlobalSplitCost(Cand);
1447     DEBUG({
1448       dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
1449                                 << " with bundles";
1450       for (int i = Cand.LiveBundles.find_first(); i>=0;
1451            i = Cand.LiveBundles.find_next(i))
1452         dbgs() << " EB#" << i;
1453       dbgs() << ".\n";
1454     });
1455     if (Cost < BestCost) {
1456       BestCand = NumCands;
1457       BestCost = Cost;
1458     }
1459     ++NumCands;
1460   }
1461   return BestCand;
1462 }
1463 
1464 unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
1465                                  bool HasCompact,
1466                                  SmallVectorImpl<unsigned> &NewVRegs) {
1467   SmallVector<unsigned, 8> UsedCands;
1468   // Prepare split editor.
1469   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1470   SE->reset(LREdit, SplitSpillMode);
1471 
1472   // Assign all edge bundles to the preferred candidate, or NoCand.
1473   BundleCand.assign(Bundles->getNumBundles(), NoCand);
1474 
1475   // Assign bundles for the best candidate region.
1476   if (BestCand != NoCand) {
1477     GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1478     if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1479       UsedCands.push_back(BestCand);
1480       Cand.IntvIdx = SE->openIntv();
1481       DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
1482                    << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1483       (void)B;
1484     }
1485   }
1486 
1487   // Assign bundles for the compact region.
1488   if (HasCompact) {
1489     GlobalSplitCandidate &Cand = GlobalCand.front();
1490     assert(!Cand.PhysReg && "Compact region has no physreg");
1491     if (unsigned B = Cand.getBundles(BundleCand, 0)) {
1492       UsedCands.push_back(0);
1493       Cand.IntvIdx = SE->openIntv();
1494       DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
1495                    << Cand.IntvIdx << ".\n");
1496       (void)B;
1497     }
1498   }
1499 
1500   splitAroundRegion(LREdit, UsedCands);
1501   return 0;
1502 }
1503 
1504 
1505 //===----------------------------------------------------------------------===//
1506 //                            Per-Block Splitting
1507 //===----------------------------------------------------------------------===//
1508 
1509 /// tryBlockSplit - Split a global live range around every block with uses. This
1510 /// creates a lot of local live ranges, that will be split by tryLocalSplit if
1511 /// they don't allocate.
1512 unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1513                                  SmallVectorImpl<unsigned> &NewVRegs) {
1514   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1515   unsigned Reg = VirtReg.reg;
1516   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1517   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1518   SE->reset(LREdit, SplitSpillMode);
1519   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1520   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1521     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1522     if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1523       SE->splitSingleBlock(BI);
1524   }
1525   // No blocks were split.
1526   if (LREdit.empty())
1527     return 0;
1528 
1529   // We did split for some blocks.
1530   SmallVector<unsigned, 8> IntvMap;
1531   SE->finish(&IntvMap);
1532 
1533   // Tell LiveDebugVariables about the new ranges.
1534   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1535 
1536   ExtraRegInfo.resize(MRI->getNumVirtRegs());
1537 
1538   // Sort out the new intervals created by splitting. The remainder interval
1539   // goes straight to spilling, the new local ranges get to stay RS_New.
1540   for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
1541     LiveInterval &LI = LIS->getInterval(LREdit.get(i));
1542     if (getStage(LI) == RS_New && IntvMap[i] == 0)
1543       setStage(LI, RS_Spill);
1544   }
1545 
1546   if (VerifyEnabled)
1547     MF->verify(this, "After splitting live range around basic blocks");
1548   return 0;
1549 }
1550 
1551 
1552 //===----------------------------------------------------------------------===//
1553 //                         Per-Instruction Splitting
1554 //===----------------------------------------------------------------------===//
1555 
1556 /// Get the number of allocatable registers that match the constraints of \p Reg
1557 /// on \p MI and that are also in \p SuperRC.
1558 static unsigned getNumAllocatableRegsForConstraints(
1559     const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
1560     const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
1561     const RegisterClassInfo &RCI) {
1562   assert(SuperRC && "Invalid register class");
1563 
1564   const TargetRegisterClass *ConstrainedRC =
1565       MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1566                                              /* ExploreBundle */ true);
1567   if (!ConstrainedRC)
1568     return 0;
1569   return RCI.getNumAllocatableRegs(ConstrainedRC);
1570 }
1571 
1572 /// tryInstructionSplit - Split a live range around individual instructions.
1573 /// This is normally not worthwhile since the spiller is doing essentially the
1574 /// same thing. However, when the live range is in a constrained register
1575 /// class, it may help to insert copies such that parts of the live range can
1576 /// be moved to a larger register class.
1577 ///
1578 /// This is similar to spilling to a larger register class.
1579 unsigned
1580 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1581                               SmallVectorImpl<unsigned> &NewVRegs) {
1582   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
1583   // There is no point to this if there are no larger sub-classes.
1584   if (!RegClassInfo.isProperSubClass(CurRC))
1585     return 0;
1586 
1587   // Always enable split spill mode, since we're effectively spilling to a
1588   // register.
1589   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1590   SE->reset(LREdit, SplitEditor::SM_Size);
1591 
1592   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1593   if (Uses.size() <= 1)
1594     return 0;
1595 
1596   DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
1597 
1598   const TargetRegisterClass *SuperRC =
1599       TRI->getLargestLegalSuperClass(CurRC, *MF);
1600   unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
1601   // Split around every non-copy instruction if this split will relax
1602   // the constraints on the virtual register.
1603   // Otherwise, splitting just inserts uncoalescable copies that do not help
1604   // the allocation.
1605   for (unsigned i = 0; i != Uses.size(); ++i) {
1606     if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
1607       if (MI->isFullCopy() ||
1608           SuperRCNumAllocatableRegs ==
1609               getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
1610                                                   TRI, RCI)) {
1611         DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
1612         continue;
1613       }
1614     SE->openIntv();
1615     SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
1616     SlotIndex SegStop  = SE->leaveIntvAfter(Uses[i]);
1617     SE->useIntv(SegStart, SegStop);
1618   }
1619 
1620   if (LREdit.empty()) {
1621     DEBUG(dbgs() << "All uses were copies.\n");
1622     return 0;
1623   }
1624 
1625   SmallVector<unsigned, 8> IntvMap;
1626   SE->finish(&IntvMap);
1627   DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
1628   ExtraRegInfo.resize(MRI->getNumVirtRegs());
1629 
1630   // Assign all new registers to RS_Spill. This was the last chance.
1631   setStage(LREdit.begin(), LREdit.end(), RS_Spill);
1632   return 0;
1633 }
1634 
1635 
1636 //===----------------------------------------------------------------------===//
1637 //                             Local Splitting
1638 //===----------------------------------------------------------------------===//
1639 
1640 
1641 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted
1642 /// in order to use PhysReg between two entries in SA->UseSlots.
1643 ///
1644 /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
1645 ///
1646 void RAGreedy::calcGapWeights(unsigned PhysReg,
1647                               SmallVectorImpl<float> &GapWeight) {
1648   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1649   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1650   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1651   const unsigned NumGaps = Uses.size()-1;
1652 
1653   // Start and end points for the interference check.
1654   SlotIndex StartIdx =
1655     BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
1656   SlotIndex StopIdx =
1657     BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
1658 
1659   GapWeight.assign(NumGaps, 0.0f);
1660 
1661   // Add interference from each overlapping register.
1662   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1663     if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
1664           .checkInterference())
1665       continue;
1666 
1667     // We know that VirtReg is a continuous interval from FirstInstr to
1668     // LastInstr, so we don't need InterferenceQuery.
1669     //
1670     // Interference that overlaps an instruction is counted in both gaps
1671     // surrounding the instruction. The exception is interference before
1672     // StartIdx and after StopIdx.
1673     //
1674     LiveIntervalUnion::SegmentIter IntI =
1675       Matrix->getLiveUnions()[*Units] .find(StartIdx);
1676     for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1677       // Skip the gaps before IntI.
1678       while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1679         if (++Gap == NumGaps)
1680           break;
1681       if (Gap == NumGaps)
1682         break;
1683 
1684       // Update the gaps covered by IntI.
1685       const float weight = IntI.value()->weight;
1686       for (; Gap != NumGaps; ++Gap) {
1687         GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1688         if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1689           break;
1690       }
1691       if (Gap == NumGaps)
1692         break;
1693     }
1694   }
1695 
1696   // Add fixed interference.
1697   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1698     const LiveRange &LR = LIS->getRegUnit(*Units);
1699     LiveRange::const_iterator I = LR.find(StartIdx);
1700     LiveRange::const_iterator E = LR.end();
1701 
1702     // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
1703     for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1704       while (Uses[Gap+1].getBoundaryIndex() < I->start)
1705         if (++Gap == NumGaps)
1706           break;
1707       if (Gap == NumGaps)
1708         break;
1709 
1710       for (; Gap != NumGaps; ++Gap) {
1711         GapWeight[Gap] = llvm::huge_valf;
1712         if (Uses[Gap+1].getBaseIndex() >= I->end)
1713           break;
1714       }
1715       if (Gap == NumGaps)
1716         break;
1717     }
1718   }
1719 }
1720 
1721 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1722 /// basic block.
1723 ///
1724 unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1725                                  SmallVectorImpl<unsigned> &NewVRegs) {
1726   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1727   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1728 
1729   // Note that it is possible to have an interval that is live-in or live-out
1730   // while only covering a single block - A phi-def can use undef values from
1731   // predecessors, and the block could be a single-block loop.
1732   // We don't bother doing anything clever about such a case, we simply assume
1733   // that the interval is continuous from FirstInstr to LastInstr. We should
1734   // make sure that we don't do anything illegal to such an interval, though.
1735 
1736   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1737   if (Uses.size() <= 2)
1738     return 0;
1739   const unsigned NumGaps = Uses.size()-1;
1740 
1741   DEBUG({
1742     dbgs() << "tryLocalSplit: ";
1743     for (unsigned i = 0, e = Uses.size(); i != e; ++i)
1744       dbgs() << ' ' << Uses[i];
1745     dbgs() << '\n';
1746   });
1747 
1748   // If VirtReg is live across any register mask operands, compute a list of
1749   // gaps with register masks.
1750   SmallVector<unsigned, 8> RegMaskGaps;
1751   if (Matrix->checkRegMaskInterference(VirtReg)) {
1752     // Get regmask slots for the whole block.
1753     ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1754     DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1755     // Constrain to VirtReg's live range.
1756     unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
1757                                    Uses.front().getRegSlot()) - RMS.begin();
1758     unsigned re = RMS.size();
1759     for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
1760       // Look for Uses[i] <= RMS <= Uses[i+1].
1761       assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
1762       if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
1763         continue;
1764       // Skip a regmask on the same instruction as the last use. It doesn't
1765       // overlap the live range.
1766       if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
1767         break;
1768       DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
1769       RegMaskGaps.push_back(i);
1770       // Advance ri to the next gap. A regmask on one of the uses counts in
1771       // both gaps.
1772       while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
1773         ++ri;
1774     }
1775     DEBUG(dbgs() << '\n');
1776   }
1777 
1778   // Since we allow local split results to be split again, there is a risk of
1779   // creating infinite loops. It is tempting to require that the new live
1780   // ranges have less instructions than the original. That would guarantee
1781   // convergence, but it is too strict. A live range with 3 instructions can be
1782   // split 2+3 (including the COPY), and we want to allow that.
1783   //
1784   // Instead we use these rules:
1785   //
1786   // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1787   //    noop split, of course).
1788   // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1789   //    the new ranges must have fewer instructions than before the split.
1790   // 3. New ranges with the same number of instructions are marked RS_Split2,
1791   //    smaller ranges are marked RS_New.
1792   //
1793   // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1794   // excessive splitting and infinite loops.
1795   //
1796   bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
1797 
1798   // Best split candidate.
1799   unsigned BestBefore = NumGaps;
1800   unsigned BestAfter = 0;
1801   float BestDiff = 0;
1802 
1803   const float blockFreq =
1804     SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1805     (1.0f / MBFI->getEntryFreq());
1806   SmallVector<float, 8> GapWeight;
1807 
1808   Order.rewind();
1809   while (unsigned PhysReg = Order.next()) {
1810     // Keep track of the largest spill weight that would need to be evicted in
1811     // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
1812     calcGapWeights(PhysReg, GapWeight);
1813 
1814     // Remove any gaps with regmask clobbers.
1815     if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1816       for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
1817         GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
1818 
1819     // Try to find the best sequence of gaps to close.
1820     // The new spill weight must be larger than any gap interference.
1821 
1822     // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1823     unsigned SplitBefore = 0, SplitAfter = 1;
1824 
1825     // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1826     // It is the spill weight that needs to be evicted.
1827     float MaxGap = GapWeight[0];
1828 
1829     for (;;) {
1830       // Live before/after split?
1831       const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1832       const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1833 
1834       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
1835                    << Uses[SplitBefore] << '-' << Uses[SplitAfter]
1836                    << " i=" << MaxGap);
1837 
1838       // Stop before the interval gets so big we wouldn't be making progress.
1839       if (!LiveBefore && !LiveAfter) {
1840         DEBUG(dbgs() << " all\n");
1841         break;
1842       }
1843       // Should the interval be extended or shrunk?
1844       bool Shrink = true;
1845 
1846       // How many gaps would the new range have?
1847       unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1848 
1849       // Legally, without causing looping?
1850       bool Legal = !ProgressRequired || NewGaps < NumGaps;
1851 
1852       if (Legal && MaxGap < llvm::huge_valf) {
1853         // Estimate the new spill weight. Each instruction reads or writes the
1854         // register. Conservatively assume there are no read-modify-write
1855         // instructions.
1856         //
1857         // Try to guess the size of the new interval.
1858         const float EstWeight = normalizeSpillWeight(
1859             blockFreq * (NewGaps + 1),
1860             Uses[SplitBefore].distance(Uses[SplitAfter]) +
1861                 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1862             1);
1863         // Would this split be possible to allocate?
1864         // Never allocate all gaps, we wouldn't be making progress.
1865         DEBUG(dbgs() << " w=" << EstWeight);
1866         if (EstWeight * Hysteresis >= MaxGap) {
1867           Shrink = false;
1868           float Diff = EstWeight - MaxGap;
1869           if (Diff > BestDiff) {
1870             DEBUG(dbgs() << " (best)");
1871             BestDiff = Hysteresis * Diff;
1872             BestBefore = SplitBefore;
1873             BestAfter = SplitAfter;
1874           }
1875         }
1876       }
1877 
1878       // Try to shrink.
1879       if (Shrink) {
1880         if (++SplitBefore < SplitAfter) {
1881           DEBUG(dbgs() << " shrink\n");
1882           // Recompute the max when necessary.
1883           if (GapWeight[SplitBefore - 1] >= MaxGap) {
1884             MaxGap = GapWeight[SplitBefore];
1885             for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
1886               MaxGap = std::max(MaxGap, GapWeight[i]);
1887           }
1888           continue;
1889         }
1890         MaxGap = 0;
1891       }
1892 
1893       // Try to extend the interval.
1894       if (SplitAfter >= NumGaps) {
1895         DEBUG(dbgs() << " end\n");
1896         break;
1897       }
1898 
1899       DEBUG(dbgs() << " extend\n");
1900       MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1901     }
1902   }
1903 
1904   // Didn't find any candidates?
1905   if (BestBefore == NumGaps)
1906     return 0;
1907 
1908   DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
1909                << '-' << Uses[BestAfter] << ", " << BestDiff
1910                << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
1911 
1912   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1913   SE->reset(LREdit);
1914 
1915   SE->openIntv();
1916   SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1917   SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
1918   SE->useIntv(SegStart, SegStop);
1919   SmallVector<unsigned, 8> IntvMap;
1920   SE->finish(&IntvMap);
1921   DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
1922 
1923   // If the new range has the same number of instructions as before, mark it as
1924   // RS_Split2 so the next split will be forced to make progress. Otherwise,
1925   // leave the new intervals as RS_New so they can compete.
1926   bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1927   bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1928   unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1929   if (NewGaps >= NumGaps) {
1930     DEBUG(dbgs() << "Tagging non-progress ranges: ");
1931     assert(!ProgressRequired && "Didn't make progress when it was required.");
1932     for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
1933       if (IntvMap[i] == 1) {
1934         setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
1935         DEBUG(dbgs() << PrintReg(LREdit.get(i)));
1936       }
1937     DEBUG(dbgs() << '\n');
1938   }
1939   ++NumLocalSplits;
1940 
1941   return 0;
1942 }
1943 
1944 //===----------------------------------------------------------------------===//
1945 //                          Live Range Splitting
1946 //===----------------------------------------------------------------------===//
1947 
1948 /// trySplit - Try to split VirtReg or one of its interferences, making it
1949 /// assignable.
1950 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
1951 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
1952                             SmallVectorImpl<unsigned>&NewVRegs) {
1953   // Ranges must be Split2 or less.
1954   if (getStage(VirtReg) >= RS_Spill)
1955     return 0;
1956 
1957   // Local intervals are handled separately.
1958   if (LIS->intervalIsInOneMBB(VirtReg)) {
1959     NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
1960     SA->analyze(&VirtReg);
1961     unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1962     if (PhysReg || !NewVRegs.empty())
1963       return PhysReg;
1964     return tryInstructionSplit(VirtReg, Order, NewVRegs);
1965   }
1966 
1967   NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
1968 
1969   SA->analyze(&VirtReg);
1970 
1971   // FIXME: SplitAnalysis may repair broken live ranges coming from the
1972   // coalescer. That may cause the range to become allocatable which means that
1973   // tryRegionSplit won't be making progress. This check should be replaced with
1974   // an assertion when the coalescer is fixed.
1975   if (SA->didRepairRange()) {
1976     // VirtReg has changed, so all cached queries are invalid.
1977     Matrix->invalidateVirtRegs();
1978     if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
1979       return PhysReg;
1980   }
1981 
1982   // First try to split around a region spanning multiple blocks. RS_Split2
1983   // ranges already made dubious progress with region splitting, so they go
1984   // straight to single block splitting.
1985   if (getStage(VirtReg) < RS_Split2) {
1986     unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1987     if (PhysReg || !NewVRegs.empty())
1988       return PhysReg;
1989   }
1990 
1991   // Then isolate blocks.
1992   return tryBlockSplit(VirtReg, Order, NewVRegs);
1993 }
1994 
1995 //===----------------------------------------------------------------------===//
1996 //                          Last Chance Recoloring
1997 //===----------------------------------------------------------------------===//
1998 
1999 /// mayRecolorAllInterferences - Check if the virtual registers that
2000 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
2001 /// recolored to free \p PhysReg.
2002 /// When true is returned, \p RecoloringCandidates has been augmented with all
2003 /// the live intervals that need to be recolored in order to free \p PhysReg
2004 /// for \p VirtReg.
2005 /// \p FixedRegisters contains all the virtual registers that cannot be
2006 /// recolored.
2007 bool
2008 RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
2009                                      SmallLISet &RecoloringCandidates,
2010                                      const SmallVirtRegSet &FixedRegisters) {
2011   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
2012 
2013   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
2014     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
2015     // If there is LastChanceRecoloringMaxInterference or more interferences,
2016     // chances are one would not be recolorable.
2017     if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
2018         LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
2019       DEBUG(dbgs() << "Early abort: too many interferences.\n");
2020       CutOffInfo |= CO_Interf;
2021       return false;
2022     }
2023     for (unsigned i = Q.interferingVRegs().size(); i; --i) {
2024       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
2025       // If Intf is done and sit on the same register class as VirtReg,
2026       // it would not be recolorable as it is in the same state as VirtReg.
2027       if ((getStage(*Intf) == RS_Done &&
2028            MRI->getRegClass(Intf->reg) == CurRC) ||
2029           FixedRegisters.count(Intf->reg)) {
2030         DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n");
2031         return false;
2032       }
2033       RecoloringCandidates.insert(Intf);
2034     }
2035   }
2036   return true;
2037 }
2038 
2039 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
2040 /// its interferences.
2041 /// Last chance recoloring chooses a color for \p VirtReg and recolors every
2042 /// virtual register that was using it. The recoloring process may recursively
2043 /// use the last chance recoloring. Therefore, when a virtual register has been
2044 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
2045 /// be last-chance-recolored again during this recoloring "session".
2046 /// E.g.,
2047 /// Let
2048 /// vA can use {R1, R2    }
2049 /// vB can use {    R2, R3}
2050 /// vC can use {R1        }
2051 /// Where vA, vB, and vC cannot be split anymore (they are reloads for
2052 /// instance) and they all interfere.
2053 ///
2054 /// vA is assigned R1
2055 /// vB is assigned R2
2056 /// vC tries to evict vA but vA is already done.
2057 /// Regular register allocation fails.
2058 ///
2059 /// Last chance recoloring kicks in:
2060 /// vC does as if vA was evicted => vC uses R1.
2061 /// vC is marked as fixed.
2062 /// vA needs to find a color.
2063 /// None are available.
2064 /// vA cannot evict vC: vC is a fixed virtual register now.
2065 /// vA does as if vB was evicted => vA uses R2.
2066 /// vB needs to find a color.
2067 /// R3 is available.
2068 /// Recoloring => vC = R1, vA = R2, vB = R3
2069 ///
2070 /// \p Order defines the preferred allocation order for \p VirtReg.
2071 /// \p NewRegs will contain any new virtual register that have been created
2072 /// (split, spill) during the process and that must be assigned.
2073 /// \p FixedRegisters contains all the virtual registers that cannot be
2074 /// recolored.
2075 /// \p Depth gives the current depth of the last chance recoloring.
2076 /// \return a physical register that can be used for VirtReg or ~0u if none
2077 /// exists.
2078 unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
2079                                            AllocationOrder &Order,
2080                                            SmallVectorImpl<unsigned> &NewVRegs,
2081                                            SmallVirtRegSet &FixedRegisters,
2082                                            unsigned Depth) {
2083   DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
2084   // Ranges must be Done.
2085   assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
2086          "Last chance recoloring should really be last chance");
2087   // Set the max depth to LastChanceRecoloringMaxDepth.
2088   // We may want to reconsider that if we end up with a too large search space
2089   // for target with hundreds of registers.
2090   // Indeed, in that case we may want to cut the search space earlier.
2091   if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
2092     DEBUG(dbgs() << "Abort because max depth has been reached.\n");
2093     CutOffInfo |= CO_Depth;
2094     return ~0u;
2095   }
2096 
2097   // Set of Live intervals that will need to be recolored.
2098   SmallLISet RecoloringCandidates;
2099   // Record the original mapping virtual register to physical register in case
2100   // the recoloring fails.
2101   DenseMap<unsigned, unsigned> VirtRegToPhysReg;
2102   // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
2103   // this recoloring "session".
2104   FixedRegisters.insert(VirtReg.reg);
2105 
2106   Order.rewind();
2107   while (unsigned PhysReg = Order.next()) {
2108     DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
2109                  << PrintReg(PhysReg, TRI) << '\n');
2110     RecoloringCandidates.clear();
2111     VirtRegToPhysReg.clear();
2112 
2113     // It is only possible to recolor virtual register interference.
2114     if (Matrix->checkInterference(VirtReg, PhysReg) >
2115         LiveRegMatrix::IK_VirtReg) {
2116       DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n");
2117 
2118       continue;
2119     }
2120 
2121     // Early give up on this PhysReg if it is obvious we cannot recolor all
2122     // the interferences.
2123     if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2124                                     FixedRegisters)) {
2125       DEBUG(dbgs() << "Some inteferences cannot be recolored.\n");
2126       continue;
2127     }
2128 
2129     // RecoloringCandidates contains all the virtual registers that interfer
2130     // with VirtReg on PhysReg (or one of its aliases).
2131     // Enqueue them for recoloring and perform the actual recoloring.
2132     PQueue RecoloringQueue;
2133     for (SmallLISet::iterator It = RecoloringCandidates.begin(),
2134                               EndIt = RecoloringCandidates.end();
2135          It != EndIt; ++It) {
2136       unsigned ItVirtReg = (*It)->reg;
2137       enqueue(RecoloringQueue, *It);
2138       assert(VRM->hasPhys(ItVirtReg) &&
2139              "Interferences are supposed to be with allocated vairables");
2140 
2141       // Record the current allocation.
2142       VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
2143       // unset the related struct.
2144       Matrix->unassign(**It);
2145     }
2146 
2147     // Do as if VirtReg was assigned to PhysReg so that the underlying
2148     // recoloring has the right information about the interferes and
2149     // available colors.
2150     Matrix->assign(VirtReg, PhysReg);
2151 
2152     // Save the current recoloring state.
2153     // If we cannot recolor all the interferences, we will have to start again
2154     // at this point for the next physical register.
2155     SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2156     if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
2157                                 Depth)) {
2158       // Do not mess up with the global assignment process.
2159       // I.e., VirtReg must be unassigned.
2160       Matrix->unassign(VirtReg);
2161       return PhysReg;
2162     }
2163 
2164     DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2165                  << PrintReg(PhysReg, TRI) << '\n');
2166 
2167     // The recoloring attempt failed, undo the changes.
2168     FixedRegisters = SaveFixedRegisters;
2169     Matrix->unassign(VirtReg);
2170 
2171     for (SmallLISet::iterator It = RecoloringCandidates.begin(),
2172                               EndIt = RecoloringCandidates.end();
2173          It != EndIt; ++It) {
2174       unsigned ItVirtReg = (*It)->reg;
2175       if (VRM->hasPhys(ItVirtReg))
2176         Matrix->unassign(**It);
2177       unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg];
2178       Matrix->assign(**It, ItPhysReg);
2179     }
2180   }
2181 
2182   // Last chance recoloring did not worked either, give up.
2183   return ~0u;
2184 }
2185 
2186 /// tryRecoloringCandidates - Try to assign a new color to every register
2187 /// in \RecoloringQueue.
2188 /// \p NewRegs will contain any new virtual register created during the
2189 /// recoloring process.
2190 /// \p FixedRegisters[in/out] contains all the registers that have been
2191 /// recolored.
2192 /// \return true if all virtual registers in RecoloringQueue were successfully
2193 /// recolored, false otherwise.
2194 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2195                                        SmallVectorImpl<unsigned> &NewVRegs,
2196                                        SmallVirtRegSet &FixedRegisters,
2197                                        unsigned Depth) {
2198   while (!RecoloringQueue.empty()) {
2199     LiveInterval *LI = dequeue(RecoloringQueue);
2200     DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2201     unsigned PhysReg;
2202     PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
2203     if (PhysReg == ~0u || !PhysReg)
2204       return false;
2205     DEBUG(dbgs() << "Recoloring of " << *LI
2206                  << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
2207     Matrix->assign(*LI, PhysReg);
2208     FixedRegisters.insert(LI->reg);
2209   }
2210   return true;
2211 }
2212 
2213 //===----------------------------------------------------------------------===//
2214 //                            Main Entry Point
2215 //===----------------------------------------------------------------------===//
2216 
2217 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
2218                                  SmallVectorImpl<unsigned> &NewVRegs) {
2219   CutOffInfo = CO_None;
2220   LLVMContext &Ctx = MF->getFunction()->getContext();
2221   SmallVirtRegSet FixedRegisters;
2222   unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
2223   if (Reg == ~0U && (CutOffInfo != CO_None)) {
2224     uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2225     if (CutOffEncountered == CO_Depth)
2226       Ctx.emitError("register allocation failed: maximum depth for recoloring "
2227                     "reached. Use -fexhaustive-register-search to skip "
2228                     "cutoffs");
2229     else if (CutOffEncountered == CO_Interf)
2230       Ctx.emitError("register allocation failed: maximum interference for "
2231                     "recoloring reached. Use -fexhaustive-register-search "
2232                     "to skip cutoffs");
2233     else if (CutOffEncountered == (CO_Depth | CO_Interf))
2234       Ctx.emitError("register allocation failed: maximum interference and "
2235                     "depth for recoloring reached. Use "
2236                     "-fexhaustive-register-search to skip cutoffs");
2237   }
2238   return Reg;
2239 }
2240 
2241 /// Using a CSR for the first time has a cost because it causes push|pop
2242 /// to be added to prologue|epilogue. Splitting a cold section of the live
2243 /// range can have lower cost than using the CSR for the first time;
2244 /// Spilling a live range in the cold path can have lower cost than using
2245 /// the CSR for the first time. Returns the physical register if we decide
2246 /// to use the CSR; otherwise return 0.
2247 unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
2248                                          AllocationOrder &Order,
2249                                          unsigned PhysReg,
2250                                          unsigned &CostPerUseLimit,
2251                                          SmallVectorImpl<unsigned> &NewVRegs) {
2252   if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2253     // We choose spill over using the CSR for the first time if the spill cost
2254     // is lower than CSRCost.
2255     SA->analyze(&VirtReg);
2256     if (calcSpillCost() >= CSRCost)
2257       return PhysReg;
2258 
2259     // We are going to spill, set CostPerUseLimit to 1 to make sure that
2260     // we will not use a callee-saved register in tryEvict.
2261     CostPerUseLimit = 1;
2262     return 0;
2263   }
2264   if (getStage(VirtReg) < RS_Split) {
2265     // We choose pre-splitting over using the CSR for the first time if
2266     // the cost of splitting is lower than CSRCost.
2267     SA->analyze(&VirtReg);
2268     unsigned NumCands = 0;
2269     BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2270     unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2271                                                  NumCands, true /*IgnoreCSR*/);
2272     if (BestCand == NoCand)
2273       // Use the CSR if we can't find a region split below CSRCost.
2274       return PhysReg;
2275 
2276     // Perform the actual pre-splitting.
2277     doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2278     return 0;
2279   }
2280   return PhysReg;
2281 }
2282 
2283 void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
2284   // Do not keep invalid information around.
2285   SetOfBrokenHints.remove(&LI);
2286 }
2287 
2288 void RAGreedy::initializeCSRCost() {
2289   // We use the larger one out of the command-line option and the value report
2290   // by TRI.
2291   CSRCost = BlockFrequency(
2292       std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2293   if (!CSRCost.getFrequency())
2294     return;
2295 
2296   // Raw cost is relative to Entry == 2^14; scale it appropriately.
2297   uint64_t ActualEntry = MBFI->getEntryFreq();
2298   if (!ActualEntry) {
2299     CSRCost = 0;
2300     return;
2301   }
2302   uint64_t FixedEntry = 1 << 14;
2303   if (ActualEntry < FixedEntry)
2304     CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2305   else if (ActualEntry <= UINT32_MAX)
2306     // Invert the fraction and divide.
2307     CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2308   else
2309     // Can't use BranchProbability in general, since it takes 32-bit numbers.
2310     CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
2311 }
2312 
2313 /// \brief Collect the hint info for \p Reg.
2314 /// The results are stored into \p Out.
2315 /// \p Out is not cleared before being populated.
2316 void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
2317   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2318     if (!Instr.isFullCopy())
2319       continue;
2320     // Look for the other end of the copy.
2321     unsigned OtherReg = Instr.getOperand(0).getReg();
2322     if (OtherReg == Reg) {
2323       OtherReg = Instr.getOperand(1).getReg();
2324       if (OtherReg == Reg)
2325         continue;
2326     }
2327     // Get the current assignment.
2328     unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
2329                                 ? OtherReg
2330                                 : VRM->getPhys(OtherReg);
2331     // Push the collected information.
2332     Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2333                            OtherPhysReg));
2334   }
2335 }
2336 
2337 /// \brief Using the given \p List, compute the cost of the broken hints if
2338 /// \p PhysReg was used.
2339 /// \return The cost of \p List for \p PhysReg.
2340 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2341                                            unsigned PhysReg) {
2342   BlockFrequency Cost = 0;
2343   for (const HintInfo &Info : List) {
2344     if (Info.PhysReg != PhysReg)
2345       Cost += Info.Freq;
2346   }
2347   return Cost;
2348 }
2349 
2350 /// \brief Using the register assigned to \p VirtReg, try to recolor
2351 /// all the live ranges that are copy-related with \p VirtReg.
2352 /// The recoloring is then propagated to all the live-ranges that have
2353 /// been recolored and so on, until no more copies can be coalesced or
2354 /// it is not profitable.
2355 /// For a given live range, profitability is determined by the sum of the
2356 /// frequencies of the non-identity copies it would introduce with the old
2357 /// and new register.
2358 void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
2359   // We have a broken hint, check if it is possible to fix it by
2360   // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2361   // some register and PhysReg may be available for the other live-ranges.
2362   SmallSet<unsigned, 4> Visited;
2363   SmallVector<unsigned, 2> RecoloringCandidates;
2364   HintsInfo Info;
2365   unsigned Reg = VirtReg.reg;
2366   unsigned PhysReg = VRM->getPhys(Reg);
2367   // Start the recoloring algorithm from the input live-interval, then
2368   // it will propagate to the ones that are copy-related with it.
2369   Visited.insert(Reg);
2370   RecoloringCandidates.push_back(Reg);
2371 
2372   DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
2373                << PrintReg(PhysReg, TRI) << ")\n");
2374 
2375   do {
2376     Reg = RecoloringCandidates.pop_back_val();
2377 
2378     // We cannot recolor physcal register.
2379     if (TargetRegisterInfo::isPhysicalRegister(Reg))
2380       continue;
2381 
2382     assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");
2383 
2384     // Get the live interval mapped with this virtual register to be able
2385     // to check for the interference with the new color.
2386     LiveInterval &LI = LIS->getInterval(Reg);
2387     unsigned CurrPhys = VRM->getPhys(Reg);
2388     // Check that the new color matches the register class constraints and
2389     // that it is free for this live range.
2390     if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2391                                 Matrix->checkInterference(LI, PhysReg)))
2392       continue;
2393 
2394     DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
2395                  << ") is recolorable.\n");
2396 
2397     // Gather the hint info.
2398     Info.clear();
2399     collectHintInfo(Reg, Info);
2400     // Check if recoloring the live-range will increase the cost of the
2401     // non-identity copies.
2402     if (CurrPhys != PhysReg) {
2403       DEBUG(dbgs() << "Checking profitability:\n");
2404       BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2405       BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2406       DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
2407                    << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
2408       if (OldCopiesCost < NewCopiesCost) {
2409         DEBUG(dbgs() << "=> Not profitable.\n");
2410         continue;
2411       }
2412       // At this point, the cost is either cheaper or equal. If it is
2413       // equal, we consider this is profitable because it may expose
2414       // more recoloring opportunities.
2415       DEBUG(dbgs() << "=> Profitable.\n");
2416       // Recolor the live-range.
2417       Matrix->unassign(LI);
2418       Matrix->assign(LI, PhysReg);
2419     }
2420     // Push all copy-related live-ranges to keep reconciling the broken
2421     // hints.
2422     for (const HintInfo &HI : Info) {
2423       if (Visited.insert(HI.Reg).second)
2424         RecoloringCandidates.push_back(HI.Reg);
2425     }
2426   } while (!RecoloringCandidates.empty());
2427 }
2428 
2429 /// \brief Try to recolor broken hints.
2430 /// Broken hints may be repaired by recoloring when an evicted variable
2431 /// freed up a register for a larger live-range.
2432 /// Consider the following example:
2433 /// BB1:
2434 ///   a =
2435 ///   b =
2436 /// BB2:
2437 ///   ...
2438 ///   = b
2439 ///   = a
2440 /// Let us assume b gets split:
2441 /// BB1:
2442 ///   a =
2443 ///   b =
2444 /// BB2:
2445 ///   c = b
2446 ///   ...
2447 ///   d = c
2448 ///   = d
2449 ///   = a
2450 /// Because of how the allocation work, b, c, and d may be assigned different
2451 /// colors. Now, if a gets evicted later:
2452 /// BB1:
2453 ///   a =
2454 ///   st a, SpillSlot
2455 ///   b =
2456 /// BB2:
2457 ///   c = b
2458 ///   ...
2459 ///   d = c
2460 ///   = d
2461 ///   e = ld SpillSlot
2462 ///   = e
2463 /// This is likely that we can assign the same register for b, c, and d,
2464 /// getting rid of 2 copies.
2465 void RAGreedy::tryHintsRecoloring() {
2466   for (LiveInterval *LI : SetOfBrokenHints) {
2467     assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
2468            "Recoloring is possible only for virtual registers");
2469     // Some dead defs may be around (e.g., because of debug uses).
2470     // Ignore those.
2471     if (!VRM->hasPhys(LI->reg))
2472       continue;
2473     tryHintRecoloring(*LI);
2474   }
2475 }
2476 
2477 unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
2478                                      SmallVectorImpl<unsigned> &NewVRegs,
2479                                      SmallVirtRegSet &FixedRegisters,
2480                                      unsigned Depth) {
2481   unsigned CostPerUseLimit = ~0u;
2482   // First try assigning a free register.
2483   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
2484   if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
2485     // When NewVRegs is not empty, we may have made decisions such as evicting
2486     // a virtual register, go with the earlier decisions and use the physical
2487     // register.
2488     if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
2489         NewVRegs.empty()) {
2490       unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2491                                               CostPerUseLimit, NewVRegs);
2492       if (CSRReg || !NewVRegs.empty())
2493         // Return now if we decide to use a CSR or create new vregs due to
2494         // pre-splitting.
2495         return CSRReg;
2496     } else
2497       return PhysReg;
2498   }
2499 
2500   LiveRangeStage Stage = getStage(VirtReg);
2501   DEBUG(dbgs() << StageName[Stage]
2502                << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
2503 
2504   // Try to evict a less worthy live range, but only for ranges from the primary
2505   // queue. The RS_Split ranges already failed to do this, and they should not
2506   // get a second chance until they have been split.
2507   if (Stage != RS_Split)
2508     if (unsigned PhysReg =
2509             tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
2510       unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
2511       // If VirtReg has a hint and that hint is broken record this
2512       // virtual register as a recoloring candidate for broken hint.
2513       // Indeed, since we evicted a variable in its neighborhood it is
2514       // likely we can at least partially recolor some of the
2515       // copy-related live-ranges.
2516       if (Hint && Hint != PhysReg)
2517         SetOfBrokenHints.insert(&VirtReg);
2518       return PhysReg;
2519     }
2520 
2521   assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
2522 
2523   // The first time we see a live range, don't try to split or spill.
2524   // Wait until the second time, when all smaller ranges have been allocated.
2525   // This gives a better picture of the interference to split around.
2526   if (Stage < RS_Split) {
2527     setStage(VirtReg, RS_Split);
2528     DEBUG(dbgs() << "wait for second round\n");
2529     NewVRegs.push_back(VirtReg.reg);
2530     return 0;
2531   }
2532 
2533   // If we couldn't allocate a register from spilling, there is probably some
2534   // invalid inline assembly. The base class wil report it.
2535   if (Stage >= RS_Done || !VirtReg.isSpillable())
2536     return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2537                                    Depth);
2538 
2539   // Try splitting VirtReg or interferences.
2540   unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
2541   if (PhysReg || !NewVRegs.empty())
2542     return PhysReg;
2543 
2544   // Finally spill VirtReg itself.
2545   if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
2546     // TODO: This is experimental and in particular, we do not model
2547     // the live range splitting done by spilling correctly.
2548     // We would need a deep integration with the spiller to do the
2549     // right thing here. Anyway, that is still good for early testing.
2550     setStage(VirtReg, RS_Memory);
2551     DEBUG(dbgs() << "Do as if this register is in memory\n");
2552     NewVRegs.push_back(VirtReg.reg);
2553   } else {
2554     NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
2555     LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2556     spiller().spill(LRE);
2557     setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2558 
2559     if (VerifyEnabled)
2560       MF->verify(this, "After spilling");
2561   }
2562 
2563   // The live virtual register requesting allocation was spilled, so tell
2564   // the caller not to allocate anything during this round.
2565   return 0;
2566 }
2567 
2568 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2569   DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2570                << "********** Function: " << mf.getName() << '\n');
2571 
2572   MF = &mf;
2573   TRI = MF->getSubtarget().getRegisterInfo();
2574   TII = MF->getSubtarget().getInstrInfo();
2575   RCI.runOnMachineFunction(mf);
2576 
2577   EnableLocalReassign = EnableLocalReassignment ||
2578                         MF->getSubtarget().enableRALocalReassignment(
2579                             MF->getTarget().getOptLevel());
2580 
2581   if (VerifyEnabled)
2582     MF->verify(this, "Before greedy register allocator");
2583 
2584   RegAllocBase::init(getAnalysis<VirtRegMap>(),
2585                      getAnalysis<LiveIntervals>(),
2586                      getAnalysis<LiveRegMatrix>());
2587   Indexes = &getAnalysis<SlotIndexes>();
2588   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2589   DomTree = &getAnalysis<MachineDominatorTree>();
2590   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
2591   Loops = &getAnalysis<MachineLoopInfo>();
2592   Bundles = &getAnalysis<EdgeBundles>();
2593   SpillPlacer = &getAnalysis<SpillPlacement>();
2594   DebugVars = &getAnalysis<LiveDebugVariables>();
2595 
2596   initializeCSRCost();
2597 
2598   calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
2599 
2600   DEBUG(LIS->dump());
2601 
2602   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2603   SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI));
2604   ExtraRegInfo.clear();
2605   ExtraRegInfo.resize(MRI->getNumVirtRegs());
2606   NextCascade = 1;
2607   IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2608   GlobalCand.resize(32);  // This will grow as needed.
2609   SetOfBrokenHints.clear();
2610 
2611   allocatePhysRegs();
2612   tryHintsRecoloring();
2613   postOptimization();
2614 
2615   releaseMemory();
2616   return true;
2617 }
2618