1 //===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the RAGreedy function pass for register allocation in
11 // optimized builds.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/Passes.h"
16 #include "AllocationOrder.h"
17 #include "InterferenceCache.h"
18 #include "LiveDebugVariables.h"
19 #include "RegAllocBase.h"
20 #include "SpillPlacement.h"
21 #include "Spiller.h"
22 #include "SplitKit.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/CalcSpillWeights.h"
26 #include "llvm/CodeGen/EdgeBundles.h"
27 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
28 #include "llvm/CodeGen/LiveRangeEdit.h"
29 #include "llvm/CodeGen/LiveRegMatrix.h"
30 #include "llvm/CodeGen/LiveStackAnalysis.h"
31 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32 #include "llvm/CodeGen/MachineDominators.h"
33 #include "llvm/CodeGen/MachineFunctionPass.h"
34 #include "llvm/CodeGen/MachineLoopInfo.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/CodeGen/RegAllocRegistry.h"
37 #include "llvm/CodeGen/RegisterClassInfo.h"
38 #include "llvm/CodeGen/VirtRegMap.h"
39 #include "llvm/IR/LLVMContext.h"
40 #include "llvm/PassAnalysisSupport.h"
41 #include "llvm/Support/BranchProbability.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/Timer.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include "llvm/Target/TargetSubtargetInfo.h"
48 #include <queue>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "regalloc"
53 
54 STATISTIC(NumGlobalSplits, "Number of split global live ranges");
55 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
56 STATISTIC(NumEvicted,      "Number of interferences evicted");
57 
58 static cl::opt<SplitEditor::ComplementSpillMode>
59 SplitSpillMode("split-spill-mode", cl::Hidden,
60   cl::desc("Spill mode for splitting live ranges"),
61   cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
62              clEnumValN(SplitEditor::SM_Size,  "size",  "Optimize for size"),
63              clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
64              clEnumValEnd),
65   cl::init(SplitEditor::SM_Partition));
66 
67 static cl::opt<unsigned>
68 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
69                              cl::desc("Last chance recoloring max depth"),
70                              cl::init(5));
71 
72 static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
73     "lcr-max-interf", cl::Hidden,
74     cl::desc("Last chance recoloring maximum number of considered"
75              " interference at a time"),
76     cl::init(8));
77 
78 static cl::opt<bool>
79 ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
80                  cl::desc("Exhaustive Search for registers bypassing the depth "
81                           "and interference cutoffs of last chance recoloring"));
82 
83 static cl::opt<bool> EnableLocalReassignment(
84     "enable-local-reassign", cl::Hidden,
85     cl::desc("Local reassignment can yield better allocation decisions, but "
86              "may be compile time intensive"),
87     cl::init(false));
88 
89 static cl::opt<bool> EnableDeferredSpilling(
90     "enable-deferred-spilling", cl::Hidden,
91     cl::desc("Instead of spilling a variable right away, defer the actual "
92              "code insertion to the end of the allocation. That way the "
93              "allocator might still find a suitable coloring for this "
94              "variable because of other evicted variables."),
95     cl::init(false));
96 
97 // FIXME: Find a good default for this flag and remove the flag.
98 static cl::opt<unsigned>
99 CSRFirstTimeCost("regalloc-csr-first-time-cost",
100               cl::desc("Cost for first time use of callee-saved register."),
101               cl::init(0), cl::Hidden);
102 
103 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
104                                        createGreedyRegisterAllocator);
105 
106 namespace {
107 class RAGreedy : public MachineFunctionPass,
108                  public RegAllocBase,
109                  private LiveRangeEdit::Delegate {
110   // Convenient shortcuts.
111   typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
112   typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
113   typedef SmallSet<unsigned, 16> SmallVirtRegSet;
114 
115   // context
116   MachineFunction *MF;
117 
118   // Shortcuts to some useful interface.
119   const TargetInstrInfo *TII;
120   const TargetRegisterInfo *TRI;
121   RegisterClassInfo RCI;
122 
123   // analyses
124   SlotIndexes *Indexes;
125   MachineBlockFrequencyInfo *MBFI;
126   MachineDominatorTree *DomTree;
127   MachineLoopInfo *Loops;
128   EdgeBundles *Bundles;
129   SpillPlacement *SpillPlacer;
130   LiveDebugVariables *DebugVars;
131 
132   // state
133   std::unique_ptr<Spiller> SpillerInstance;
134   PQueue Queue;
135   unsigned NextCascade;
136 
137   // Live ranges pass through a number of stages as we try to allocate them.
138   // Some of the stages may also create new live ranges:
139   //
140   // - Region splitting.
141   // - Per-block splitting.
142   // - Local splitting.
143   // - Spilling.
144   //
145   // Ranges produced by one of the stages skip the previous stages when they are
146   // dequeued. This improves performance because we can skip interference checks
147   // that are unlikely to give any results. It also guarantees that the live
148   // range splitting algorithm terminates, something that is otherwise hard to
149   // ensure.
150   enum LiveRangeStage {
151     /// Newly created live range that has never been queued.
152     RS_New,
153 
154     /// Only attempt assignment and eviction. Then requeue as RS_Split.
155     RS_Assign,
156 
157     /// Attempt live range splitting if assignment is impossible.
158     RS_Split,
159 
160     /// Attempt more aggressive live range splitting that is guaranteed to make
161     /// progress.  This is used for split products that may not be making
162     /// progress.
163     RS_Split2,
164 
165     /// Live range will be spilled.  No more splitting will be attempted.
166     RS_Spill,
167 
168 
169     /// Live range is in memory. Because of other evictions, it might get moved
170     /// in a register in the end.
171     RS_Memory,
172 
173     /// There is nothing more we can do to this live range.  Abort compilation
174     /// if it can't be assigned.
175     RS_Done
176   };
177 
178   // Enum CutOffStage to keep a track whether the register allocation failed
179   // because of the cutoffs encountered in last chance recoloring.
180   // Note: This is used as bitmask. New value should be next power of 2.
181   enum CutOffStage {
182     // No cutoffs encountered
183     CO_None = 0,
184 
185     // lcr-max-depth cutoff encountered
186     CO_Depth = 1,
187 
188     // lcr-max-interf cutoff encountered
189     CO_Interf = 2
190   };
191 
192   uint8_t CutOffInfo;
193 
194 #ifndef NDEBUG
195   static const char *const StageName[];
196 #endif
197 
198   // RegInfo - Keep additional information about each live range.
199   struct RegInfo {
200     LiveRangeStage Stage;
201 
202     // Cascade - Eviction loop prevention. See canEvictInterference().
203     unsigned Cascade;
204 
205     RegInfo() : Stage(RS_New), Cascade(0) {}
206   };
207 
208   IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
209 
210   LiveRangeStage getStage(const LiveInterval &VirtReg) const {
211     return ExtraRegInfo[VirtReg.reg].Stage;
212   }
213 
214   void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
215     ExtraRegInfo.resize(MRI->getNumVirtRegs());
216     ExtraRegInfo[VirtReg.reg].Stage = Stage;
217   }
218 
219   template<typename Iterator>
220   void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
221     ExtraRegInfo.resize(MRI->getNumVirtRegs());
222     for (;Begin != End; ++Begin) {
223       unsigned Reg = *Begin;
224       if (ExtraRegInfo[Reg].Stage == RS_New)
225         ExtraRegInfo[Reg].Stage = NewStage;
226     }
227   }
228 
229   /// Cost of evicting interference.
230   struct EvictionCost {
231     unsigned BrokenHints; ///< Total number of broken hints.
232     float MaxWeight;      ///< Maximum spill weight evicted.
233 
234     EvictionCost(): BrokenHints(0), MaxWeight(0) {}
235 
236     bool isMax() const { return BrokenHints == ~0u; }
237 
238     void setMax() { BrokenHints = ~0u; }
239 
240     void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
241 
242     bool operator<(const EvictionCost &O) const {
243       return std::tie(BrokenHints, MaxWeight) <
244              std::tie(O.BrokenHints, O.MaxWeight);
245     }
246   };
247 
248   // splitting state.
249   std::unique_ptr<SplitAnalysis> SA;
250   std::unique_ptr<SplitEditor> SE;
251 
252   /// Cached per-block interference maps
253   InterferenceCache IntfCache;
254 
255   /// All basic blocks where the current register has uses.
256   SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
257 
258   /// Global live range splitting candidate info.
259   struct GlobalSplitCandidate {
260     // Register intended for assignment, or 0.
261     unsigned PhysReg;
262 
263     // SplitKit interval index for this candidate.
264     unsigned IntvIdx;
265 
266     // Interference for PhysReg.
267     InterferenceCache::Cursor Intf;
268 
269     // Bundles where this candidate should be live.
270     BitVector LiveBundles;
271     SmallVector<unsigned, 8> ActiveBlocks;
272 
273     void reset(InterferenceCache &Cache, unsigned Reg) {
274       PhysReg = Reg;
275       IntvIdx = 0;
276       Intf.setPhysReg(Cache, Reg);
277       LiveBundles.clear();
278       ActiveBlocks.clear();
279     }
280 
281     // Set B[i] = C for every live bundle where B[i] was NoCand.
282     unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
283       unsigned Count = 0;
284       for (int i = LiveBundles.find_first(); i >= 0;
285            i = LiveBundles.find_next(i))
286         if (B[i] == NoCand) {
287           B[i] = C;
288           Count++;
289         }
290       return Count;
291     }
292   };
293 
294   /// Candidate info for each PhysReg in AllocationOrder.
295   /// This vector never shrinks, but grows to the size of the largest register
296   /// class.
297   SmallVector<GlobalSplitCandidate, 32> GlobalCand;
298 
299   enum : unsigned { NoCand = ~0u };
300 
301   /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
302   /// NoCand which indicates the stack interval.
303   SmallVector<unsigned, 32> BundleCand;
304 
305   /// Callee-save register cost, calculated once per machine function.
306   BlockFrequency CSRCost;
307 
308   /// Run or not the local reassignment heuristic. This information is
309   /// obtained from the TargetSubtargetInfo.
310   bool EnableLocalReassign;
311 
312   /// Set of broken hints that may be reconciled later because of eviction.
313   SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
314 
315 public:
316   RAGreedy();
317 
318   /// Return the pass name.
319   const char* getPassName() const override {
320     return "Greedy Register Allocator";
321   }
322 
323   /// RAGreedy analysis usage.
324   void getAnalysisUsage(AnalysisUsage &AU) const override;
325   void releaseMemory() override;
326   Spiller &spiller() override { return *SpillerInstance; }
327   void enqueue(LiveInterval *LI) override;
328   LiveInterval *dequeue() override;
329   unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
330   void aboutToRemoveInterval(LiveInterval &) override;
331 
332   /// Perform register allocation.
333   bool runOnMachineFunction(MachineFunction &mf) override;
334 
335   static char ID;
336 
337 private:
338   unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
339                              SmallVirtRegSet &, unsigned = 0);
340 
341   bool LRE_CanEraseVirtReg(unsigned) override;
342   void LRE_WillShrinkVirtReg(unsigned) override;
343   void LRE_DidCloneVirtReg(unsigned, unsigned) override;
344   void enqueue(PQueue &CurQueue, LiveInterval *LI);
345   LiveInterval *dequeue(PQueue &CurQueue);
346 
347   BlockFrequency calcSpillCost();
348   bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
349   void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
350   void growRegion(GlobalSplitCandidate &Cand);
351   BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
352   bool calcCompactRegion(GlobalSplitCandidate&);
353   void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
354   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
355   unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
356   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
357   bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
358   void evictInterference(LiveInterval&, unsigned,
359                          SmallVectorImpl<unsigned>&);
360   bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
361                                   SmallLISet &RecoloringCandidates,
362                                   const SmallVirtRegSet &FixedRegisters);
363 
364   unsigned tryAssign(LiveInterval&, AllocationOrder&,
365                      SmallVectorImpl<unsigned>&);
366   unsigned tryEvict(LiveInterval&, AllocationOrder&,
367                     SmallVectorImpl<unsigned>&, unsigned = ~0u);
368   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
369                           SmallVectorImpl<unsigned>&);
370   /// Calculate cost of region splitting.
371   unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
372                                     AllocationOrder &Order,
373                                     BlockFrequency &BestCost,
374                                     unsigned &NumCands, bool IgnoreCSR);
375   /// Perform region splitting.
376   unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
377                          bool HasCompact,
378                          SmallVectorImpl<unsigned> &NewVRegs);
379   /// Check other options before using a callee-saved register for the first
380   /// time.
381   unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
382                                  unsigned PhysReg, unsigned &CostPerUseLimit,
383                                  SmallVectorImpl<unsigned> &NewVRegs);
384   void initializeCSRCost();
385   unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
386                          SmallVectorImpl<unsigned>&);
387   unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
388                                SmallVectorImpl<unsigned>&);
389   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
390     SmallVectorImpl<unsigned>&);
391   unsigned trySplit(LiveInterval&, AllocationOrder&,
392                     SmallVectorImpl<unsigned>&);
393   unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
394                                    SmallVectorImpl<unsigned> &,
395                                    SmallVirtRegSet &, unsigned);
396   bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
397                                SmallVirtRegSet &, unsigned);
398   void tryHintRecoloring(LiveInterval &);
399   void tryHintsRecoloring();
400 
401   /// Model the information carried by one end of a copy.
402   struct HintInfo {
403     /// The frequency of the copy.
404     BlockFrequency Freq;
405     /// The virtual register or physical register.
406     unsigned Reg;
407     /// Its currently assigned register.
408     /// In case of a physical register Reg == PhysReg.
409     unsigned PhysReg;
410     HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
411         : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
412   };
413   typedef SmallVector<HintInfo, 4> HintsInfo;
414   BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
415   void collectHintInfo(unsigned, HintsInfo &);
416 
417   bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
418 };
419 } // end anonymous namespace
420 
421 char RAGreedy::ID = 0;
422 
423 #ifndef NDEBUG
424 const char *const RAGreedy::StageName[] = {
425     "RS_New",
426     "RS_Assign",
427     "RS_Split",
428     "RS_Split2",
429     "RS_Spill",
430     "RS_Memory",
431     "RS_Done"
432 };
433 #endif
434 
435 // Hysteresis to use when comparing floats.
436 // This helps stabilize decisions based on float comparisons.
437 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
438 
439 
440 FunctionPass* llvm::createGreedyRegisterAllocator() {
441   return new RAGreedy();
442 }
443 
444 RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
445   initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
446   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
447   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
448   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
449   initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
450   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
451   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
452   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
453   initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
454   initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
455   initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
456   initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
457   initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
458 }
459 
460 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
461   AU.setPreservesCFG();
462   AU.addRequired<MachineBlockFrequencyInfo>();
463   AU.addPreserved<MachineBlockFrequencyInfo>();
464   AU.addRequired<AAResultsWrapperPass>();
465   AU.addPreserved<AAResultsWrapperPass>();
466   AU.addRequired<LiveIntervals>();
467   AU.addPreserved<LiveIntervals>();
468   AU.addRequired<SlotIndexes>();
469   AU.addPreserved<SlotIndexes>();
470   AU.addRequired<LiveDebugVariables>();
471   AU.addPreserved<LiveDebugVariables>();
472   AU.addRequired<LiveStacks>();
473   AU.addPreserved<LiveStacks>();
474   AU.addRequired<MachineDominatorTree>();
475   AU.addPreserved<MachineDominatorTree>();
476   AU.addRequired<MachineLoopInfo>();
477   AU.addPreserved<MachineLoopInfo>();
478   AU.addRequired<VirtRegMap>();
479   AU.addPreserved<VirtRegMap>();
480   AU.addRequired<LiveRegMatrix>();
481   AU.addPreserved<LiveRegMatrix>();
482   AU.addRequired<EdgeBundles>();
483   AU.addRequired<SpillPlacement>();
484   MachineFunctionPass::getAnalysisUsage(AU);
485 }
486 
487 
488 //===----------------------------------------------------------------------===//
489 //                     LiveRangeEdit delegate methods
490 //===----------------------------------------------------------------------===//
491 
492 bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
493   if (VRM->hasPhys(VirtReg)) {
494     LiveInterval &LI = LIS->getInterval(VirtReg);
495     Matrix->unassign(LI);
496     aboutToRemoveInterval(LI);
497     return true;
498   }
499   // Unassigned virtreg is probably in the priority queue.
500   // RegAllocBase will erase it after dequeueing.
501   return false;
502 }
503 
504 void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
505   if (!VRM->hasPhys(VirtReg))
506     return;
507 
508   // Register is assigned, put it back on the queue for reassignment.
509   LiveInterval &LI = LIS->getInterval(VirtReg);
510   Matrix->unassign(LI);
511   enqueue(&LI);
512 }
513 
514 void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
515   // Cloning a register we haven't even heard about yet?  Just ignore it.
516   if (!ExtraRegInfo.inBounds(Old))
517     return;
518 
519   // LRE may clone a virtual register because dead code elimination causes it to
520   // be split into connected components. The new components are much smaller
521   // than the original, so they should get a new chance at being assigned.
522   // same stage as the parent.
523   ExtraRegInfo[Old].Stage = RS_Assign;
524   ExtraRegInfo.grow(New);
525   ExtraRegInfo[New] = ExtraRegInfo[Old];
526 }
527 
528 void RAGreedy::releaseMemory() {
529   SpillerInstance.reset();
530   ExtraRegInfo.clear();
531   GlobalCand.clear();
532 }
533 
534 void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }
535 
536 void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
537   // Prioritize live ranges by size, assigning larger ranges first.
538   // The queue holds (size, reg) pairs.
539   const unsigned Size = LI->getSize();
540   const unsigned Reg = LI->reg;
541   assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
542          "Can only enqueue virtual registers");
543   unsigned Prio;
544 
545   ExtraRegInfo.grow(Reg);
546   if (ExtraRegInfo[Reg].Stage == RS_New)
547     ExtraRegInfo[Reg].Stage = RS_Assign;
548 
549   if (ExtraRegInfo[Reg].Stage == RS_Split) {
550     // Unsplit ranges that couldn't be allocated immediately are deferred until
551     // everything else has been allocated.
552     Prio = Size;
553   } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
554     // Memory operand should be considered last.
555     // Change the priority such that Memory operand are assigned in
556     // the reverse order that they came in.
557     // TODO: Make this a member variable and probably do something about hints.
558     static unsigned MemOp = 0;
559     Prio = MemOp++;
560   } else {
561     // Giant live ranges fall back to the global assignment heuristic, which
562     // prevents excessive spilling in pathological cases.
563     bool ReverseLocal = TRI->reverseLocalAssignment();
564     const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
565     bool ForceGlobal = !ReverseLocal &&
566       (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
567 
568     if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
569         LIS->intervalIsInOneMBB(*LI)) {
570       // Allocate original local ranges in linear instruction order. Since they
571       // are singly defined, this produces optimal coloring in the absence of
572       // global interference and other constraints.
573       if (!ReverseLocal)
574         Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
575       else {
576         // Allocating bottom up may allow many short LRGs to be assigned first
577         // to one of the cheap registers. This could be much faster for very
578         // large blocks on targets with many physical registers.
579         Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
580       }
581       Prio |= RC.AllocationPriority << 24;
582     } else {
583       // Allocate global and split ranges in long->short order. Long ranges that
584       // don't fit should be spilled (or split) ASAP so they don't create
585       // interference.  Mark a bit to prioritize global above local ranges.
586       Prio = (1u << 29) + Size;
587     }
588     // Mark a higher bit to prioritize global and local above RS_Split.
589     Prio |= (1u << 31);
590 
591     // Boost ranges that have a physical register hint.
592     if (VRM->hasKnownPreference(Reg))
593       Prio |= (1u << 30);
594   }
595   // The virtual register number is a tie breaker for same-sized ranges.
596   // Give lower vreg numbers higher priority to assign them first.
597   CurQueue.push(std::make_pair(Prio, ~Reg));
598 }
599 
600 LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
601 
602 LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
603   if (CurQueue.empty())
604     return nullptr;
605   LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
606   CurQueue.pop();
607   return LI;
608 }
609 
610 
611 //===----------------------------------------------------------------------===//
612 //                            Direct Assignment
613 //===----------------------------------------------------------------------===//
614 
615 /// tryAssign - Try to assign VirtReg to an available register.
616 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
617                              AllocationOrder &Order,
618                              SmallVectorImpl<unsigned> &NewVRegs) {
619   Order.rewind();
620   unsigned PhysReg;
621   while ((PhysReg = Order.next()))
622     if (!Matrix->checkInterference(VirtReg, PhysReg))
623       break;
624   if (!PhysReg || Order.isHint())
625     return PhysReg;
626 
627   // PhysReg is available, but there may be a better choice.
628 
629   // If we missed a simple hint, try to cheaply evict interference from the
630   // preferred register.
631   if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
632     if (Order.isHint(Hint)) {
633       DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
634       EvictionCost MaxCost;
635       MaxCost.setBrokenHints(1);
636       if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
637         evictInterference(VirtReg, Hint, NewVRegs);
638         return Hint;
639       }
640     }
641 
642   // Try to evict interference from a cheaper alternative.
643   unsigned Cost = TRI->getCostPerUse(PhysReg);
644 
645   // Most registers have 0 additional cost.
646   if (!Cost)
647     return PhysReg;
648 
649   DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
650                << '\n');
651   unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
652   return CheapReg ? CheapReg : PhysReg;
653 }
654 
655 
656 //===----------------------------------------------------------------------===//
657 //                         Interference eviction
658 //===----------------------------------------------------------------------===//
659 
660 unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
661   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
662   unsigned PhysReg;
663   while ((PhysReg = Order.next())) {
664     if (PhysReg == PrevReg)
665       continue;
666 
667     MCRegUnitIterator Units(PhysReg, TRI);
668     for (; Units.isValid(); ++Units) {
669       // Instantiate a "subquery", not to be confused with the Queries array.
670       LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
671       if (subQ.checkInterference())
672         break;
673     }
674     // If no units have interference, break out with the current PhysReg.
675     if (!Units.isValid())
676       break;
677   }
678   if (PhysReg)
679     DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
680           << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
681           << '\n');
682   return PhysReg;
683 }
684 
685 /// shouldEvict - determine if A should evict the assigned live range B. The
686 /// eviction policy defined by this function together with the allocation order
687 /// defined by enqueue() decides which registers ultimately end up being split
688 /// and spilled.
689 ///
690 /// Cascade numbers are used to prevent infinite loops if this function is a
691 /// cyclic relation.
692 ///
693 /// @param A          The live range to be assigned.
694 /// @param IsHint     True when A is about to be assigned to its preferred
695 ///                   register.
696 /// @param B          The live range to be evicted.
697 /// @param BreaksHint True when B is already assigned to its preferred register.
698 bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
699                            LiveInterval &B, bool BreaksHint) {
700   bool CanSplit = getStage(B) < RS_Spill;
701 
702   // Be fairly aggressive about following hints as long as the evictee can be
703   // split.
704   if (CanSplit && IsHint && !BreaksHint)
705     return true;
706 
707   if (A.weight > B.weight) {
708     DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
709     return true;
710   }
711   return false;
712 }
713 
714 /// canEvictInterference - Return true if all interferences between VirtReg and
715 /// PhysReg can be evicted.
716 ///
717 /// @param VirtReg Live range that is about to be assigned.
718 /// @param PhysReg Desired register for assignment.
719 /// @param IsHint  True when PhysReg is VirtReg's preferred register.
720 /// @param MaxCost Only look for cheaper candidates and update with new cost
721 ///                when returning true.
722 /// @returns True when interference can be evicted cheaper than MaxCost.
723 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
724                                     bool IsHint, EvictionCost &MaxCost) {
725   // It is only possible to evict virtual register interference.
726   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
727     return false;
728 
729   bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
730 
731   // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
732   // involved in an eviction before. If a cascade number was assigned, deny
733   // evicting anything with the same or a newer cascade number. This prevents
734   // infinite eviction loops.
735   //
736   // This works out so a register without a cascade number is allowed to evict
737   // anything, and it can be evicted by anything.
738   unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
739   if (!Cascade)
740     Cascade = NextCascade;
741 
742   EvictionCost Cost;
743   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
744     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
745     // If there is 10 or more interferences, chances are one is heavier.
746     if (Q.collectInterferingVRegs(10) >= 10)
747       return false;
748 
749     // Check if any interfering live range is heavier than MaxWeight.
750     for (unsigned i = Q.interferingVRegs().size(); i; --i) {
751       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
752       assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
753              "Only expecting virtual register interference from query");
754       // Never evict spill products. They cannot split or spill.
755       if (getStage(*Intf) == RS_Done)
756         return false;
757       // Once a live range becomes small enough, it is urgent that we find a
758       // register for it. This is indicated by an infinite spill weight. These
759       // urgent live ranges get to evict almost anything.
760       //
761       // Also allow urgent evictions of unspillable ranges from a strictly
762       // larger allocation order.
763       bool Urgent = !VirtReg.isSpillable() &&
764         (Intf->isSpillable() ||
765          RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
766          RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
767       // Only evict older cascades or live ranges without a cascade.
768       unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
769       if (Cascade <= IntfCascade) {
770         if (!Urgent)
771           return false;
772         // We permit breaking cascades for urgent evictions. It should be the
773         // last resort, though, so make it really expensive.
774         Cost.BrokenHints += 10;
775       }
776       // Would this break a satisfied hint?
777       bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
778       // Update eviction cost.
779       Cost.BrokenHints += BreaksHint;
780       Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
781       // Abort if this would be too expensive.
782       if (!(Cost < MaxCost))
783         return false;
784       if (Urgent)
785         continue;
786       // Apply the eviction policy for non-urgent evictions.
787       if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
788         return false;
789       // If !MaxCost.isMax(), then we're just looking for a cheap register.
790       // Evicting another local live range in this case could lead to suboptimal
791       // coloring.
792       if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
793           (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
794         return false;
795       }
796     }
797   }
798   MaxCost = Cost;
799   return true;
800 }
801 
802 /// evictInterference - Evict any interferring registers that prevent VirtReg
803 /// from being assigned to Physreg. This assumes that canEvictInterference
804 /// returned true.
805 void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
806                                  SmallVectorImpl<unsigned> &NewVRegs) {
807   // Make sure that VirtReg has a cascade number, and assign that cascade
808   // number to every evicted register. These live ranges than then only be
809   // evicted by a newer cascade, preventing infinite loops.
810   unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
811   if (!Cascade)
812     Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
813 
814   DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
815                << " interference: Cascade " << Cascade << '\n');
816 
817   // Collect all interfering virtregs first.
818   SmallVector<LiveInterval*, 8> Intfs;
819   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
820     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
821     assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
822     ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
823     Intfs.append(IVR.begin(), IVR.end());
824   }
825 
826   // Evict them second. This will invalidate the queries.
827   for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
828     LiveInterval *Intf = Intfs[i];
829     // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
830     if (!VRM->hasPhys(Intf->reg))
831       continue;
832     Matrix->unassign(*Intf);
833     assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
834             VirtReg.isSpillable() < Intf->isSpillable()) &&
835            "Cannot decrease cascade number, illegal eviction");
836     ExtraRegInfo[Intf->reg].Cascade = Cascade;
837     ++NumEvicted;
838     NewVRegs.push_back(Intf->reg);
839   }
840 }
841 
842 /// Returns true if the given \p PhysReg is a callee saved register and has not
843 /// been used for allocation yet.
844 bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
845   unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
846   if (CSR == 0)
847     return false;
848 
849   return !Matrix->isPhysRegUsed(PhysReg);
850 }
851 
852 /// tryEvict - Try to evict all interferences for a physreg.
853 /// @param  VirtReg Currently unassigned virtual register.
854 /// @param  Order   Physregs to try.
855 /// @return         Physreg to assign VirtReg, or 0.
856 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
857                             AllocationOrder &Order,
858                             SmallVectorImpl<unsigned> &NewVRegs,
859                             unsigned CostPerUseLimit) {
860   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
861 
862   // Keep track of the cheapest interference seen so far.
863   EvictionCost BestCost;
864   BestCost.setMax();
865   unsigned BestPhys = 0;
866   unsigned OrderLimit = Order.getOrder().size();
867 
868   // When we are just looking for a reduced cost per use, don't break any
869   // hints, and only evict smaller spill weights.
870   if (CostPerUseLimit < ~0u) {
871     BestCost.BrokenHints = 0;
872     BestCost.MaxWeight = VirtReg.weight;
873 
874     // Check of any registers in RC are below CostPerUseLimit.
875     const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
876     unsigned MinCost = RegClassInfo.getMinCost(RC);
877     if (MinCost >= CostPerUseLimit) {
878       DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
879                    << ", no cheaper registers to be found.\n");
880       return 0;
881     }
882 
883     // It is normal for register classes to have a long tail of registers with
884     // the same cost. We don't need to look at them if they're too expensive.
885     if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
886       OrderLimit = RegClassInfo.getLastCostChange(RC);
887       DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
888     }
889   }
890 
891   Order.rewind();
892   while (unsigned PhysReg = Order.next(OrderLimit)) {
893     if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
894       continue;
895     // The first use of a callee-saved register in a function has cost 1.
896     // Don't start using a CSR when the CostPerUseLimit is low.
897     if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
898       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
899             << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
900             << '\n');
901       continue;
902     }
903 
904     if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
905       continue;
906 
907     // Best so far.
908     BestPhys = PhysReg;
909 
910     // Stop if the hint can be used.
911     if (Order.isHint())
912       break;
913   }
914 
915   if (!BestPhys)
916     return 0;
917 
918   evictInterference(VirtReg, BestPhys, NewVRegs);
919   return BestPhys;
920 }
921 
922 
923 //===----------------------------------------------------------------------===//
924 //                              Region Splitting
925 //===----------------------------------------------------------------------===//
926 
927 /// addSplitConstraints - Fill out the SplitConstraints vector based on the
928 /// interference pattern in Physreg and its aliases. Add the constraints to
929 /// SpillPlacement and return the static cost of this split in Cost, assuming
930 /// that all preferences in SplitConstraints are met.
931 /// Return false if there are no bundles with positive bias.
932 bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
933                                    BlockFrequency &Cost) {
934   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
935 
936   // Reset interference dependent info.
937   SplitConstraints.resize(UseBlocks.size());
938   BlockFrequency StaticCost = 0;
939   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
940     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
941     SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
942 
943     BC.Number = BI.MBB->getNumber();
944     Intf.moveToBlock(BC.Number);
945     BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
946     BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
947     BC.ChangesValue = BI.FirstDef.isValid();
948 
949     if (!Intf.hasInterference())
950       continue;
951 
952     // Number of spill code instructions to insert.
953     unsigned Ins = 0;
954 
955     // Interference for the live-in value.
956     if (BI.LiveIn) {
957       if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
958         BC.Entry = SpillPlacement::MustSpill;
959         ++Ins;
960       } else if (Intf.first() < BI.FirstInstr) {
961         BC.Entry = SpillPlacement::PrefSpill;
962         ++Ins;
963       } else if (Intf.first() < BI.LastInstr) {
964         ++Ins;
965       }
966     }
967 
968     // Interference for the live-out value.
969     if (BI.LiveOut) {
970       if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
971         BC.Exit = SpillPlacement::MustSpill;
972         ++Ins;
973       } else if (Intf.last() > BI.LastInstr) {
974         BC.Exit = SpillPlacement::PrefSpill;
975         ++Ins;
976       } else if (Intf.last() > BI.FirstInstr) {
977         ++Ins;
978       }
979     }
980 
981     // Accumulate the total frequency of inserted spill code.
982     while (Ins--)
983       StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
984   }
985   Cost = StaticCost;
986 
987   // Add constraints for use-blocks. Note that these are the only constraints
988   // that may add a positive bias, it is downhill from here.
989   SpillPlacer->addConstraints(SplitConstraints);
990   return SpillPlacer->scanActiveBundles();
991 }
992 
993 
994 /// addThroughConstraints - Add constraints and links to SpillPlacer from the
995 /// live-through blocks in Blocks.
996 void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
997                                      ArrayRef<unsigned> Blocks) {
998   const unsigned GroupSize = 8;
999   SpillPlacement::BlockConstraint BCS[GroupSize];
1000   unsigned TBS[GroupSize];
1001   unsigned B = 0, T = 0;
1002 
1003   for (unsigned i = 0; i != Blocks.size(); ++i) {
1004     unsigned Number = Blocks[i];
1005     Intf.moveToBlock(Number);
1006 
1007     if (!Intf.hasInterference()) {
1008       assert(T < GroupSize && "Array overflow");
1009       TBS[T] = Number;
1010       if (++T == GroupSize) {
1011         SpillPlacer->addLinks(makeArrayRef(TBS, T));
1012         T = 0;
1013       }
1014       continue;
1015     }
1016 
1017     assert(B < GroupSize && "Array overflow");
1018     BCS[B].Number = Number;
1019 
1020     // Interference for the live-in value.
1021     if (Intf.first() <= Indexes->getMBBStartIdx(Number))
1022       BCS[B].Entry = SpillPlacement::MustSpill;
1023     else
1024       BCS[B].Entry = SpillPlacement::PrefSpill;
1025 
1026     // Interference for the live-out value.
1027     if (Intf.last() >= SA->getLastSplitPoint(Number))
1028       BCS[B].Exit = SpillPlacement::MustSpill;
1029     else
1030       BCS[B].Exit = SpillPlacement::PrefSpill;
1031 
1032     if (++B == GroupSize) {
1033       SpillPlacer->addConstraints(makeArrayRef(BCS, B));
1034       B = 0;
1035     }
1036   }
1037 
1038   SpillPlacer->addConstraints(makeArrayRef(BCS, B));
1039   SpillPlacer->addLinks(makeArrayRef(TBS, T));
1040 }
1041 
1042 void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
1043   // Keep track of through blocks that have not been added to SpillPlacer.
1044   BitVector Todo = SA->getThroughBlocks();
1045   SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
1046   unsigned AddedTo = 0;
1047 #ifndef NDEBUG
1048   unsigned Visited = 0;
1049 #endif
1050 
1051   for (;;) {
1052     ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
1053     // Find new through blocks in the periphery of PrefRegBundles.
1054     for (int i = 0, e = NewBundles.size(); i != e; ++i) {
1055       unsigned Bundle = NewBundles[i];
1056       // Look at all blocks connected to Bundle in the full graph.
1057       ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
1058       for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
1059            I != E; ++I) {
1060         unsigned Block = *I;
1061         if (!Todo.test(Block))
1062           continue;
1063         Todo.reset(Block);
1064         // This is a new through block. Add it to SpillPlacer later.
1065         ActiveBlocks.push_back(Block);
1066 #ifndef NDEBUG
1067         ++Visited;
1068 #endif
1069       }
1070     }
1071     // Any new blocks to add?
1072     if (ActiveBlocks.size() == AddedTo)
1073       break;
1074 
1075     // Compute through constraints from the interference, or assume that all
1076     // through blocks prefer spilling when forming compact regions.
1077     auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
1078     if (Cand.PhysReg)
1079       addThroughConstraints(Cand.Intf, NewBlocks);
1080     else
1081       // Provide a strong negative bias on through blocks to prevent unwanted
1082       // liveness on loop backedges.
1083       SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
1084     AddedTo = ActiveBlocks.size();
1085 
1086     // Perhaps iterating can enable more bundles?
1087     SpillPlacer->iterate();
1088   }
1089   DEBUG(dbgs() << ", v=" << Visited);
1090 }
1091 
1092 /// calcCompactRegion - Compute the set of edge bundles that should be live
1093 /// when splitting the current live range into compact regions.  Compact
1094 /// regions can be computed without looking at interference.  They are the
1095 /// regions formed by removing all the live-through blocks from the live range.
1096 ///
1097 /// Returns false if the current live range is already compact, or if the
1098 /// compact regions would form single block regions anyway.
1099 bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
1100   // Without any through blocks, the live range is already compact.
1101   if (!SA->getNumThroughBlocks())
1102     return false;
1103 
1104   // Compact regions don't correspond to any physreg.
1105   Cand.reset(IntfCache, 0);
1106 
1107   DEBUG(dbgs() << "Compact region bundles");
1108 
1109   // Use the spill placer to determine the live bundles. GrowRegion pretends
1110   // that all the through blocks have interference when PhysReg is unset.
1111   SpillPlacer->prepare(Cand.LiveBundles);
1112 
1113   // The static split cost will be zero since Cand.Intf reports no interference.
1114   BlockFrequency Cost;
1115   if (!addSplitConstraints(Cand.Intf, Cost)) {
1116     DEBUG(dbgs() << ", none.\n");
1117     return false;
1118   }
1119 
1120   growRegion(Cand);
1121   SpillPlacer->finish();
1122 
1123   if (!Cand.LiveBundles.any()) {
1124     DEBUG(dbgs() << ", none.\n");
1125     return false;
1126   }
1127 
1128   DEBUG({
1129     for (int i = Cand.LiveBundles.find_first(); i>=0;
1130          i = Cand.LiveBundles.find_next(i))
1131     dbgs() << " EB#" << i;
1132     dbgs() << ".\n";
1133   });
1134   return true;
1135 }
1136 
1137 /// calcSpillCost - Compute how expensive it would be to split the live range in
1138 /// SA around all use blocks instead of forming bundle regions.
1139 BlockFrequency RAGreedy::calcSpillCost() {
1140   BlockFrequency Cost = 0;
1141   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1142   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1143     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1144     unsigned Number = BI.MBB->getNumber();
1145     // We normally only need one spill instruction - a load or a store.
1146     Cost += SpillPlacer->getBlockFrequency(Number);
1147 
1148     // Unless the value is redefined in the block.
1149     if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
1150       Cost += SpillPlacer->getBlockFrequency(Number);
1151   }
1152   return Cost;
1153 }
1154 
1155 /// calcGlobalSplitCost - Return the global split cost of following the split
1156 /// pattern in LiveBundles. This cost should be added to the local cost of the
1157 /// interference pattern in SplitConstraints.
1158 ///
1159 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
1160   BlockFrequency GlobalCost = 0;
1161   const BitVector &LiveBundles = Cand.LiveBundles;
1162   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1163   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1164     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1165     SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
1166     bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
1167     bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
1168     unsigned Ins = 0;
1169 
1170     if (BI.LiveIn)
1171       Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
1172     if (BI.LiveOut)
1173       Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
1174     while (Ins--)
1175       GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
1176   }
1177 
1178   for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
1179     unsigned Number = Cand.ActiveBlocks[i];
1180     bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
1181     bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
1182     if (!RegIn && !RegOut)
1183       continue;
1184     if (RegIn && RegOut) {
1185       // We need double spill code if this block has interference.
1186       Cand.Intf.moveToBlock(Number);
1187       if (Cand.Intf.hasInterference()) {
1188         GlobalCost += SpillPlacer->getBlockFrequency(Number);
1189         GlobalCost += SpillPlacer->getBlockFrequency(Number);
1190       }
1191       continue;
1192     }
1193     // live-in / stack-out or stack-in live-out.
1194     GlobalCost += SpillPlacer->getBlockFrequency(Number);
1195   }
1196   return GlobalCost;
1197 }
1198 
1199 /// splitAroundRegion - Split the current live range around the regions
1200 /// determined by BundleCand and GlobalCand.
1201 ///
1202 /// Before calling this function, GlobalCand and BundleCand must be initialized
1203 /// so each bundle is assigned to a valid candidate, or NoCand for the
1204 /// stack-bound bundles.  The shared SA/SE SplitAnalysis and SplitEditor
1205 /// objects must be initialized for the current live range, and intervals
1206 /// created for the used candidates.
1207 ///
1208 /// @param LREdit    The LiveRangeEdit object handling the current split.
1209 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value
1210 ///                  must appear in this list.
1211 void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
1212                                  ArrayRef<unsigned> UsedCands) {
1213   // These are the intervals created for new global ranges. We may create more
1214   // intervals for local ranges.
1215   const unsigned NumGlobalIntvs = LREdit.size();
1216   DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
1217   assert(NumGlobalIntvs && "No global intervals configured");
1218 
1219   // Isolate even single instructions when dealing with a proper sub-class.
1220   // That guarantees register class inflation for the stack interval because it
1221   // is all copies.
1222   unsigned Reg = SA->getParent().reg;
1223   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1224 
1225   // First handle all the blocks with uses.
1226   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1227   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1228     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1229     unsigned Number = BI.MBB->getNumber();
1230     unsigned IntvIn = 0, IntvOut = 0;
1231     SlotIndex IntfIn, IntfOut;
1232     if (BI.LiveIn) {
1233       unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
1234       if (CandIn != NoCand) {
1235         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
1236         IntvIn = Cand.IntvIdx;
1237         Cand.Intf.moveToBlock(Number);
1238         IntfIn = Cand.Intf.first();
1239       }
1240     }
1241     if (BI.LiveOut) {
1242       unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
1243       if (CandOut != NoCand) {
1244         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
1245         IntvOut = Cand.IntvIdx;
1246         Cand.Intf.moveToBlock(Number);
1247         IntfOut = Cand.Intf.last();
1248       }
1249     }
1250 
1251     // Create separate intervals for isolated blocks with multiple uses.
1252     if (!IntvIn && !IntvOut) {
1253       DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
1254       if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1255         SE->splitSingleBlock(BI);
1256       continue;
1257     }
1258 
1259     if (IntvIn && IntvOut)
1260       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
1261     else if (IntvIn)
1262       SE->splitRegInBlock(BI, IntvIn, IntfIn);
1263     else
1264       SE->splitRegOutBlock(BI, IntvOut, IntfOut);
1265   }
1266 
1267   // Handle live-through blocks. The relevant live-through blocks are stored in
1268   // the ActiveBlocks list with each candidate. We need to filter out
1269   // duplicates.
1270   BitVector Todo = SA->getThroughBlocks();
1271   for (unsigned c = 0; c != UsedCands.size(); ++c) {
1272     ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
1273     for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1274       unsigned Number = Blocks[i];
1275       if (!Todo.test(Number))
1276         continue;
1277       Todo.reset(Number);
1278 
1279       unsigned IntvIn = 0, IntvOut = 0;
1280       SlotIndex IntfIn, IntfOut;
1281 
1282       unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
1283       if (CandIn != NoCand) {
1284         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
1285         IntvIn = Cand.IntvIdx;
1286         Cand.Intf.moveToBlock(Number);
1287         IntfIn = Cand.Intf.first();
1288       }
1289 
1290       unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
1291       if (CandOut != NoCand) {
1292         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
1293         IntvOut = Cand.IntvIdx;
1294         Cand.Intf.moveToBlock(Number);
1295         IntfOut = Cand.Intf.last();
1296       }
1297       if (!IntvIn && !IntvOut)
1298         continue;
1299       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
1300     }
1301   }
1302 
1303   ++NumGlobalSplits;
1304 
1305   SmallVector<unsigned, 8> IntvMap;
1306   SE->finish(&IntvMap);
1307   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1308 
1309   ExtraRegInfo.resize(MRI->getNumVirtRegs());
1310   unsigned OrigBlocks = SA->getNumLiveBlocks();
1311 
1312   // Sort out the new intervals created by splitting. We get four kinds:
1313   // - Remainder intervals should not be split again.
1314   // - Candidate intervals can be assigned to Cand.PhysReg.
1315   // - Block-local splits are candidates for local splitting.
1316   // - DCE leftovers should go back on the queue.
1317   for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
1318     LiveInterval &Reg = LIS->getInterval(LREdit.get(i));
1319 
1320     // Ignore old intervals from DCE.
1321     if (getStage(Reg) != RS_New)
1322       continue;
1323 
1324     // Remainder interval. Don't try splitting again, spill if it doesn't
1325     // allocate.
1326     if (IntvMap[i] == 0) {
1327       setStage(Reg, RS_Spill);
1328       continue;
1329     }
1330 
1331     // Global intervals. Allow repeated splitting as long as the number of live
1332     // blocks is strictly decreasing.
1333     if (IntvMap[i] < NumGlobalIntvs) {
1334       if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
1335         DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
1336                      << " blocks as original.\n");
1337         // Don't allow repeated splitting as a safe guard against looping.
1338         setStage(Reg, RS_Split2);
1339       }
1340       continue;
1341     }
1342 
1343     // Other intervals are treated as new. This includes local intervals created
1344     // for blocks with multiple uses, and anything created by DCE.
1345   }
1346 
1347   if (VerifyEnabled)
1348     MF->verify(this, "After splitting live range around region");
1349 }
1350 
1351 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1352                                   SmallVectorImpl<unsigned> &NewVRegs) {
1353   unsigned NumCands = 0;
1354   BlockFrequency BestCost;
1355 
1356   // Check if we can split this live range around a compact region.
1357   bool HasCompact = calcCompactRegion(GlobalCand.front());
1358   if (HasCompact) {
1359     // Yes, keep GlobalCand[0] as the compact region candidate.
1360     NumCands = 1;
1361     BestCost = BlockFrequency::getMaxFrequency();
1362   } else {
1363     // No benefit from the compact region, our fallback will be per-block
1364     // splitting. Make sure we find a solution that is cheaper than spilling.
1365     BestCost = calcSpillCost();
1366     DEBUG(dbgs() << "Cost of isolating all blocks = ";
1367                  MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
1368   }
1369 
1370   unsigned BestCand =
1371       calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
1372                                false/*IgnoreCSR*/);
1373 
1374   // No solutions found, fall back to single block splitting.
1375   if (!HasCompact && BestCand == NoCand)
1376     return 0;
1377 
1378   return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1379 }
1380 
1381 unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
1382                                             AllocationOrder &Order,
1383                                             BlockFrequency &BestCost,
1384                                             unsigned &NumCands,
1385                                             bool IgnoreCSR) {
1386   unsigned BestCand = NoCand;
1387   Order.rewind();
1388   while (unsigned PhysReg = Order.next()) {
1389     if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
1390       continue;
1391 
1392     // Discard bad candidates before we run out of interference cache cursors.
1393     // This will only affect register classes with a lot of registers (>32).
1394     if (NumCands == IntfCache.getMaxCursors()) {
1395       unsigned WorstCount = ~0u;
1396       unsigned Worst = 0;
1397       for (unsigned i = 0; i != NumCands; ++i) {
1398         if (i == BestCand || !GlobalCand[i].PhysReg)
1399           continue;
1400         unsigned Count = GlobalCand[i].LiveBundles.count();
1401         if (Count < WorstCount) {
1402           Worst = i;
1403           WorstCount = Count;
1404         }
1405       }
1406       --NumCands;
1407       GlobalCand[Worst] = GlobalCand[NumCands];
1408       if (BestCand == NumCands)
1409         BestCand = Worst;
1410     }
1411 
1412     if (GlobalCand.size() <= NumCands)
1413       GlobalCand.resize(NumCands+1);
1414     GlobalSplitCandidate &Cand = GlobalCand[NumCands];
1415     Cand.reset(IntfCache, PhysReg);
1416 
1417     SpillPlacer->prepare(Cand.LiveBundles);
1418     BlockFrequency Cost;
1419     if (!addSplitConstraints(Cand.Intf, Cost)) {
1420       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
1421       continue;
1422     }
1423     DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
1424                  MBFI->printBlockFreq(dbgs(), Cost));
1425     if (Cost >= BestCost) {
1426       DEBUG({
1427         if (BestCand == NoCand)
1428           dbgs() << " worse than no bundles\n";
1429         else
1430           dbgs() << " worse than "
1431                  << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
1432       });
1433       continue;
1434     }
1435     growRegion(Cand);
1436 
1437     SpillPlacer->finish();
1438 
1439     // No live bundles, defer to splitSingleBlocks().
1440     if (!Cand.LiveBundles.any()) {
1441       DEBUG(dbgs() << " no bundles.\n");
1442       continue;
1443     }
1444 
1445     Cost += calcGlobalSplitCost(Cand);
1446     DEBUG({
1447       dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
1448                                 << " with bundles";
1449       for (int i = Cand.LiveBundles.find_first(); i>=0;
1450            i = Cand.LiveBundles.find_next(i))
1451         dbgs() << " EB#" << i;
1452       dbgs() << ".\n";
1453     });
1454     if (Cost < BestCost) {
1455       BestCand = NumCands;
1456       BestCost = Cost;
1457     }
1458     ++NumCands;
1459   }
1460   return BestCand;
1461 }
1462 
1463 unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
1464                                  bool HasCompact,
1465                                  SmallVectorImpl<unsigned> &NewVRegs) {
1466   SmallVector<unsigned, 8> UsedCands;
1467   // Prepare split editor.
1468   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
1469   SE->reset(LREdit, SplitSpillMode);
1470 
1471   // Assign all edge bundles to the preferred candidate, or NoCand.
1472   BundleCand.assign(Bundles->getNumBundles(), NoCand);
1473 
1474   // Assign bundles for the best candidate region.
1475   if (BestCand != NoCand) {
1476     GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1477     if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1478       UsedCands.push_back(BestCand);
1479       Cand.IntvIdx = SE->openIntv();
1480       DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
1481                    << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1482       (void)B;
1483     }
1484   }
1485 
1486   // Assign bundles for the compact region.
1487   if (HasCompact) {
1488     GlobalSplitCandidate &Cand = GlobalCand.front();
1489     assert(!Cand.PhysReg && "Compact region has no physreg");
1490     if (unsigned B = Cand.getBundles(BundleCand, 0)) {
1491       UsedCands.push_back(0);
1492       Cand.IntvIdx = SE->openIntv();
1493       DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
1494                    << Cand.IntvIdx << ".\n");
1495       (void)B;
1496     }
1497   }
1498 
1499   splitAroundRegion(LREdit, UsedCands);
1500   return 0;
1501 }
1502 
1503 
1504 //===----------------------------------------------------------------------===//
1505 //                            Per-Block Splitting
1506 //===----------------------------------------------------------------------===//
1507 
1508 /// tryBlockSplit - Split a global live range around every block with uses. This
1509 /// creates a lot of local live ranges, that will be split by tryLocalSplit if
1510 /// they don't allocate.
1511 unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1512                                  SmallVectorImpl<unsigned> &NewVRegs) {
1513   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1514   unsigned Reg = VirtReg.reg;
1515   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1516   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
1517   SE->reset(LREdit, SplitSpillMode);
1518   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1519   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
1520     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
1521     if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1522       SE->splitSingleBlock(BI);
1523   }
1524   // No blocks were split.
1525   if (LREdit.empty())
1526     return 0;
1527 
1528   // We did split for some blocks.
1529   SmallVector<unsigned, 8> IntvMap;
1530   SE->finish(&IntvMap);
1531 
1532   // Tell LiveDebugVariables about the new ranges.
1533   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1534 
1535   ExtraRegInfo.resize(MRI->getNumVirtRegs());
1536 
1537   // Sort out the new intervals created by splitting. The remainder interval
1538   // goes straight to spilling, the new local ranges get to stay RS_New.
1539   for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
1540     LiveInterval &LI = LIS->getInterval(LREdit.get(i));
1541     if (getStage(LI) == RS_New && IntvMap[i] == 0)
1542       setStage(LI, RS_Spill);
1543   }
1544 
1545   if (VerifyEnabled)
1546     MF->verify(this, "After splitting live range around basic blocks");
1547   return 0;
1548 }
1549 
1550 
1551 //===----------------------------------------------------------------------===//
1552 //                         Per-Instruction Splitting
1553 //===----------------------------------------------------------------------===//
1554 
1555 /// Get the number of allocatable registers that match the constraints of \p Reg
1556 /// on \p MI and that are also in \p SuperRC.
1557 static unsigned getNumAllocatableRegsForConstraints(
1558     const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
1559     const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
1560     const RegisterClassInfo &RCI) {
1561   assert(SuperRC && "Invalid register class");
1562 
1563   const TargetRegisterClass *ConstrainedRC =
1564       MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1565                                              /* ExploreBundle */ true);
1566   if (!ConstrainedRC)
1567     return 0;
1568   return RCI.getNumAllocatableRegs(ConstrainedRC);
1569 }
1570 
1571 /// tryInstructionSplit - Split a live range around individual instructions.
1572 /// This is normally not worthwhile since the spiller is doing essentially the
1573 /// same thing. However, when the live range is in a constrained register
1574 /// class, it may help to insert copies such that parts of the live range can
1575 /// be moved to a larger register class.
1576 ///
1577 /// This is similar to spilling to a larger register class.
1578 unsigned
1579 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1580                               SmallVectorImpl<unsigned> &NewVRegs) {
1581   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
1582   // There is no point to this if there are no larger sub-classes.
1583   if (!RegClassInfo.isProperSubClass(CurRC))
1584     return 0;
1585 
1586   // Always enable split spill mode, since we're effectively spilling to a
1587   // register.
1588   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
1589   SE->reset(LREdit, SplitEditor::SM_Size);
1590 
1591   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1592   if (Uses.size() <= 1)
1593     return 0;
1594 
1595   DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
1596 
1597   const TargetRegisterClass *SuperRC =
1598       TRI->getLargestLegalSuperClass(CurRC, *MF);
1599   unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
1600   // Split around every non-copy instruction if this split will relax
1601   // the constraints on the virtual register.
1602   // Otherwise, splitting just inserts uncoalescable copies that do not help
1603   // the allocation.
1604   for (unsigned i = 0; i != Uses.size(); ++i) {
1605     if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
1606       if (MI->isFullCopy() ||
1607           SuperRCNumAllocatableRegs ==
1608               getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
1609                                                   TRI, RCI)) {
1610         DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
1611         continue;
1612       }
1613     SE->openIntv();
1614     SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
1615     SlotIndex SegStop  = SE->leaveIntvAfter(Uses[i]);
1616     SE->useIntv(SegStart, SegStop);
1617   }
1618 
1619   if (LREdit.empty()) {
1620     DEBUG(dbgs() << "All uses were copies.\n");
1621     return 0;
1622   }
1623 
1624   SmallVector<unsigned, 8> IntvMap;
1625   SE->finish(&IntvMap);
1626   DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
1627   ExtraRegInfo.resize(MRI->getNumVirtRegs());
1628 
1629   // Assign all new registers to RS_Spill. This was the last chance.
1630   setStage(LREdit.begin(), LREdit.end(), RS_Spill);
1631   return 0;
1632 }
1633 
1634 
1635 //===----------------------------------------------------------------------===//
1636 //                             Local Splitting
1637 //===----------------------------------------------------------------------===//
1638 
1639 
1640 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted
1641 /// in order to use PhysReg between two entries in SA->UseSlots.
1642 ///
1643 /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
1644 ///
1645 void RAGreedy::calcGapWeights(unsigned PhysReg,
1646                               SmallVectorImpl<float> &GapWeight) {
1647   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1648   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1649   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1650   const unsigned NumGaps = Uses.size()-1;
1651 
1652   // Start and end points for the interference check.
1653   SlotIndex StartIdx =
1654     BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
1655   SlotIndex StopIdx =
1656     BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
1657 
1658   GapWeight.assign(NumGaps, 0.0f);
1659 
1660   // Add interference from each overlapping register.
1661   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1662     if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
1663           .checkInterference())
1664       continue;
1665 
1666     // We know that VirtReg is a continuous interval from FirstInstr to
1667     // LastInstr, so we don't need InterferenceQuery.
1668     //
1669     // Interference that overlaps an instruction is counted in both gaps
1670     // surrounding the instruction. The exception is interference before
1671     // StartIdx and after StopIdx.
1672     //
1673     LiveIntervalUnion::SegmentIter IntI =
1674       Matrix->getLiveUnions()[*Units] .find(StartIdx);
1675     for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1676       // Skip the gaps before IntI.
1677       while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1678         if (++Gap == NumGaps)
1679           break;
1680       if (Gap == NumGaps)
1681         break;
1682 
1683       // Update the gaps covered by IntI.
1684       const float weight = IntI.value()->weight;
1685       for (; Gap != NumGaps; ++Gap) {
1686         GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1687         if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1688           break;
1689       }
1690       if (Gap == NumGaps)
1691         break;
1692     }
1693   }
1694 
1695   // Add fixed interference.
1696   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1697     const LiveRange &LR = LIS->getRegUnit(*Units);
1698     LiveRange::const_iterator I = LR.find(StartIdx);
1699     LiveRange::const_iterator E = LR.end();
1700 
1701     // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
1702     for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1703       while (Uses[Gap+1].getBoundaryIndex() < I->start)
1704         if (++Gap == NumGaps)
1705           break;
1706       if (Gap == NumGaps)
1707         break;
1708 
1709       for (; Gap != NumGaps; ++Gap) {
1710         GapWeight[Gap] = llvm::huge_valf;
1711         if (Uses[Gap+1].getBaseIndex() >= I->end)
1712           break;
1713       }
1714       if (Gap == NumGaps)
1715         break;
1716     }
1717   }
1718 }
1719 
1720 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1721 /// basic block.
1722 ///
1723 unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1724                                  SmallVectorImpl<unsigned> &NewVRegs) {
1725   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1726   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1727 
1728   // Note that it is possible to have an interval that is live-in or live-out
1729   // while only covering a single block - A phi-def can use undef values from
1730   // predecessors, and the block could be a single-block loop.
1731   // We don't bother doing anything clever about such a case, we simply assume
1732   // that the interval is continuous from FirstInstr to LastInstr. We should
1733   // make sure that we don't do anything illegal to such an interval, though.
1734 
1735   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1736   if (Uses.size() <= 2)
1737     return 0;
1738   const unsigned NumGaps = Uses.size()-1;
1739 
1740   DEBUG({
1741     dbgs() << "tryLocalSplit: ";
1742     for (unsigned i = 0, e = Uses.size(); i != e; ++i)
1743       dbgs() << ' ' << Uses[i];
1744     dbgs() << '\n';
1745   });
1746 
1747   // If VirtReg is live across any register mask operands, compute a list of
1748   // gaps with register masks.
1749   SmallVector<unsigned, 8> RegMaskGaps;
1750   if (Matrix->checkRegMaskInterference(VirtReg)) {
1751     // Get regmask slots for the whole block.
1752     ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1753     DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1754     // Constrain to VirtReg's live range.
1755     unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
1756                                    Uses.front().getRegSlot()) - RMS.begin();
1757     unsigned re = RMS.size();
1758     for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
1759       // Look for Uses[i] <= RMS <= Uses[i+1].
1760       assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
1761       if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
1762         continue;
1763       // Skip a regmask on the same instruction as the last use. It doesn't
1764       // overlap the live range.
1765       if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
1766         break;
1767       DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
1768       RegMaskGaps.push_back(i);
1769       // Advance ri to the next gap. A regmask on one of the uses counts in
1770       // both gaps.
1771       while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
1772         ++ri;
1773     }
1774     DEBUG(dbgs() << '\n');
1775   }
1776 
1777   // Since we allow local split results to be split again, there is a risk of
1778   // creating infinite loops. It is tempting to require that the new live
1779   // ranges have less instructions than the original. That would guarantee
1780   // convergence, but it is too strict. A live range with 3 instructions can be
1781   // split 2+3 (including the COPY), and we want to allow that.
1782   //
1783   // Instead we use these rules:
1784   //
1785   // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1786   //    noop split, of course).
1787   // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1788   //    the new ranges must have fewer instructions than before the split.
1789   // 3. New ranges with the same number of instructions are marked RS_Split2,
1790   //    smaller ranges are marked RS_New.
1791   //
1792   // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1793   // excessive splitting and infinite loops.
1794   //
1795   bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
1796 
1797   // Best split candidate.
1798   unsigned BestBefore = NumGaps;
1799   unsigned BestAfter = 0;
1800   float BestDiff = 0;
1801 
1802   const float blockFreq =
1803     SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1804     (1.0f / MBFI->getEntryFreq());
1805   SmallVector<float, 8> GapWeight;
1806 
1807   Order.rewind();
1808   while (unsigned PhysReg = Order.next()) {
1809     // Keep track of the largest spill weight that would need to be evicted in
1810     // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
1811     calcGapWeights(PhysReg, GapWeight);
1812 
1813     // Remove any gaps with regmask clobbers.
1814     if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1815       for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
1816         GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
1817 
1818     // Try to find the best sequence of gaps to close.
1819     // The new spill weight must be larger than any gap interference.
1820 
1821     // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1822     unsigned SplitBefore = 0, SplitAfter = 1;
1823 
1824     // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1825     // It is the spill weight that needs to be evicted.
1826     float MaxGap = GapWeight[0];
1827 
1828     for (;;) {
1829       // Live before/after split?
1830       const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1831       const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1832 
1833       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
1834                    << Uses[SplitBefore] << '-' << Uses[SplitAfter]
1835                    << " i=" << MaxGap);
1836 
1837       // Stop before the interval gets so big we wouldn't be making progress.
1838       if (!LiveBefore && !LiveAfter) {
1839         DEBUG(dbgs() << " all\n");
1840         break;
1841       }
1842       // Should the interval be extended or shrunk?
1843       bool Shrink = true;
1844 
1845       // How many gaps would the new range have?
1846       unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1847 
1848       // Legally, without causing looping?
1849       bool Legal = !ProgressRequired || NewGaps < NumGaps;
1850 
1851       if (Legal && MaxGap < llvm::huge_valf) {
1852         // Estimate the new spill weight. Each instruction reads or writes the
1853         // register. Conservatively assume there are no read-modify-write
1854         // instructions.
1855         //
1856         // Try to guess the size of the new interval.
1857         const float EstWeight = normalizeSpillWeight(
1858             blockFreq * (NewGaps + 1),
1859             Uses[SplitBefore].distance(Uses[SplitAfter]) +
1860                 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1861             1);
1862         // Would this split be possible to allocate?
1863         // Never allocate all gaps, we wouldn't be making progress.
1864         DEBUG(dbgs() << " w=" << EstWeight);
1865         if (EstWeight * Hysteresis >= MaxGap) {
1866           Shrink = false;
1867           float Diff = EstWeight - MaxGap;
1868           if (Diff > BestDiff) {
1869             DEBUG(dbgs() << " (best)");
1870             BestDiff = Hysteresis * Diff;
1871             BestBefore = SplitBefore;
1872             BestAfter = SplitAfter;
1873           }
1874         }
1875       }
1876 
1877       // Try to shrink.
1878       if (Shrink) {
1879         if (++SplitBefore < SplitAfter) {
1880           DEBUG(dbgs() << " shrink\n");
1881           // Recompute the max when necessary.
1882           if (GapWeight[SplitBefore - 1] >= MaxGap) {
1883             MaxGap = GapWeight[SplitBefore];
1884             for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
1885               MaxGap = std::max(MaxGap, GapWeight[i]);
1886           }
1887           continue;
1888         }
1889         MaxGap = 0;
1890       }
1891 
1892       // Try to extend the interval.
1893       if (SplitAfter >= NumGaps) {
1894         DEBUG(dbgs() << " end\n");
1895         break;
1896       }
1897 
1898       DEBUG(dbgs() << " extend\n");
1899       MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1900     }
1901   }
1902 
1903   // Didn't find any candidates?
1904   if (BestBefore == NumGaps)
1905     return 0;
1906 
1907   DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
1908                << '-' << Uses[BestAfter] << ", " << BestDiff
1909                << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
1910 
1911   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
1912   SE->reset(LREdit);
1913 
1914   SE->openIntv();
1915   SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1916   SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
1917   SE->useIntv(SegStart, SegStop);
1918   SmallVector<unsigned, 8> IntvMap;
1919   SE->finish(&IntvMap);
1920   DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
1921 
1922   // If the new range has the same number of instructions as before, mark it as
1923   // RS_Split2 so the next split will be forced to make progress. Otherwise,
1924   // leave the new intervals as RS_New so they can compete.
1925   bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1926   bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1927   unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1928   if (NewGaps >= NumGaps) {
1929     DEBUG(dbgs() << "Tagging non-progress ranges: ");
1930     assert(!ProgressRequired && "Didn't make progress when it was required.");
1931     for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
1932       if (IntvMap[i] == 1) {
1933         setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
1934         DEBUG(dbgs() << PrintReg(LREdit.get(i)));
1935       }
1936     DEBUG(dbgs() << '\n');
1937   }
1938   ++NumLocalSplits;
1939 
1940   return 0;
1941 }
1942 
1943 //===----------------------------------------------------------------------===//
1944 //                          Live Range Splitting
1945 //===----------------------------------------------------------------------===//
1946 
1947 /// trySplit - Try to split VirtReg or one of its interferences, making it
1948 /// assignable.
1949 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
1950 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
1951                             SmallVectorImpl<unsigned>&NewVRegs) {
1952   // Ranges must be Split2 or less.
1953   if (getStage(VirtReg) >= RS_Spill)
1954     return 0;
1955 
1956   // Local intervals are handled separately.
1957   if (LIS->intervalIsInOneMBB(VirtReg)) {
1958     NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
1959     SA->analyze(&VirtReg);
1960     unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1961     if (PhysReg || !NewVRegs.empty())
1962       return PhysReg;
1963     return tryInstructionSplit(VirtReg, Order, NewVRegs);
1964   }
1965 
1966   NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
1967 
1968   SA->analyze(&VirtReg);
1969 
1970   // FIXME: SplitAnalysis may repair broken live ranges coming from the
1971   // coalescer. That may cause the range to become allocatable which means that
1972   // tryRegionSplit won't be making progress. This check should be replaced with
1973   // an assertion when the coalescer is fixed.
1974   if (SA->didRepairRange()) {
1975     // VirtReg has changed, so all cached queries are invalid.
1976     Matrix->invalidateVirtRegs();
1977     if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
1978       return PhysReg;
1979   }
1980 
1981   // First try to split around a region spanning multiple blocks. RS_Split2
1982   // ranges already made dubious progress with region splitting, so they go
1983   // straight to single block splitting.
1984   if (getStage(VirtReg) < RS_Split2) {
1985     unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1986     if (PhysReg || !NewVRegs.empty())
1987       return PhysReg;
1988   }
1989 
1990   // Then isolate blocks.
1991   return tryBlockSplit(VirtReg, Order, NewVRegs);
1992 }
1993 
1994 //===----------------------------------------------------------------------===//
1995 //                          Last Chance Recoloring
1996 //===----------------------------------------------------------------------===//
1997 
1998 /// mayRecolorAllInterferences - Check if the virtual registers that
1999 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
2000 /// recolored to free \p PhysReg.
2001 /// When true is returned, \p RecoloringCandidates has been augmented with all
2002 /// the live intervals that need to be recolored in order to free \p PhysReg
2003 /// for \p VirtReg.
2004 /// \p FixedRegisters contains all the virtual registers that cannot be
2005 /// recolored.
2006 bool
2007 RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
2008                                      SmallLISet &RecoloringCandidates,
2009                                      const SmallVirtRegSet &FixedRegisters) {
2010   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
2011 
2012   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
2013     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
2014     // If there is LastChanceRecoloringMaxInterference or more interferences,
2015     // chances are one would not be recolorable.
2016     if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
2017         LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
2018       DEBUG(dbgs() << "Early abort: too many interferences.\n");
2019       CutOffInfo |= CO_Interf;
2020       return false;
2021     }
2022     for (unsigned i = Q.interferingVRegs().size(); i; --i) {
2023       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
2024       // If Intf is done and sit on the same register class as VirtReg,
2025       // it would not be recolorable as it is in the same state as VirtReg.
2026       if ((getStage(*Intf) == RS_Done &&
2027            MRI->getRegClass(Intf->reg) == CurRC) ||
2028           FixedRegisters.count(Intf->reg)) {
2029         DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n");
2030         return false;
2031       }
2032       RecoloringCandidates.insert(Intf);
2033     }
2034   }
2035   return true;
2036 }
2037 
2038 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
2039 /// its interferences.
2040 /// Last chance recoloring chooses a color for \p VirtReg and recolors every
2041 /// virtual register that was using it. The recoloring process may recursively
2042 /// use the last chance recoloring. Therefore, when a virtual register has been
2043 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
2044 /// be last-chance-recolored again during this recoloring "session".
2045 /// E.g.,
2046 /// Let
2047 /// vA can use {R1, R2    }
2048 /// vB can use {    R2, R3}
2049 /// vC can use {R1        }
2050 /// Where vA, vB, and vC cannot be split anymore (they are reloads for
2051 /// instance) and they all interfere.
2052 ///
2053 /// vA is assigned R1
2054 /// vB is assigned R2
2055 /// vC tries to evict vA but vA is already done.
2056 /// Regular register allocation fails.
2057 ///
2058 /// Last chance recoloring kicks in:
2059 /// vC does as if vA was evicted => vC uses R1.
2060 /// vC is marked as fixed.
2061 /// vA needs to find a color.
2062 /// None are available.
2063 /// vA cannot evict vC: vC is a fixed virtual register now.
2064 /// vA does as if vB was evicted => vA uses R2.
2065 /// vB needs to find a color.
2066 /// R3 is available.
2067 /// Recoloring => vC = R1, vA = R2, vB = R3
2068 ///
2069 /// \p Order defines the preferred allocation order for \p VirtReg.
2070 /// \p NewRegs will contain any new virtual register that have been created
2071 /// (split, spill) during the process and that must be assigned.
2072 /// \p FixedRegisters contains all the virtual registers that cannot be
2073 /// recolored.
2074 /// \p Depth gives the current depth of the last chance recoloring.
2075 /// \return a physical register that can be used for VirtReg or ~0u if none
2076 /// exists.
2077 unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
2078                                            AllocationOrder &Order,
2079                                            SmallVectorImpl<unsigned> &NewVRegs,
2080                                            SmallVirtRegSet &FixedRegisters,
2081                                            unsigned Depth) {
2082   DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
2083   // Ranges must be Done.
2084   assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
2085          "Last chance recoloring should really be last chance");
2086   // Set the max depth to LastChanceRecoloringMaxDepth.
2087   // We may want to reconsider that if we end up with a too large search space
2088   // for target with hundreds of registers.
2089   // Indeed, in that case we may want to cut the search space earlier.
2090   if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
2091     DEBUG(dbgs() << "Abort because max depth has been reached.\n");
2092     CutOffInfo |= CO_Depth;
2093     return ~0u;
2094   }
2095 
2096   // Set of Live intervals that will need to be recolored.
2097   SmallLISet RecoloringCandidates;
2098   // Record the original mapping virtual register to physical register in case
2099   // the recoloring fails.
2100   DenseMap<unsigned, unsigned> VirtRegToPhysReg;
2101   // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
2102   // this recoloring "session".
2103   FixedRegisters.insert(VirtReg.reg);
2104 
2105   Order.rewind();
2106   while (unsigned PhysReg = Order.next()) {
2107     DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
2108                  << PrintReg(PhysReg, TRI) << '\n');
2109     RecoloringCandidates.clear();
2110     VirtRegToPhysReg.clear();
2111 
2112     // It is only possible to recolor virtual register interference.
2113     if (Matrix->checkInterference(VirtReg, PhysReg) >
2114         LiveRegMatrix::IK_VirtReg) {
2115       DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n");
2116 
2117       continue;
2118     }
2119 
2120     // Early give up on this PhysReg if it is obvious we cannot recolor all
2121     // the interferences.
2122     if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2123                                     FixedRegisters)) {
2124       DEBUG(dbgs() << "Some inteferences cannot be recolored.\n");
2125       continue;
2126     }
2127 
2128     // RecoloringCandidates contains all the virtual registers that interfer
2129     // with VirtReg on PhysReg (or one of its aliases).
2130     // Enqueue them for recoloring and perform the actual recoloring.
2131     PQueue RecoloringQueue;
2132     for (SmallLISet::iterator It = RecoloringCandidates.begin(),
2133                               EndIt = RecoloringCandidates.end();
2134          It != EndIt; ++It) {
2135       unsigned ItVirtReg = (*It)->reg;
2136       enqueue(RecoloringQueue, *It);
2137       assert(VRM->hasPhys(ItVirtReg) &&
2138              "Interferences are supposed to be with allocated vairables");
2139 
2140       // Record the current allocation.
2141       VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
2142       // unset the related struct.
2143       Matrix->unassign(**It);
2144     }
2145 
2146     // Do as if VirtReg was assigned to PhysReg so that the underlying
2147     // recoloring has the right information about the interferes and
2148     // available colors.
2149     Matrix->assign(VirtReg, PhysReg);
2150 
2151     // Save the current recoloring state.
2152     // If we cannot recolor all the interferences, we will have to start again
2153     // at this point for the next physical register.
2154     SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2155     if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
2156                                 Depth)) {
2157       // Do not mess up with the global assignment process.
2158       // I.e., VirtReg must be unassigned.
2159       Matrix->unassign(VirtReg);
2160       return PhysReg;
2161     }
2162 
2163     DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2164                  << PrintReg(PhysReg, TRI) << '\n');
2165 
2166     // The recoloring attempt failed, undo the changes.
2167     FixedRegisters = SaveFixedRegisters;
2168     Matrix->unassign(VirtReg);
2169 
2170     for (SmallLISet::iterator It = RecoloringCandidates.begin(),
2171                               EndIt = RecoloringCandidates.end();
2172          It != EndIt; ++It) {
2173       unsigned ItVirtReg = (*It)->reg;
2174       if (VRM->hasPhys(ItVirtReg))
2175         Matrix->unassign(**It);
2176       unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg];
2177       Matrix->assign(**It, ItPhysReg);
2178     }
2179   }
2180 
2181   // Last chance recoloring did not worked either, give up.
2182   return ~0u;
2183 }
2184 
2185 /// tryRecoloringCandidates - Try to assign a new color to every register
2186 /// in \RecoloringQueue.
2187 /// \p NewRegs will contain any new virtual register created during the
2188 /// recoloring process.
2189 /// \p FixedRegisters[in/out] contains all the registers that have been
2190 /// recolored.
2191 /// \return true if all virtual registers in RecoloringQueue were successfully
2192 /// recolored, false otherwise.
2193 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2194                                        SmallVectorImpl<unsigned> &NewVRegs,
2195                                        SmallVirtRegSet &FixedRegisters,
2196                                        unsigned Depth) {
2197   while (!RecoloringQueue.empty()) {
2198     LiveInterval *LI = dequeue(RecoloringQueue);
2199     DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2200     unsigned PhysReg;
2201     PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
2202     if (PhysReg == ~0u || !PhysReg)
2203       return false;
2204     DEBUG(dbgs() << "Recoloring of " << *LI
2205                  << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
2206     Matrix->assign(*LI, PhysReg);
2207     FixedRegisters.insert(LI->reg);
2208   }
2209   return true;
2210 }
2211 
2212 //===----------------------------------------------------------------------===//
2213 //                            Main Entry Point
2214 //===----------------------------------------------------------------------===//
2215 
2216 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
2217                                  SmallVectorImpl<unsigned> &NewVRegs) {
2218   CutOffInfo = CO_None;
2219   LLVMContext &Ctx = MF->getFunction()->getContext();
2220   SmallVirtRegSet FixedRegisters;
2221   unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
2222   if (Reg == ~0U && (CutOffInfo != CO_None)) {
2223     uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2224     if (CutOffEncountered == CO_Depth)
2225       Ctx.emitError("register allocation failed: maximum depth for recoloring "
2226                     "reached. Use -fexhaustive-register-search to skip "
2227                     "cutoffs");
2228     else if (CutOffEncountered == CO_Interf)
2229       Ctx.emitError("register allocation failed: maximum interference for "
2230                     "recoloring reached. Use -fexhaustive-register-search "
2231                     "to skip cutoffs");
2232     else if (CutOffEncountered == (CO_Depth | CO_Interf))
2233       Ctx.emitError("register allocation failed: maximum interference and "
2234                     "depth for recoloring reached. Use "
2235                     "-fexhaustive-register-search to skip cutoffs");
2236   }
2237   return Reg;
2238 }
2239 
2240 /// Using a CSR for the first time has a cost because it causes push|pop
2241 /// to be added to prologue|epilogue. Splitting a cold section of the live
2242 /// range can have lower cost than using the CSR for the first time;
2243 /// Spilling a live range in the cold path can have lower cost than using
2244 /// the CSR for the first time. Returns the physical register if we decide
2245 /// to use the CSR; otherwise return 0.
2246 unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
2247                                          AllocationOrder &Order,
2248                                          unsigned PhysReg,
2249                                          unsigned &CostPerUseLimit,
2250                                          SmallVectorImpl<unsigned> &NewVRegs) {
2251   if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2252     // We choose spill over using the CSR for the first time if the spill cost
2253     // is lower than CSRCost.
2254     SA->analyze(&VirtReg);
2255     if (calcSpillCost() >= CSRCost)
2256       return PhysReg;
2257 
2258     // We are going to spill, set CostPerUseLimit to 1 to make sure that
2259     // we will not use a callee-saved register in tryEvict.
2260     CostPerUseLimit = 1;
2261     return 0;
2262   }
2263   if (getStage(VirtReg) < RS_Split) {
2264     // We choose pre-splitting over using the CSR for the first time if
2265     // the cost of splitting is lower than CSRCost.
2266     SA->analyze(&VirtReg);
2267     unsigned NumCands = 0;
2268     BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2269     unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2270                                                  NumCands, true /*IgnoreCSR*/);
2271     if (BestCand == NoCand)
2272       // Use the CSR if we can't find a region split below CSRCost.
2273       return PhysReg;
2274 
2275     // Perform the actual pre-splitting.
2276     doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2277     return 0;
2278   }
2279   return PhysReg;
2280 }
2281 
2282 void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
2283   // Do not keep invalid information around.
2284   SetOfBrokenHints.remove(&LI);
2285 }
2286 
2287 void RAGreedy::initializeCSRCost() {
2288   // We use the larger one out of the command-line option and the value report
2289   // by TRI.
2290   CSRCost = BlockFrequency(
2291       std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2292   if (!CSRCost.getFrequency())
2293     return;
2294 
2295   // Raw cost is relative to Entry == 2^14; scale it appropriately.
2296   uint64_t ActualEntry = MBFI->getEntryFreq();
2297   if (!ActualEntry) {
2298     CSRCost = 0;
2299     return;
2300   }
2301   uint64_t FixedEntry = 1 << 14;
2302   if (ActualEntry < FixedEntry)
2303     CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2304   else if (ActualEntry <= UINT32_MAX)
2305     // Invert the fraction and divide.
2306     CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2307   else
2308     // Can't use BranchProbability in general, since it takes 32-bit numbers.
2309     CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
2310 }
2311 
2312 /// \brief Collect the hint info for \p Reg.
2313 /// The results are stored into \p Out.
2314 /// \p Out is not cleared before being populated.
2315 void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
2316   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2317     if (!Instr.isFullCopy())
2318       continue;
2319     // Look for the other end of the copy.
2320     unsigned OtherReg = Instr.getOperand(0).getReg();
2321     if (OtherReg == Reg) {
2322       OtherReg = Instr.getOperand(1).getReg();
2323       if (OtherReg == Reg)
2324         continue;
2325     }
2326     // Get the current assignment.
2327     unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
2328                                 ? OtherReg
2329                                 : VRM->getPhys(OtherReg);
2330     // Push the collected information.
2331     Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2332                            OtherPhysReg));
2333   }
2334 }
2335 
2336 /// \brief Using the given \p List, compute the cost of the broken hints if
2337 /// \p PhysReg was used.
2338 /// \return The cost of \p List for \p PhysReg.
2339 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2340                                            unsigned PhysReg) {
2341   BlockFrequency Cost = 0;
2342   for (const HintInfo &Info : List) {
2343     if (Info.PhysReg != PhysReg)
2344       Cost += Info.Freq;
2345   }
2346   return Cost;
2347 }
2348 
2349 /// \brief Using the register assigned to \p VirtReg, try to recolor
2350 /// all the live ranges that are copy-related with \p VirtReg.
2351 /// The recoloring is then propagated to all the live-ranges that have
2352 /// been recolored and so on, until no more copies can be coalesced or
2353 /// it is not profitable.
2354 /// For a given live range, profitability is determined by the sum of the
2355 /// frequencies of the non-identity copies it would introduce with the old
2356 /// and new register.
2357 void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
2358   // We have a broken hint, check if it is possible to fix it by
2359   // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2360   // some register and PhysReg may be available for the other live-ranges.
2361   SmallSet<unsigned, 4> Visited;
2362   SmallVector<unsigned, 2> RecoloringCandidates;
2363   HintsInfo Info;
2364   unsigned Reg = VirtReg.reg;
2365   unsigned PhysReg = VRM->getPhys(Reg);
2366   // Start the recoloring algorithm from the input live-interval, then
2367   // it will propagate to the ones that are copy-related with it.
2368   Visited.insert(Reg);
2369   RecoloringCandidates.push_back(Reg);
2370 
2371   DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
2372                << PrintReg(PhysReg, TRI) << ")\n");
2373 
2374   do {
2375     Reg = RecoloringCandidates.pop_back_val();
2376 
2377     // We cannot recolor physcal register.
2378     if (TargetRegisterInfo::isPhysicalRegister(Reg))
2379       continue;
2380 
2381     assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");
2382 
2383     // Get the live interval mapped with this virtual register to be able
2384     // to check for the interference with the new color.
2385     LiveInterval &LI = LIS->getInterval(Reg);
2386     unsigned CurrPhys = VRM->getPhys(Reg);
2387     // Check that the new color matches the register class constraints and
2388     // that it is free for this live range.
2389     if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2390                                 Matrix->checkInterference(LI, PhysReg)))
2391       continue;
2392 
2393     DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
2394                  << ") is recolorable.\n");
2395 
2396     // Gather the hint info.
2397     Info.clear();
2398     collectHintInfo(Reg, Info);
2399     // Check if recoloring the live-range will increase the cost of the
2400     // non-identity copies.
2401     if (CurrPhys != PhysReg) {
2402       DEBUG(dbgs() << "Checking profitability:\n");
2403       BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2404       BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2405       DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
2406                    << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
2407       if (OldCopiesCost < NewCopiesCost) {
2408         DEBUG(dbgs() << "=> Not profitable.\n");
2409         continue;
2410       }
2411       // At this point, the cost is either cheaper or equal. If it is
2412       // equal, we consider this is profitable because it may expose
2413       // more recoloring opportunities.
2414       DEBUG(dbgs() << "=> Profitable.\n");
2415       // Recolor the live-range.
2416       Matrix->unassign(LI);
2417       Matrix->assign(LI, PhysReg);
2418     }
2419     // Push all copy-related live-ranges to keep reconciling the broken
2420     // hints.
2421     for (const HintInfo &HI : Info) {
2422       if (Visited.insert(HI.Reg).second)
2423         RecoloringCandidates.push_back(HI.Reg);
2424     }
2425   } while (!RecoloringCandidates.empty());
2426 }
2427 
2428 /// \brief Try to recolor broken hints.
2429 /// Broken hints may be repaired by recoloring when an evicted variable
2430 /// freed up a register for a larger live-range.
2431 /// Consider the following example:
2432 /// BB1:
2433 ///   a =
2434 ///   b =
2435 /// BB2:
2436 ///   ...
2437 ///   = b
2438 ///   = a
2439 /// Let us assume b gets split:
2440 /// BB1:
2441 ///   a =
2442 ///   b =
2443 /// BB2:
2444 ///   c = b
2445 ///   ...
2446 ///   d = c
2447 ///   = d
2448 ///   = a
2449 /// Because of how the allocation work, b, c, and d may be assigned different
2450 /// colors. Now, if a gets evicted later:
2451 /// BB1:
2452 ///   a =
2453 ///   st a, SpillSlot
2454 ///   b =
2455 /// BB2:
2456 ///   c = b
2457 ///   ...
2458 ///   d = c
2459 ///   = d
2460 ///   e = ld SpillSlot
2461 ///   = e
2462 /// This is likely that we can assign the same register for b, c, and d,
2463 /// getting rid of 2 copies.
2464 void RAGreedy::tryHintsRecoloring() {
2465   for (LiveInterval *LI : SetOfBrokenHints) {
2466     assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
2467            "Recoloring is possible only for virtual registers");
2468     // Some dead defs may be around (e.g., because of debug uses).
2469     // Ignore those.
2470     if (!VRM->hasPhys(LI->reg))
2471       continue;
2472     tryHintRecoloring(*LI);
2473   }
2474 }
2475 
2476 unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
2477                                      SmallVectorImpl<unsigned> &NewVRegs,
2478                                      SmallVirtRegSet &FixedRegisters,
2479                                      unsigned Depth) {
2480   unsigned CostPerUseLimit = ~0u;
2481   // First try assigning a free register.
2482   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
2483   if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
2484     // When NewVRegs is not empty, we may have made decisions such as evicting
2485     // a virtual register, go with the earlier decisions and use the physical
2486     // register.
2487     if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
2488         NewVRegs.empty()) {
2489       unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2490                                               CostPerUseLimit, NewVRegs);
2491       if (CSRReg || !NewVRegs.empty())
2492         // Return now if we decide to use a CSR or create new vregs due to
2493         // pre-splitting.
2494         return CSRReg;
2495     } else
2496       return PhysReg;
2497   }
2498 
2499   LiveRangeStage Stage = getStage(VirtReg);
2500   DEBUG(dbgs() << StageName[Stage]
2501                << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
2502 
2503   // Try to evict a less worthy live range, but only for ranges from the primary
2504   // queue. The RS_Split ranges already failed to do this, and they should not
2505   // get a second chance until they have been split.
2506   if (Stage != RS_Split)
2507     if (unsigned PhysReg =
2508             tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
2509       unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
2510       // If VirtReg has a hint and that hint is broken record this
2511       // virtual register as a recoloring candidate for broken hint.
2512       // Indeed, since we evicted a variable in its neighborhood it is
2513       // likely we can at least partially recolor some of the
2514       // copy-related live-ranges.
2515       if (Hint && Hint != PhysReg)
2516         SetOfBrokenHints.insert(&VirtReg);
2517       return PhysReg;
2518     }
2519 
2520   assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
2521 
2522   // The first time we see a live range, don't try to split or spill.
2523   // Wait until the second time, when all smaller ranges have been allocated.
2524   // This gives a better picture of the interference to split around.
2525   if (Stage < RS_Split) {
2526     setStage(VirtReg, RS_Split);
2527     DEBUG(dbgs() << "wait for second round\n");
2528     NewVRegs.push_back(VirtReg.reg);
2529     return 0;
2530   }
2531 
2532   // If we couldn't allocate a register from spilling, there is probably some
2533   // invalid inline assembly. The base class wil report it.
2534   if (Stage >= RS_Done || !VirtReg.isSpillable())
2535     return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2536                                    Depth);
2537 
2538   // Try splitting VirtReg or interferences.
2539   unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
2540   if (PhysReg || !NewVRegs.empty())
2541     return PhysReg;
2542 
2543   // Finally spill VirtReg itself.
2544   if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
2545     // TODO: This is experimental and in particular, we do not model
2546     // the live range splitting done by spilling correctly.
2547     // We would need a deep integration with the spiller to do the
2548     // right thing here. Anyway, that is still good for early testing.
2549     setStage(VirtReg, RS_Memory);
2550     DEBUG(dbgs() << "Do as if this register is in memory\n");
2551     NewVRegs.push_back(VirtReg.reg);
2552   } else {
2553     NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
2554     LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
2555     spiller().spill(LRE);
2556     setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2557 
2558     if (VerifyEnabled)
2559       MF->verify(this, "After spilling");
2560   }
2561 
2562   // The live virtual register requesting allocation was spilled, so tell
2563   // the caller not to allocate anything during this round.
2564   return 0;
2565 }
2566 
2567 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2568   DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2569                << "********** Function: " << mf.getName() << '\n');
2570 
2571   MF = &mf;
2572   TRI = MF->getSubtarget().getRegisterInfo();
2573   TII = MF->getSubtarget().getInstrInfo();
2574   RCI.runOnMachineFunction(mf);
2575 
2576   EnableLocalReassign = EnableLocalReassignment ||
2577                         MF->getSubtarget().enableRALocalReassignment(
2578                             MF->getTarget().getOptLevel());
2579 
2580   if (VerifyEnabled)
2581     MF->verify(this, "Before greedy register allocator");
2582 
2583   RegAllocBase::init(getAnalysis<VirtRegMap>(),
2584                      getAnalysis<LiveIntervals>(),
2585                      getAnalysis<LiveRegMatrix>());
2586   Indexes = &getAnalysis<SlotIndexes>();
2587   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2588   DomTree = &getAnalysis<MachineDominatorTree>();
2589   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
2590   Loops = &getAnalysis<MachineLoopInfo>();
2591   Bundles = &getAnalysis<EdgeBundles>();
2592   SpillPlacer = &getAnalysis<SpillPlacement>();
2593   DebugVars = &getAnalysis<LiveDebugVariables>();
2594 
2595   initializeCSRCost();
2596 
2597   calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
2598 
2599   DEBUG(LIS->dump());
2600 
2601   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2602   SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI));
2603   ExtraRegInfo.clear();
2604   ExtraRegInfo.resize(MRI->getNumVirtRegs());
2605   NextCascade = 1;
2606   IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2607   GlobalCand.resize(32);  // This will grow as needed.
2608   SetOfBrokenHints.clear();
2609 
2610   allocatePhysRegs();
2611   tryHintsRecoloring();
2612   releaseMemory();
2613   return true;
2614 }
2615