1 //===-- StackColoring.cpp -------------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements the stack-coloring optimization that looks for
11 // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
12 // which represent the possible lifetime of stack slots. It attempts to
13 // merge disjoint stack slots and reduce the used stack space.
14 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
15 //
16 // TODO: In the future we plan to improve stack coloring in the following ways:
17 // 1. Allow merging multiple small slots into a single larger slot at different
18 //    offsets.
19 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
20 //    spill slots.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/DepthFirstIterator.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/CodeGen/LiveInterval.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineFunctionPass.h"
34 #include "llvm/CodeGen/MachineLoopInfo.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineModuleInfo.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/Passes.h"
39 #include "llvm/CodeGen/PseudoSourceValue.h"
40 #include "llvm/CodeGen/SelectionDAGNodes.h"
41 #include "llvm/CodeGen/SlotIndexes.h"
42 #include "llvm/CodeGen/StackProtector.h"
43 #include "llvm/CodeGen/WinEHFuncInfo.h"
44 #include "llvm/IR/DebugInfo.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Debug.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/Target/TargetInstrInfo.h"
53 #include "llvm/Target/TargetRegisterInfo.h"
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "stack-coloring"
58 
59 static cl::opt<bool>
60 DisableColoring("no-stack-coloring",
61         cl::init(false), cl::Hidden,
62         cl::desc("Disable stack coloring"));
63 
64 /// The user may write code that uses allocas outside of the declared lifetime
65 /// zone. This can happen when the user returns a reference to a local
66 /// data-structure. We can detect these cases and decide not to optimize the
67 /// code. If this flag is enabled, we try to save the user. This option
68 /// is treated as overriding LifetimeStartOnFirstUse below.
69 static cl::opt<bool>
70 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
71                           cl::init(false), cl::Hidden,
72                           cl::desc("Do not optimize lifetime zones that "
73                                    "are broken"));
74 
75 /// Enable enhanced dataflow scheme for lifetime analysis (treat first
76 /// use of stack slot as start of slot lifetime, as opposed to looking
77 /// for LIFETIME_START marker). See "Implementation notes" below for
78 /// more info.
79 static cl::opt<bool>
80 LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
81         cl::init(true), cl::Hidden,
82         cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
83 
84 
85 STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
86 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
87 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
88 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
89 
90 //===----------------------------------------------------------------------===//
91 //                           StackColoring Pass
92 //===----------------------------------------------------------------------===//
93 //
94 // Stack Coloring reduces stack usage by merging stack slots when they
95 // can't be used together. For example, consider the following C program:
96 //
97 //     void bar(char *, int);
98 //     void foo(bool var) {
99 //         A: {
100 //             char z[4096];
101 //             bar(z, 0);
102 //         }
103 //
104 //         char *p;
105 //         char x[4096];
106 //         char y[4096];
107 //         if (var) {
108 //             p = x;
109 //         } else {
110 //             bar(y, 1);
111 //             p = y + 1024;
112 //         }
113 //     B:
114 //         bar(p, 2);
115 //     }
116 //
117 // Naively-compiled, this program would use 12k of stack space. However, the
118 // stack slot corresponding to `z` is always destroyed before either of the
119 // stack slots for `x` or `y` are used, and then `x` is only used if `var`
120 // is true, while `y` is only used if `var` is false. So in no time are 2
121 // of the stack slots used together, and therefore we can merge them,
122 // compiling the function using only a single 4k alloca:
123 //
124 //     void foo(bool var) { // equivalent
125 //         char x[4096];
126 //         char *p;
127 //         bar(x, 0);
128 //         if (var) {
129 //             p = x;
130 //         } else {
131 //             bar(x, 1);
132 //             p = x + 1024;
133 //         }
134 //         bar(p, 2);
135 //     }
136 //
137 // This is an important optimization if we want stack space to be under
138 // control in large functions, both open-coded ones and ones created by
139 // inlining.
140 //
141 // Implementation Notes:
142 // ---------------------
143 //
144 // An important part of the above reasoning is that `z` can't be accessed
145 // while the latter 2 calls to `bar` are running. This is justified because
146 // `z`'s lifetime is over after we exit from block `A:`, so any further
147 // accesses to it would be UB. The way we represent this information
148 // in LLVM is by having frontends delimit blocks with `lifetime.start`
149 // and `lifetime.end` intrinsics.
150 //
151 // The effect of these intrinsics seems to be as follows (maybe I should
152 // specify this in the reference?):
153 //
154 //   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
155 //   lifetime intrinsic refers to that stack slot, in which case
156 //   it is marked as *in-scope*.
157 //   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
158 //   the stack slot is overwritten with `undef`.
159 //   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
160 //   L4) on function exit, all stack slots are marked as *out-of-scope*.
161 //   L5) `lifetime.end` is a no-op when called on a slot that is already
162 //   *out-of-scope*.
163 //   L6) memory accesses to *out-of-scope* stack slots are UB.
164 //   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
165 //   are invalidated, unless the slot is "degenerate". This is used to
166 //   justify not marking slots as in-use until the pointer to them is
167 //   used, but feels a bit hacky in the presence of things like LICM. See
168 //   the "Degenerate Slots" section for more details.
169 //
170 // Now, let's ground stack coloring on these rules. We'll define a slot
171 // as *in-use* at a (dynamic) point in execution if it either can be
172 // written to at that point, or if it has a live and non-undef content
173 // at that point.
174 //
175 // Obviously, slots that are never *in-use* together can be merged, and
176 // in our example `foo`, the slots for `x`, `y` and `z` are never
177 // in-use together (of course, sometimes slots that *are* in-use together
178 // might still be mergable, but we don't care about that here).
179 //
180 // In this implementation, we successively merge pairs of slots that are
181 // not *in-use* together. We could be smarter - for example, we could merge
182 // a single large slot with 2 small slots, or we could construct the
183 // interference graph and run a "smart" graph coloring algorithm, but with
184 // that aside, how do we find out whether a pair of slots might be *in-use*
185 // together?
186 //
187 // From our rules, we see that *out-of-scope* slots are never *in-use*,
188 // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
189 // until their address is taken. Therefore, we can approximate slot activity
190 // using dataflow.
191 //
192 // A subtle point: naively, we might try to figure out which pairs of
193 // stack-slots interfere by propagating `S in-use` through the CFG for every
194 // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
195 // which they are both *in-use*.
196 //
197 // That is sound, but overly conservative in some cases: in our (artificial)
198 // example `foo`, either `x` or `y` might be in use at the label `B:`, but
199 // as `x` is only in use if we came in from the `var` edge and `y` only
200 // if we came from the `!var` edge, they still can't be in use together.
201 // See PR32488 for an important real-life case.
202 //
203 // If we wanted to find all points of interference precisely, we could
204 // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
205 // would be precise, but requires propagating `O(n^2)` dataflow facts.
206 //
207 // However, we aren't interested in the *set* of points of interference
208 // between 2 stack slots, only *whether* there *is* such a point. So we
209 // can rely on a little trick: for `S` and `T` to be in-use together,
210 // one of them needs to become in-use while the other is in-use (or
211 // they might both become in use simultaneously). We can check this
212 // by also keeping track of the points at which a stack slot might *start*
213 // being in-use.
214 //
215 // Exact first use:
216 // ----------------
217 //
218 // Consider the following motivating example:
219 //
220 //     int foo() {
221 //       char b1[1024], b2[1024];
222 //       if (...) {
223 //         char b3[1024];
224 //         <uses of b1, b3>;
225 //         return x;
226 //       } else {
227 //         char b4[1024], b5[1024];
228 //         <uses of b2, b4, b5>;
229 //         return y;
230 //       }
231 //     }
232 //
233 // In the code above, "b3" and "b4" are declared in distinct lexical
234 // scopes, meaning that it is easy to prove that they can share the
235 // same stack slot. Variables "b1" and "b2" are declared in the same
236 // scope, meaning that from a lexical point of view, their lifetimes
237 // overlap. From a control flow pointer of view, however, the two
238 // variables are accessed in disjoint regions of the CFG, thus it
239 // should be possible for them to share the same stack slot. An ideal
240 // stack allocation for the function above would look like:
241 //
242 //     slot 0: b1, b2
243 //     slot 1: b3, b4
244 //     slot 2: b5
245 //
246 // Achieving this allocation is tricky, however, due to the way
247 // lifetime markers are inserted. Here is a simplified view of the
248 // control flow graph for the code above:
249 //
250 //                +------  block 0 -------+
251 //               0| LIFETIME_START b1, b2 |
252 //               1| <test 'if' condition> |
253 //                +-----------------------+
254 //                   ./              \.
255 //   +------  block 1 -------+   +------  block 2 -------+
256 //  2| LIFETIME_START b3     |  5| LIFETIME_START b4, b5 |
257 //  3| <uses of b1, b3>      |  6| <uses of b2, b4, b5>  |
258 //  4| LIFETIME_END b3       |  7| LIFETIME_END b4, b5   |
259 //   +-----------------------+   +-----------------------+
260 //                   \.              /.
261 //                +------  block 3 -------+
262 //               8| <cleanupcode>         |
263 //               9| LIFETIME_END b1, b2   |
264 //              10| return                |
265 //                +-----------------------+
266 //
267 // If we create live intervals for the variables above strictly based
268 // on the lifetime markers, we'll get the set of intervals on the
269 // left. If we ignore the lifetime start markers and instead treat a
270 // variable's lifetime as beginning with the first reference to the
271 // var, then we get the intervals on the right.
272 //
273 //            LIFETIME_START      First Use
274 //     b1:    [0,9]               [3,4] [8,9]
275 //     b2:    [0,9]               [6,9]
276 //     b3:    [2,4]               [3,4]
277 //     b4:    [5,7]               [6,7]
278 //     b5:    [5,7]               [6,7]
279 //
280 // For the intervals on the left, the best we can do is overlap two
281 // variables (b3 and b4, for example); this gives us a stack size of
282 // 4*1024 bytes, not ideal. When treating first-use as the start of a
283 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
284 // byte stack (better).
285 //
286 // Degenerate Slots:
287 // -----------------
288 //
289 // Relying entirely on first-use of stack slots is problematic,
290 // however, due to the fact that optimizations can sometimes migrate
291 // uses of a variable outside of its lifetime start/end region. Here
292 // is an example:
293 //
294 //     int bar() {
295 //       char b1[1024], b2[1024];
296 //       if (...) {
297 //         <uses of b2>
298 //         return y;
299 //       } else {
300 //         <uses of b1>
301 //         while (...) {
302 //           char b3[1024];
303 //           <uses of b3>
304 //         }
305 //       }
306 //     }
307 //
308 // Before optimization, the control flow graph for the code above
309 // might look like the following:
310 //
311 //                +------  block 0 -------+
312 //               0| LIFETIME_START b1, b2 |
313 //               1| <test 'if' condition> |
314 //                +-----------------------+
315 //                   ./              \.
316 //   +------  block 1 -------+    +------- block 2 -------+
317 //  2| <uses of b2>          |   3| <uses of b1>          |
318 //   +-----------------------+    +-----------------------+
319 //              |                            |
320 //              |                 +------- block 3 -------+ <-\.
321 //              |                4| <while condition>     |    |
322 //              |                 +-----------------------+    |
323 //              |               /          |                   |
324 //              |              /  +------- block 4 -------+
325 //              \             /  5| LIFETIME_START b3     |    |
326 //               \           /   6| <uses of b3>          |    |
327 //                \         /    7| LIFETIME_END b3       |    |
328 //                 \        |    +------------------------+    |
329 //                  \       |                 \                /
330 //                +------  block 5 -----+      \---------------
331 //               8| <cleanupcode>       |
332 //               9| LIFETIME_END b1, b2 |
333 //              10| return              |
334 //                +---------------------+
335 //
336 // During optimization, however, it can happen that an instruction
337 // computing an address in "b3" (for example, a loop-invariant GEP) is
338 // hoisted up out of the loop from block 4 to block 2.  [Note that
339 // this is not an actual load from the stack, only an instruction that
340 // computes the address to be loaded]. If this happens, there is now a
341 // path leading from the first use of b3 to the return instruction
342 // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
343 // now larger than if we were computing live intervals strictly based
344 // on lifetime markers. In the example above, this lengthened lifetime
345 // would mean that it would appear illegal to overlap b3 with b2.
346 //
347 // To deal with this such cases, the code in ::collectMarkers() below
348 // tries to identify "degenerate" slots -- those slots where on a single
349 // forward pass through the CFG we encounter a first reference to slot
350 // K before we hit the slot K lifetime start marker. For such slots,
351 // we fall back on using the lifetime start marker as the beginning of
352 // the variable's lifetime.  NB: with this implementation, slots can
353 // appear degenerate in cases where there is unstructured control flow:
354 //
355 //    if (q) goto mid;
356 //    if (x > 9) {
357 //         int b[100];
358 //         memcpy(&b[0], ...);
359 //    mid: b[k] = ...;
360 //         abc(&b);
361 //    }
362 //
363 // If in RPO ordering chosen to walk the CFG  we happen to visit the b[k]
364 // before visiting the memcpy block (which will contain the lifetime start
365 // for "b" then it will appear that 'b' has a degenerate lifetime.
366 //
367 
368 namespace {
369 /// StackColoring - A machine pass for merging disjoint stack allocations,
370 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
371 class StackColoring : public MachineFunctionPass {
372   MachineFrameInfo *MFI;
373   MachineFunction *MF;
374 
375   /// A class representing liveness information for a single basic block.
376   /// Each bit in the BitVector represents the liveness property
377   /// for a different stack slot.
378   struct BlockLifetimeInfo {
379     /// Which slots BEGINs in each basic block.
380     BitVector Begin;
381     /// Which slots ENDs in each basic block.
382     BitVector End;
383     /// Which slots are marked as LIVE_IN, coming into each basic block.
384     BitVector LiveIn;
385     /// Which slots are marked as LIVE_OUT, coming out of each basic block.
386     BitVector LiveOut;
387   };
388 
389   /// Maps active slots (per bit) for each basic block.
390   typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
391   LivenessMap BlockLiveness;
392 
393   /// Maps serial numbers to basic blocks.
394   DenseMap<const MachineBasicBlock*, int> BasicBlocks;
395   /// Maps basic blocks to a serial number.
396   SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
397 
398   /// Maps slots to their use interval. Outside of this interval, slots
399   /// values are either dead or `undef` and they will not be written to.
400   SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
401   /// Maps slots to the points where they can become in-use.
402   SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
403   /// VNInfo is used for the construction of LiveIntervals.
404   VNInfo::Allocator VNInfoAllocator;
405   /// SlotIndex analysis object.
406   SlotIndexes *Indexes;
407   /// The stack protector object.
408   StackProtector *SP;
409 
410   /// The list of lifetime markers found. These markers are to be removed
411   /// once the coloring is done.
412   SmallVector<MachineInstr*, 8> Markers;
413 
414   /// Record the FI slots for which we have seen some sort of
415   /// lifetime marker (either start or end).
416   BitVector InterestingSlots;
417 
418   /// FI slots that need to be handled conservatively (for these
419   /// slots lifetime-start-on-first-use is disabled).
420   BitVector ConservativeSlots;
421 
422   /// Number of iterations taken during data flow analysis.
423   unsigned NumIterations;
424 
425 public:
426   static char ID;
427   StackColoring() : MachineFunctionPass(ID) {
428     initializeStackColoringPass(*PassRegistry::getPassRegistry());
429   }
430   void getAnalysisUsage(AnalysisUsage &AU) const override;
431   bool runOnMachineFunction(MachineFunction &MF) override;
432 
433 private:
434   /// Debug.
435   void dump() const;
436   void dumpIntervals() const;
437   void dumpBB(MachineBasicBlock *MBB) const;
438   void dumpBV(const char *tag, const BitVector &BV) const;
439 
440   /// Removes all of the lifetime marker instructions from the function.
441   /// \returns true if any markers were removed.
442   bool removeAllMarkers();
443 
444   /// Scan the machine function and find all of the lifetime markers.
445   /// Record the findings in the BEGIN and END vectors.
446   /// \returns the number of markers found.
447   unsigned collectMarkers(unsigned NumSlot);
448 
449   /// Perform the dataflow calculation and calculate the lifetime for each of
450   /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
451   /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
452   /// in and out blocks.
453   void calculateLocalLiveness();
454 
455   /// Returns TRUE if we're using the first-use-begins-lifetime method for
456   /// this slot (if FALSE, then the start marker is treated as start of lifetime).
457   bool applyFirstUse(int Slot) {
458     if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
459       return false;
460     if (ConservativeSlots.test(Slot))
461       return false;
462     return true;
463   }
464 
465   /// Examines the specified instruction and returns TRUE if the instruction
466   /// represents the start or end of an interesting lifetime. The slot or slots
467   /// starting or ending are added to the vector "slots" and "isStart" is set
468   /// accordingly.
469   /// \returns True if inst contains a lifetime start or end
470   bool isLifetimeStartOrEnd(const MachineInstr &MI,
471                             SmallVector<int, 4> &slots,
472                             bool &isStart);
473 
474   /// Construct the LiveIntervals for the slots.
475   void calculateLiveIntervals(unsigned NumSlots);
476 
477   /// Go over the machine function and change instructions which use stack
478   /// slots to use the joint slots.
479   void remapInstructions(DenseMap<int, int> &SlotRemap);
480 
481   /// The input program may contain instructions which are not inside lifetime
482   /// markers. This can happen due to a bug in the compiler or due to a bug in
483   /// user code (for example, returning a reference to a local variable).
484   /// This procedure checks all of the instructions in the function and
485   /// invalidates lifetime ranges which do not contain all of the instructions
486   /// which access that frame slot.
487   void removeInvalidSlotRanges();
488 
489   /// Map entries which point to other entries to their destination.
490   ///   A->B->C becomes A->C.
491   void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
492 
493   /// Used in collectMarkers
494   typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap;
495 };
496 } // end anonymous namespace
497 
498 char StackColoring::ID = 0;
499 char &llvm::StackColoringID = StackColoring::ID;
500 
501 INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
502                       "Merge disjoint stack slots", false, false)
503 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
504 INITIALIZE_PASS_DEPENDENCY(StackProtector)
505 INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
506                     "Merge disjoint stack slots", false, false)
507 
508 void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
509   AU.addRequired<SlotIndexes>();
510   AU.addRequired<StackProtector>();
511   MachineFunctionPass::getAnalysisUsage(AU);
512 }
513 
514 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
515 LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
516                                             const BitVector &BV) const {
517   dbgs() << tag << " : { ";
518   for (unsigned I = 0, E = BV.size(); I != E; ++I)
519     dbgs() << BV.test(I) << " ";
520   dbgs() << "}\n";
521 }
522 
523 LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
524   LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
525   assert(BI != BlockLiveness.end() && "Block not found");
526   const BlockLifetimeInfo &BlockInfo = BI->second;
527 
528   dumpBV("BEGIN", BlockInfo.Begin);
529   dumpBV("END", BlockInfo.End);
530   dumpBV("LIVE_IN", BlockInfo.LiveIn);
531   dumpBV("LIVE_OUT", BlockInfo.LiveOut);
532 }
533 
534 LLVM_DUMP_METHOD void StackColoring::dump() const {
535   for (MachineBasicBlock *MBB : depth_first(MF)) {
536     dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
537            << MBB->getName() << "]\n";
538     dumpBB(MBB);
539   }
540 }
541 
542 LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
543   for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
544     dbgs() << "Interval[" << I << "]:\n";
545     Intervals[I]->dump();
546   }
547 }
548 #endif
549 
550 static inline int getStartOrEndSlot(const MachineInstr &MI)
551 {
552   assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
553           MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
554          "Expected LIFETIME_START or LIFETIME_END op");
555   const MachineOperand &MO = MI.getOperand(0);
556   int Slot = MO.getIndex();
557   if (Slot >= 0)
558     return Slot;
559   return -1;
560 }
561 
562 //
563 // At the moment the only way to end a variable lifetime is with
564 // a VARIABLE_LIFETIME op (which can't contain a start). If things
565 // change and the IR allows for a single inst that both begins
566 // and ends lifetime(s), this interface will need to be reworked.
567 //
568 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
569                                          SmallVector<int, 4> &slots,
570                                          bool &isStart)
571 {
572   if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
573       MI.getOpcode() == TargetOpcode::LIFETIME_END) {
574     int Slot = getStartOrEndSlot(MI);
575     if (Slot < 0)
576       return false;
577     if (!InterestingSlots.test(Slot))
578       return false;
579     slots.push_back(Slot);
580     if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
581       isStart = false;
582       return true;
583     }
584     if (! applyFirstUse(Slot)) {
585       isStart = true;
586       return true;
587     }
588   } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
589     if (! MI.isDebugValue()) {
590       bool found = false;
591       for (const MachineOperand &MO : MI.operands()) {
592         if (!MO.isFI())
593           continue;
594         int Slot = MO.getIndex();
595         if (Slot<0)
596           continue;
597         if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
598           slots.push_back(Slot);
599           found = true;
600         }
601       }
602       if (found) {
603         isStart = true;
604         return true;
605       }
606     }
607   }
608   return false;
609 }
610 
611 unsigned StackColoring::collectMarkers(unsigned NumSlot)
612 {
613   unsigned MarkersFound = 0;
614   BlockBitVecMap SeenStartMap;
615   InterestingSlots.clear();
616   InterestingSlots.resize(NumSlot);
617   ConservativeSlots.clear();
618   ConservativeSlots.resize(NumSlot);
619 
620   // number of start and end lifetime ops for each slot
621   SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
622   SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
623 
624   // Step 1: collect markers and populate the "InterestingSlots"
625   // and "ConservativeSlots" sets.
626   for (MachineBasicBlock *MBB : depth_first(MF)) {
627 
628     // Compute the set of slots for which we've seen a START marker but have
629     // not yet seen an END marker at this point in the walk (e.g. on entry
630     // to this bb).
631     BitVector BetweenStartEnd;
632     BetweenStartEnd.resize(NumSlot);
633     for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
634              PE = MBB->pred_end(); PI != PE; ++PI) {
635       BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
636       if (I != SeenStartMap.end()) {
637         BetweenStartEnd |= I->second;
638       }
639     }
640 
641     // Walk the instructions in the block to look for start/end ops.
642     for (MachineInstr &MI : *MBB) {
643       if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
644           MI.getOpcode() == TargetOpcode::LIFETIME_END) {
645         int Slot = getStartOrEndSlot(MI);
646         if (Slot < 0)
647           continue;
648         InterestingSlots.set(Slot);
649         if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
650           BetweenStartEnd.set(Slot);
651           NumStartLifetimes[Slot] += 1;
652         } else {
653           BetweenStartEnd.reset(Slot);
654           NumEndLifetimes[Slot] += 1;
655         }
656         const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
657         if (Allocation) {
658           DEBUG(dbgs() << "Found a lifetime ");
659           DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
660                                ? "start"
661                                : "end"));
662           DEBUG(dbgs() << " marker for slot #" << Slot);
663           DEBUG(dbgs() << " with allocation: " << Allocation->getName()
664                        << "\n");
665         }
666         Markers.push_back(&MI);
667         MarkersFound += 1;
668       } else {
669         for (const MachineOperand &MO : MI.operands()) {
670           if (!MO.isFI())
671             continue;
672           int Slot = MO.getIndex();
673           if (Slot < 0)
674             continue;
675           if (! BetweenStartEnd.test(Slot)) {
676             ConservativeSlots.set(Slot);
677           }
678         }
679       }
680     }
681     BitVector &SeenStart = SeenStartMap[MBB];
682     SeenStart |= BetweenStartEnd;
683   }
684   if (!MarkersFound) {
685     return 0;
686   }
687 
688   // PR27903: slots with multiple start or end lifetime ops are not
689   // safe to enable for "lifetime-start-on-first-use".
690   for (unsigned slot = 0; slot < NumSlot; ++slot)
691     if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
692       ConservativeSlots.set(slot);
693   DEBUG(dumpBV("Conservative slots", ConservativeSlots));
694 
695   // Step 2: compute begin/end sets for each block
696 
697   // NOTE: We use a depth-first iteration to ensure that we obtain a
698   // deterministic numbering.
699   for (MachineBasicBlock *MBB : depth_first(MF)) {
700 
701     // Assign a serial number to this basic block.
702     BasicBlocks[MBB] = BasicBlockNumbering.size();
703     BasicBlockNumbering.push_back(MBB);
704 
705     // Keep a reference to avoid repeated lookups.
706     BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
707 
708     BlockInfo.Begin.resize(NumSlot);
709     BlockInfo.End.resize(NumSlot);
710 
711     SmallVector<int, 4> slots;
712     for (MachineInstr &MI : *MBB) {
713       bool isStart = false;
714       slots.clear();
715       if (isLifetimeStartOrEnd(MI, slots, isStart)) {
716         if (!isStart) {
717           assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
718           int Slot = slots[0];
719           if (BlockInfo.Begin.test(Slot)) {
720             BlockInfo.Begin.reset(Slot);
721           }
722           BlockInfo.End.set(Slot);
723         } else {
724           for (auto Slot : slots) {
725             DEBUG(dbgs() << "Found a use of slot #" << Slot);
726             DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
727             DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
728             const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
729             if (Allocation) {
730               DEBUG(dbgs() << " with allocation: "<< Allocation->getName());
731             }
732             DEBUG(dbgs() << "\n");
733             if (BlockInfo.End.test(Slot)) {
734               BlockInfo.End.reset(Slot);
735             }
736             BlockInfo.Begin.set(Slot);
737           }
738         }
739       }
740     }
741   }
742 
743   // Update statistics.
744   NumMarkerSeen += MarkersFound;
745   return MarkersFound;
746 }
747 
748 void StackColoring::calculateLocalLiveness()
749 {
750   unsigned NumIters = 0;
751   bool changed = true;
752   while (changed) {
753     changed = false;
754     ++NumIters;
755 
756     for (const MachineBasicBlock *BB : BasicBlockNumbering) {
757 
758       // Use an iterator to avoid repeated lookups.
759       LivenessMap::iterator BI = BlockLiveness.find(BB);
760       assert(BI != BlockLiveness.end() && "Block not found");
761       BlockLifetimeInfo &BlockInfo = BI->second;
762 
763       // Compute LiveIn by unioning together the LiveOut sets of all preds.
764       BitVector LocalLiveIn;
765       for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
766            PE = BB->pred_end(); PI != PE; ++PI) {
767         LivenessMap::const_iterator I = BlockLiveness.find(*PI);
768         assert(I != BlockLiveness.end() && "Predecessor not found");
769         LocalLiveIn |= I->second.LiveOut;
770       }
771 
772       // Compute LiveOut by subtracting out lifetimes that end in this
773       // block, then adding in lifetimes that begin in this block.  If
774       // we have both BEGIN and END markers in the same basic block
775       // then we know that the BEGIN marker comes after the END,
776       // because we already handle the case where the BEGIN comes
777       // before the END when collecting the markers (and building the
778       // BEGIN/END vectors).
779       BitVector LocalLiveOut = LocalLiveIn;
780       LocalLiveOut.reset(BlockInfo.End);
781       LocalLiveOut |= BlockInfo.Begin;
782 
783       // Update block LiveIn set, noting whether it has changed.
784       if (LocalLiveIn.test(BlockInfo.LiveIn)) {
785         changed = true;
786         BlockInfo.LiveIn |= LocalLiveIn;
787       }
788 
789       // Update block LiveOut set, noting whether it has changed.
790       if (LocalLiveOut.test(BlockInfo.LiveOut)) {
791         changed = true;
792         BlockInfo.LiveOut |= LocalLiveOut;
793       }
794     }
795   }// while changed.
796 
797   NumIterations = NumIters;
798 }
799 
800 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
801   SmallVector<SlotIndex, 16> Starts;
802   SmallVector<bool, 16> DefinitelyInUse;
803 
804   // For each block, find which slots are active within this block
805   // and update the live intervals.
806   for (const MachineBasicBlock &MBB : *MF) {
807     Starts.clear();
808     Starts.resize(NumSlots);
809     DefinitelyInUse.clear();
810     DefinitelyInUse.resize(NumSlots);
811 
812     // Start the interval of the slots that we previously found to be 'in-use'.
813     BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
814     for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
815          pos = MBBLiveness.LiveIn.find_next(pos)) {
816       Starts[pos] = Indexes->getMBBStartIdx(&MBB);
817     }
818 
819     // Create the interval for the basic blocks containing lifetime begin/end.
820     for (const MachineInstr &MI : MBB) {
821 
822       SmallVector<int, 4> slots;
823       bool IsStart = false;
824       if (!isLifetimeStartOrEnd(MI, slots, IsStart))
825         continue;
826       SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
827       for (auto Slot : slots) {
828         if (IsStart) {
829           // If a slot is already definitely in use, we don't have to emit
830           // a new start marker because there is already a pre-existing
831           // one.
832           if (!DefinitelyInUse[Slot]) {
833             LiveStarts[Slot].push_back(ThisIndex);
834             DefinitelyInUse[Slot] = true;
835           }
836           if (!Starts[Slot].isValid())
837             Starts[Slot] = ThisIndex;
838         } else {
839           if (Starts[Slot].isValid()) {
840             VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
841             Intervals[Slot]->addSegment(
842                 LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
843             Starts[Slot] = SlotIndex(); // Invalidate the start index
844             DefinitelyInUse[Slot] = false;
845           }
846         }
847       }
848     }
849 
850     // Finish up started segments
851     for (unsigned i = 0; i < NumSlots; ++i) {
852       if (!Starts[i].isValid())
853         continue;
854 
855       SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
856       VNInfo *VNI = Intervals[i]->getValNumInfo(0);
857       Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
858     }
859   }
860 }
861 
862 bool StackColoring::removeAllMarkers() {
863   unsigned Count = 0;
864   for (MachineInstr *MI : Markers) {
865     MI->eraseFromParent();
866     Count++;
867   }
868   Markers.clear();
869 
870   DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
871   return Count;
872 }
873 
874 void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
875   unsigned FixedInstr = 0;
876   unsigned FixedMemOp = 0;
877   unsigned FixedDbg = 0;
878 
879   // Remap debug information that refers to stack slots.
880   for (auto &VI : MF->getVariableDbgInfo()) {
881     if (!VI.Var)
882       continue;
883     if (SlotRemap.count(VI.Slot)) {
884       DEBUG(dbgs() << "Remapping debug info for ["
885                    << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
886       VI.Slot = SlotRemap[VI.Slot];
887       FixedDbg++;
888     }
889   }
890 
891   // Keep a list of *allocas* which need to be remapped.
892   DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
893 
894   // Keep a list of allocas which has been affected by the remap.
895   SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
896 
897   for (const std::pair<int, int> &SI : SlotRemap) {
898     const AllocaInst *From = MFI->getObjectAllocation(SI.first);
899     const AllocaInst *To = MFI->getObjectAllocation(SI.second);
900     assert(To && From && "Invalid allocation object");
901     Allocas[From] = To;
902 
903     // AA might be used later for instruction scheduling, and we need it to be
904     // able to deduce the correct aliasing releationships between pointers
905     // derived from the alloca being remapped and the target of that remapping.
906     // The only safe way, without directly informing AA about the remapping
907     // somehow, is to directly update the IR to reflect the change being made
908     // here.
909     Instruction *Inst = const_cast<AllocaInst *>(To);
910     if (From->getType() != To->getType()) {
911       BitCastInst *Cast = new BitCastInst(Inst, From->getType());
912       Cast->insertAfter(Inst);
913       Inst = Cast;
914     }
915 
916     // We keep both slots to maintain AliasAnalysis metadata later.
917     MergedAllocas.insert(From);
918     MergedAllocas.insert(To);
919 
920     // Allow the stack protector to adjust its value map to account for the
921     // upcoming replacement.
922     SP->adjustForColoring(From, To);
923 
924     // The new alloca might not be valid in a llvm.dbg.declare for this
925     // variable, so undef out the use to make the verifier happy.
926     AllocaInst *FromAI = const_cast<AllocaInst *>(From);
927     if (FromAI->isUsedByMetadata())
928       ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
929     for (auto &Use : FromAI->uses()) {
930       if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
931         if (BCI->isUsedByMetadata())
932           ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
933     }
934 
935     // Note that this will not replace uses in MMOs (which we'll update below),
936     // or anywhere else (which is why we won't delete the original
937     // instruction).
938     FromAI->replaceAllUsesWith(Inst);
939   }
940 
941   // Remap all instructions to the new stack slots.
942   for (MachineBasicBlock &BB : *MF)
943     for (MachineInstr &I : BB) {
944       // Skip lifetime markers. We'll remove them soon.
945       if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
946           I.getOpcode() == TargetOpcode::LIFETIME_END)
947         continue;
948 
949       // Update the MachineMemOperand to use the new alloca.
950       for (MachineMemOperand *MMO : I.memoperands()) {
951         // We've replaced IR-level uses of the remapped allocas, so we only
952         // need to replace direct uses here.
953         const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
954         if (!AI)
955           continue;
956 
957         if (!Allocas.count(AI))
958           continue;
959 
960         MMO->setValue(Allocas[AI]);
961         FixedMemOp++;
962       }
963 
964       // Update all of the machine instruction operands.
965       for (MachineOperand &MO : I.operands()) {
966         if (!MO.isFI())
967           continue;
968         int FromSlot = MO.getIndex();
969 
970         // Don't touch arguments.
971         if (FromSlot<0)
972           continue;
973 
974         // Only look at mapped slots.
975         if (!SlotRemap.count(FromSlot))
976           continue;
977 
978         // In a debug build, check that the instruction that we are modifying is
979         // inside the expected live range. If the instruction is not inside
980         // the calculated range then it means that the alloca usage moved
981         // outside of the lifetime markers, or that the user has a bug.
982         // NOTE: Alloca address calculations which happen outside the lifetime
983         // zone are are okay, despite the fact that we don't have a good way
984         // for validating all of the usages of the calculation.
985 #ifndef NDEBUG
986         bool TouchesMemory = I.mayLoad() || I.mayStore();
987         // If we *don't* protect the user from escaped allocas, don't bother
988         // validating the instructions.
989         if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
990           SlotIndex Index = Indexes->getInstructionIndex(I);
991           const LiveInterval *Interval = &*Intervals[FromSlot];
992           assert(Interval->find(Index) != Interval->end() &&
993                  "Found instruction usage outside of live range.");
994         }
995 #endif
996 
997         // Fix the machine instructions.
998         int ToSlot = SlotRemap[FromSlot];
999         MO.setIndex(ToSlot);
1000         FixedInstr++;
1001       }
1002 
1003       // We adjust AliasAnalysis information for merged stack slots.
1004       MachineSDNode::mmo_iterator NewMemOps =
1005           MF->allocateMemRefsArray(I.getNumMemOperands());
1006       unsigned MemOpIdx = 0;
1007       bool ReplaceMemOps = false;
1008       for (MachineMemOperand *MMO : I.memoperands()) {
1009         // If this memory location can be a slot remapped here,
1010         // we remove AA information.
1011         bool MayHaveConflictingAAMD = false;
1012         if (MMO->getAAInfo()) {
1013           if (const Value *MMOV = MMO->getValue()) {
1014             SmallVector<Value *, 4> Objs;
1015             getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1016 
1017             if (Objs.empty())
1018               MayHaveConflictingAAMD = true;
1019             else
1020               for (Value *V : Objs) {
1021                 // If this memory location comes from a known stack slot
1022                 // that is not remapped, we continue checking.
1023                 // Otherwise, we need to invalidate AA infomation.
1024                 const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1025                 if (AI && MergedAllocas.count(AI)) {
1026                   MayHaveConflictingAAMD = true;
1027                   break;
1028                 }
1029               }
1030           }
1031         }
1032         if (MayHaveConflictingAAMD) {
1033           NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
1034           ReplaceMemOps = true;
1035         }
1036         else
1037           NewMemOps[MemOpIdx++] = MMO;
1038       }
1039 
1040       // If any memory operand is updated, set memory references of
1041       // this instruction.
1042       if (ReplaceMemOps)
1043         I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
1044     }
1045 
1046   // Update the location of C++ catch objects for the MSVC personality routine.
1047   if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1048     for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1049       for (WinEHHandlerType &H : TBME.HandlerArray)
1050         if (H.CatchObj.FrameIndex != INT_MAX &&
1051             SlotRemap.count(H.CatchObj.FrameIndex))
1052           H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1053 
1054   DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
1055   DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
1056   DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
1057 }
1058 
1059 void StackColoring::removeInvalidSlotRanges() {
1060   for (MachineBasicBlock &BB : *MF)
1061     for (MachineInstr &I : BB) {
1062       if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1063           I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue())
1064         continue;
1065 
1066       // Some intervals are suspicious! In some cases we find address
1067       // calculations outside of the lifetime zone, but not actual memory
1068       // read or write. Memory accesses outside of the lifetime zone are a clear
1069       // violation, but address calculations are okay. This can happen when
1070       // GEPs are hoisted outside of the lifetime zone.
1071       // So, in here we only check instructions which can read or write memory.
1072       if (!I.mayLoad() && !I.mayStore())
1073         continue;
1074 
1075       // Check all of the machine operands.
1076       for (const MachineOperand &MO : I.operands()) {
1077         if (!MO.isFI())
1078           continue;
1079 
1080         int Slot = MO.getIndex();
1081 
1082         if (Slot<0)
1083           continue;
1084 
1085         if (Intervals[Slot]->empty())
1086           continue;
1087 
1088         // Check that the used slot is inside the calculated lifetime range.
1089         // If it is not, warn about it and invalidate the range.
1090         LiveInterval *Interval = &*Intervals[Slot];
1091         SlotIndex Index = Indexes->getInstructionIndex(I);
1092         if (Interval->find(Index) == Interval->end()) {
1093           Interval->clear();
1094           DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
1095           EscapedAllocas++;
1096         }
1097       }
1098     }
1099 }
1100 
1101 void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1102                                    unsigned NumSlots) {
1103   // Expunge slot remap map.
1104   for (unsigned i=0; i < NumSlots; ++i) {
1105     // If we are remapping i
1106     if (SlotRemap.count(i)) {
1107       int Target = SlotRemap[i];
1108       // As long as our target is mapped to something else, follow it.
1109       while (SlotRemap.count(Target)) {
1110         Target = SlotRemap[Target];
1111         SlotRemap[i] = Target;
1112       }
1113     }
1114   }
1115 }
1116 
1117 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1118   DEBUG(dbgs() << "********** Stack Coloring **********\n"
1119                << "********** Function: "
1120                << ((const Value*)Func.getFunction())->getName() << '\n');
1121   MF = &Func;
1122   MFI = &MF->getFrameInfo();
1123   Indexes = &getAnalysis<SlotIndexes>();
1124   SP = &getAnalysis<StackProtector>();
1125   BlockLiveness.clear();
1126   BasicBlocks.clear();
1127   BasicBlockNumbering.clear();
1128   Markers.clear();
1129   Intervals.clear();
1130   LiveStarts.clear();
1131   VNInfoAllocator.Reset();
1132 
1133   unsigned NumSlots = MFI->getObjectIndexEnd();
1134 
1135   // If there are no stack slots then there are no markers to remove.
1136   if (!NumSlots)
1137     return false;
1138 
1139   SmallVector<int, 8> SortedSlots;
1140   SortedSlots.reserve(NumSlots);
1141   Intervals.reserve(NumSlots);
1142   LiveStarts.resize(NumSlots);
1143 
1144   unsigned NumMarkers = collectMarkers(NumSlots);
1145 
1146   unsigned TotalSize = 0;
1147   DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
1148   DEBUG(dbgs()<<"Slot structure:\n");
1149 
1150   for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1151     DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
1152     TotalSize += MFI->getObjectSize(i);
1153   }
1154 
1155   DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
1156 
1157   // Don't continue because there are not enough lifetime markers, or the
1158   // stack is too small, or we are told not to optimize the slots.
1159   if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1160       skipFunction(*Func.getFunction())) {
1161     DEBUG(dbgs()<<"Will not try to merge slots.\n");
1162     return removeAllMarkers();
1163   }
1164 
1165   for (unsigned i=0; i < NumSlots; ++i) {
1166     std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1167     LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1168     Intervals.push_back(std::move(LI));
1169     SortedSlots.push_back(i);
1170   }
1171 
1172   // Calculate the liveness of each block.
1173   calculateLocalLiveness();
1174   DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1175   DEBUG(dump());
1176 
1177   // Propagate the liveness information.
1178   calculateLiveIntervals(NumSlots);
1179   DEBUG(dumpIntervals());
1180 
1181   // Search for allocas which are used outside of the declared lifetime
1182   // markers.
1183   if (ProtectFromEscapedAllocas)
1184     removeInvalidSlotRanges();
1185 
1186   // Maps old slots to new slots.
1187   DenseMap<int, int> SlotRemap;
1188   unsigned RemovedSlots = 0;
1189   unsigned ReducedSize = 0;
1190 
1191   // Do not bother looking at empty intervals.
1192   for (unsigned I = 0; I < NumSlots; ++I) {
1193     if (Intervals[SortedSlots[I]]->empty())
1194       SortedSlots[I] = -1;
1195   }
1196 
1197   // This is a simple greedy algorithm for merging allocas. First, sort the
1198   // slots, placing the largest slots first. Next, perform an n^2 scan and look
1199   // for disjoint slots. When you find disjoint slots, merge the samller one
1200   // into the bigger one and update the live interval. Remove the small alloca
1201   // and continue.
1202 
1203   // Sort the slots according to their size. Place unused slots at the end.
1204   // Use stable sort to guarantee deterministic code generation.
1205   std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
1206                    [this](int LHS, int RHS) {
1207     // We use -1 to denote a uninteresting slot. Place these slots at the end.
1208     if (LHS == -1) return false;
1209     if (RHS == -1) return true;
1210     // Sort according to size.
1211     return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1212   });
1213 
1214   for (auto &s : LiveStarts)
1215     std::sort(s.begin(), s.end());
1216 
1217   bool Changed = true;
1218   while (Changed) {
1219     Changed = false;
1220     for (unsigned I = 0; I < NumSlots; ++I) {
1221       if (SortedSlots[I] == -1)
1222         continue;
1223 
1224       for (unsigned J=I+1; J < NumSlots; ++J) {
1225         if (SortedSlots[J] == -1)
1226           continue;
1227 
1228         int FirstSlot = SortedSlots[I];
1229         int SecondSlot = SortedSlots[J];
1230         LiveInterval *First = &*Intervals[FirstSlot];
1231         LiveInterval *Second = &*Intervals[SecondSlot];
1232         auto &FirstS = LiveStarts[FirstSlot];
1233         auto &SecondS = LiveStarts[SecondSlot];
1234         assert (!First->empty() && !Second->empty() && "Found an empty range");
1235 
1236         // Merge disjoint slots. This is a little bit tricky - see the
1237         // Implementation Notes section for an explanation.
1238         if (!First->isLiveAtIndexes(SecondS) &&
1239             !Second->isLiveAtIndexes(FirstS)) {
1240           Changed = true;
1241           First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1242 
1243           int OldSize = FirstS.size();
1244           FirstS.append(SecondS.begin(), SecondS.end());
1245           auto Mid = FirstS.begin() + OldSize;
1246           std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1247 
1248           SlotRemap[SecondSlot] = FirstSlot;
1249           SortedSlots[J] = -1;
1250           DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
1251                 SecondSlot<<" together.\n");
1252           unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
1253                                            MFI->getObjectAlignment(SecondSlot));
1254 
1255           assert(MFI->getObjectSize(FirstSlot) >=
1256                  MFI->getObjectSize(SecondSlot) &&
1257                  "Merging a small object into a larger one");
1258 
1259           RemovedSlots+=1;
1260           ReducedSize += MFI->getObjectSize(SecondSlot);
1261           MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1262           MFI->RemoveStackObject(SecondSlot);
1263         }
1264       }
1265     }
1266   }// While changed.
1267 
1268   // Record statistics.
1269   StackSpaceSaved += ReducedSize;
1270   StackSlotMerged += RemovedSlots;
1271   DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
1272         ReducedSize<<" bytes\n");
1273 
1274   // Scan the entire function and update all machine operands that use frame
1275   // indices to use the remapped frame index.
1276   expungeSlotMap(SlotRemap, NumSlots);
1277   remapInstructions(SlotRemap);
1278 
1279   return removeAllMarkers();
1280 }
1281