1 //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// Replaces repeated sequences of instructions with function calls.
12 ///
13 /// This works by placing every instruction from every basic block in a
14 /// suffix tree, and repeatedly querying that tree for repeated sequences of
15 /// instructions. If a sequence of instructions appears often, then it ought
16 /// to be beneficial to pull out into a function.
17 ///
18 /// This was originally presented at the 2016 LLVM Developers' Meeting in the
19 /// talk "Reducing Code Size Using Outlining". For a high-level overview of
20 /// how this pass works, the talk is available on YouTube at
21 ///
22 /// https://www.youtube.com/watch?v=yorld-WSOeU
23 ///
24 /// The slides for the talk are available at
25 ///
26 /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
27 ///
28 /// The talk provides an overview of how the outliner finds candidates and
29 /// ultimately outlines them. It describes how the main data structure for this
30 /// pass, the suffix tree, is queried and purged for candidates. It also gives
31 /// a simplified suffix tree construction algorithm for suffix trees based off
32 /// of the algorithm actually used here, Ukkonen's algorithm.
33 ///
34 /// For the original RFC for this pass, please see
35 ///
36 /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
37 ///
38 /// For more information on the suffix tree data structure, please see
39 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
40 ///
41 //===----------------------------------------------------------------------===//
42 #include "llvm/ADT/DenseMap.h"
43 #include "llvm/ADT/Statistic.h"
44 #include "llvm/ADT/Twine.h"
45 #include "llvm/CodeGen/MachineFrameInfo.h"
46 #include "llvm/CodeGen/MachineFunction.h"
47 #include "llvm/CodeGen/MachineInstrBuilder.h"
48 #include "llvm/CodeGen/MachineModuleInfo.h"
49 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
50 #include "llvm/CodeGen/Passes.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/Support/Allocator.h"
53 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include "llvm/Target/TargetInstrInfo.h"
56 #include "llvm/Target/TargetMachine.h"
57 #include "llvm/Target/TargetRegisterInfo.h"
58 #include "llvm/Target/TargetSubtargetInfo.h"
59 #include <functional>
60 #include <map>
61 #include <sstream>
62 #include <tuple>
63 #include <vector>
64 
65 #define DEBUG_TYPE "machine-outliner"
66 
67 using namespace llvm;
68 using namespace ore;
69 
70 STATISTIC(NumOutlined, "Number of candidates outlined");
71 STATISTIC(FunctionsCreated, "Number of functions created");
72 
73 namespace {
74 
75 /// \brief An individual sequence of instructions to be replaced with a call to
76 /// an outlined function.
77 struct Candidate {
78 
79   /// Set to false if the candidate overlapped with another candidate.
80   bool InCandidateList = true;
81 
82   /// The start index of this \p Candidate.
83   size_t StartIdx;
84 
85   /// The number of instructions in this \p Candidate.
86   size_t Len;
87 
88   /// The index of this \p Candidate's \p OutlinedFunction in the list of
89   /// \p OutlinedFunctions.
90   size_t FunctionIdx;
91 
92   /// Target-defined unsigned defining how to emit a call for this candidate.
93   unsigned CallClass = 0;
94 
95   /// \brief The number of instructions that would be saved by outlining every
96   /// candidate of this type.
97   ///
98   /// This is a fixed value which is not updated during the candidate pruning
99   /// process. It is only used for deciding which candidate to keep if two
100   /// candidates overlap. The true benefit is stored in the OutlinedFunction
101   /// for some given candidate.
102   unsigned Benefit = 0;
103 
104   Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx, unsigned CallClass)
105       : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx),
106         CallClass(CallClass) {}
107 
108   Candidate() {}
109 
110   /// \brief Used to ensure that \p Candidates are outlined in an order that
111   /// preserves the start and end indices of other \p Candidates.
112   bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; }
113 };
114 
115 /// \brief The information necessary to create an outlined function for some
116 /// class of candidate.
117 struct OutlinedFunction {
118 
119   /// The actual outlined function created.
120   /// This is initialized after we go through and create the actual function.
121   MachineFunction *MF = nullptr;
122 
123   /// A numbefr assigned to this function which appears at the end of its name.
124   size_t Name;
125 
126   /// The number of candidates for this OutlinedFunction.
127   size_t OccurrenceCount = 0;
128 
129   /// \brief The sequence of integers corresponding to the instructions in this
130   /// function.
131   std::vector<unsigned> Sequence;
132 
133   /// The number of instructions this function would save.
134   unsigned Benefit = 0;
135 
136   /// Target-defined unsigned defining how to emit the frame for this function.
137   unsigned FrameClass = 0;
138 
139   OutlinedFunction(size_t Name, size_t OccurrenceCount,
140                    const std::vector<unsigned> &Sequence, unsigned Benefit,
141                    unsigned FrameClass)
142       : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence),
143         Benefit(Benefit), FrameClass(FrameClass) {}
144 };
145 
146 /// Represents an undefined index in the suffix tree.
147 const size_t EmptyIdx = -1;
148 
149 /// A node in a suffix tree which represents a substring or suffix.
150 ///
151 /// Each node has either no children or at least two children, with the root
152 /// being a exception in the empty tree.
153 ///
154 /// Children are represented as a map between unsigned integers and nodes. If
155 /// a node N has a child M on unsigned integer k, then the mapping represented
156 /// by N is a proper prefix of the mapping represented by M. Note that this,
157 /// although similar to a trie is somewhat different: each node stores a full
158 /// substring of the full mapping rather than a single character state.
159 ///
160 /// Each internal node contains a pointer to the internal node representing
161 /// the same string, but with the first character chopped off. This is stored
162 /// in \p Link. Each leaf node stores the start index of its respective
163 /// suffix in \p SuffixIdx.
164 struct SuffixTreeNode {
165 
166   /// The children of this node.
167   ///
168   /// A child existing on an unsigned integer implies that from the mapping
169   /// represented by the current node, there is a way to reach another
170   /// mapping by tacking that character on the end of the current string.
171   DenseMap<unsigned, SuffixTreeNode *> Children;
172 
173   /// A flag set to false if the node has been pruned from the tree.
174   bool IsInTree = true;
175 
176   /// The start index of this node's substring in the main string.
177   size_t StartIdx = EmptyIdx;
178 
179   /// The end index of this node's substring in the main string.
180   ///
181   /// Every leaf node must have its \p EndIdx incremented at the end of every
182   /// step in the construction algorithm. To avoid having to update O(N)
183   /// nodes individually at the end of every step, the end index is stored
184   /// as a pointer.
185   size_t *EndIdx = nullptr;
186 
187   /// For leaves, the start index of the suffix represented by this node.
188   ///
189   /// For all other nodes, this is ignored.
190   size_t SuffixIdx = EmptyIdx;
191 
192   /// \brief For internal nodes, a pointer to the internal node representing
193   /// the same sequence with the first character chopped off.
194   ///
195   /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
196   /// Ukkonen's algorithm does to achieve linear-time construction is
197   /// keep track of which node the next insert should be at. This makes each
198   /// insert O(1), and there are a total of O(N) inserts. The suffix link
199   /// helps with inserting children of internal nodes.
200   ///
201   /// Say we add a child to an internal node with associated mapping S. The
202   /// next insertion must be at the node representing S - its first character.
203   /// This is given by the way that we iteratively build the tree in Ukkonen's
204   /// algorithm. The main idea is to look at the suffixes of each prefix in the
205   /// string, starting with the longest suffix of the prefix, and ending with
206   /// the shortest. Therefore, if we keep pointers between such nodes, we can
207   /// move to the next insertion point in O(1) time. If we don't, then we'd
208   /// have to query from the root, which takes O(N) time. This would make the
209   /// construction algorithm O(N^2) rather than O(N).
210   SuffixTreeNode *Link = nullptr;
211 
212   /// The parent of this node. Every node except for the root has a parent.
213   SuffixTreeNode *Parent = nullptr;
214 
215   /// The number of times this node's string appears in the tree.
216   ///
217   /// This is equal to the number of leaf children of the string. It represents
218   /// the number of suffixes that the node's string is a prefix of.
219   size_t OccurrenceCount = 0;
220 
221   /// The length of the string formed by concatenating the edge labels from the
222   /// root to this node.
223   size_t ConcatLen = 0;
224 
225   /// Returns true if this node is a leaf.
226   bool isLeaf() const { return SuffixIdx != EmptyIdx; }
227 
228   /// Returns true if this node is the root of its owning \p SuffixTree.
229   bool isRoot() const { return StartIdx == EmptyIdx; }
230 
231   /// Return the number of elements in the substring associated with this node.
232   size_t size() const {
233 
234     // Is it the root? If so, it's the empty string so return 0.
235     if (isRoot())
236       return 0;
237 
238     assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
239 
240     // Size = the number of elements in the string.
241     // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
242     return *EndIdx - StartIdx + 1;
243   }
244 
245   SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link,
246                  SuffixTreeNode *Parent)
247       : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
248 
249   SuffixTreeNode() {}
250 };
251 
252 /// A data structure for fast substring queries.
253 ///
254 /// Suffix trees represent the suffixes of their input strings in their leaves.
255 /// A suffix tree is a type of compressed trie structure where each node
256 /// represents an entire substring rather than a single character. Each leaf
257 /// of the tree is a suffix.
258 ///
259 /// A suffix tree can be seen as a type of state machine where each state is a
260 /// substring of the full string. The tree is structured so that, for a string
261 /// of length N, there are exactly N leaves in the tree. This structure allows
262 /// us to quickly find repeated substrings of the input string.
263 ///
264 /// In this implementation, a "string" is a vector of unsigned integers.
265 /// These integers may result from hashing some data type. A suffix tree can
266 /// contain 1 or many strings, which can then be queried as one large string.
267 ///
268 /// The suffix tree is implemented using Ukkonen's algorithm for linear-time
269 /// suffix tree construction. Ukkonen's algorithm is explained in more detail
270 /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
271 /// paper is available at
272 ///
273 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
274 class SuffixTree {
275 public:
276   /// Stores each leaf node in the tree.
277   ///
278   /// This is used for finding outlining candidates.
279   std::vector<SuffixTreeNode *> LeafVector;
280 
281   /// Each element is an integer representing an instruction in the module.
282   ArrayRef<unsigned> Str;
283 
284 private:
285   /// Maintains each node in the tree.
286   SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
287 
288   /// The root of the suffix tree.
289   ///
290   /// The root represents the empty string. It is maintained by the
291   /// \p NodeAllocator like every other node in the tree.
292   SuffixTreeNode *Root = nullptr;
293 
294   /// Maintains the end indices of the internal nodes in the tree.
295   ///
296   /// Each internal node is guaranteed to never have its end index change
297   /// during the construction algorithm; however, leaves must be updated at
298   /// every step. Therefore, we need to store leaf end indices by reference
299   /// to avoid updating O(N) leaves at every step of construction. Thus,
300   /// every internal node must be allocated its own end index.
301   BumpPtrAllocator InternalEndIdxAllocator;
302 
303   /// The end index of each leaf in the tree.
304   size_t LeafEndIdx = -1;
305 
306   /// \brief Helper struct which keeps track of the next insertion point in
307   /// Ukkonen's algorithm.
308   struct ActiveState {
309     /// The next node to insert at.
310     SuffixTreeNode *Node;
311 
312     /// The index of the first character in the substring currently being added.
313     size_t Idx = EmptyIdx;
314 
315     /// The length of the substring we have to add at the current step.
316     size_t Len = 0;
317   };
318 
319   /// \brief The point the next insertion will take place at in the
320   /// construction algorithm.
321   ActiveState Active;
322 
323   /// Allocate a leaf node and add it to the tree.
324   ///
325   /// \param Parent The parent of this node.
326   /// \param StartIdx The start index of this node's associated string.
327   /// \param Edge The label on the edge leaving \p Parent to this node.
328   ///
329   /// \returns A pointer to the allocated leaf node.
330   SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx,
331                              unsigned Edge) {
332 
333     assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
334 
335     SuffixTreeNode *N = new (NodeAllocator.Allocate())
336         SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
337     Parent.Children[Edge] = N;
338 
339     return N;
340   }
341 
342   /// Allocate an internal node and add it to the tree.
343   ///
344   /// \param Parent The parent of this node. Only null when allocating the root.
345   /// \param StartIdx The start index of this node's associated string.
346   /// \param EndIdx The end index of this node's associated string.
347   /// \param Edge The label on the edge leaving \p Parent to this node.
348   ///
349   /// \returns A pointer to the allocated internal node.
350   SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx,
351                                      size_t EndIdx, unsigned Edge) {
352 
353     assert(StartIdx <= EndIdx && "String can't start after it ends!");
354     assert(!(!Parent && StartIdx != EmptyIdx) &&
355            "Non-root internal nodes must have parents!");
356 
357     size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx);
358     SuffixTreeNode *N = new (NodeAllocator.Allocate())
359         SuffixTreeNode(StartIdx, E, Root, Parent);
360     if (Parent)
361       Parent->Children[Edge] = N;
362 
363     return N;
364   }
365 
366   /// \brief Set the suffix indices of the leaves to the start indices of their
367   /// respective suffixes. Also stores each leaf in \p LeafVector at its
368   /// respective suffix index.
369   ///
370   /// \param[in] CurrNode The node currently being visited.
371   /// \param CurrIdx The current index of the string being visited.
372   void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) {
373 
374     bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
375 
376     // Store the length of the concatenation of all strings from the root to
377     // this node.
378     if (!CurrNode.isRoot()) {
379       if (CurrNode.ConcatLen == 0)
380         CurrNode.ConcatLen = CurrNode.size();
381 
382       if (CurrNode.Parent)
383         CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
384     }
385 
386     // Traverse the tree depth-first.
387     for (auto &ChildPair : CurrNode.Children) {
388       assert(ChildPair.second && "Node had a null child!");
389       setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
390     }
391 
392     // Is this node a leaf?
393     if (IsLeaf) {
394       // If yes, give it a suffix index and bump its parent's occurrence count.
395       CurrNode.SuffixIdx = Str.size() - CurrIdx;
396       assert(CurrNode.Parent && "CurrNode had no parent!");
397       CurrNode.Parent->OccurrenceCount++;
398 
399       // Store the leaf in the leaf vector for pruning later.
400       LeafVector[CurrNode.SuffixIdx] = &CurrNode;
401     }
402   }
403 
404   /// \brief Construct the suffix tree for the prefix of the input ending at
405   /// \p EndIdx.
406   ///
407   /// Used to construct the full suffix tree iteratively. At the end of each
408   /// step, the constructed suffix tree is either a valid suffix tree, or a
409   /// suffix tree with implicit suffixes. At the end of the final step, the
410   /// suffix tree is a valid tree.
411   ///
412   /// \param EndIdx The end index of the current prefix in the main string.
413   /// \param SuffixesToAdd The number of suffixes that must be added
414   /// to complete the suffix tree at the current phase.
415   ///
416   /// \returns The number of suffixes that have not been added at the end of
417   /// this step.
418   unsigned extend(size_t EndIdx, size_t SuffixesToAdd) {
419     SuffixTreeNode *NeedsLink = nullptr;
420 
421     while (SuffixesToAdd > 0) {
422 
423       // Are we waiting to add anything other than just the last character?
424       if (Active.Len == 0) {
425         // If not, then say the active index is the end index.
426         Active.Idx = EndIdx;
427       }
428 
429       assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
430 
431       // The first character in the current substring we're looking at.
432       unsigned FirstChar = Str[Active.Idx];
433 
434       // Have we inserted anything starting with FirstChar at the current node?
435       if (Active.Node->Children.count(FirstChar) == 0) {
436         // If not, then we can just insert a leaf and move too the next step.
437         insertLeaf(*Active.Node, EndIdx, FirstChar);
438 
439         // The active node is an internal node, and we visited it, so it must
440         // need a link if it doesn't have one.
441         if (NeedsLink) {
442           NeedsLink->Link = Active.Node;
443           NeedsLink = nullptr;
444         }
445       } else {
446         // There's a match with FirstChar, so look for the point in the tree to
447         // insert a new node.
448         SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
449 
450         size_t SubstringLen = NextNode->size();
451 
452         // Is the current suffix we're trying to insert longer than the size of
453         // the child we want to move to?
454         if (Active.Len >= SubstringLen) {
455           // If yes, then consume the characters we've seen and move to the next
456           // node.
457           Active.Idx += SubstringLen;
458           Active.Len -= SubstringLen;
459           Active.Node = NextNode;
460           continue;
461         }
462 
463         // Otherwise, the suffix we're trying to insert must be contained in the
464         // next node we want to move to.
465         unsigned LastChar = Str[EndIdx];
466 
467         // Is the string we're trying to insert a substring of the next node?
468         if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
469           // If yes, then we're done for this step. Remember our insertion point
470           // and move to the next end index. At this point, we have an implicit
471           // suffix tree.
472           if (NeedsLink && !Active.Node->isRoot()) {
473             NeedsLink->Link = Active.Node;
474             NeedsLink = nullptr;
475           }
476 
477           Active.Len++;
478           break;
479         }
480 
481         // The string we're trying to insert isn't a substring of the next node,
482         // but matches up to a point. Split the node.
483         //
484         // For example, say we ended our search at a node n and we're trying to
485         // insert ABD. Then we'll create a new node s for AB, reduce n to just
486         // representing C, and insert a new leaf node l to represent d. This
487         // allows us to ensure that if n was a leaf, it remains a leaf.
488         //
489         //   | ABC  ---split--->  | AB
490         //   n                    s
491         //                     C / \ D
492         //                      n   l
493 
494         // The node s from the diagram
495         SuffixTreeNode *SplitNode =
496             insertInternalNode(Active.Node, NextNode->StartIdx,
497                                NextNode->StartIdx + Active.Len - 1, FirstChar);
498 
499         // Insert the new node representing the new substring into the tree as
500         // a child of the split node. This is the node l from the diagram.
501         insertLeaf(*SplitNode, EndIdx, LastChar);
502 
503         // Make the old node a child of the split node and update its start
504         // index. This is the node n from the diagram.
505         NextNode->StartIdx += Active.Len;
506         NextNode->Parent = SplitNode;
507         SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
508 
509         // SplitNode is an internal node, update the suffix link.
510         if (NeedsLink)
511           NeedsLink->Link = SplitNode;
512 
513         NeedsLink = SplitNode;
514       }
515 
516       // We've added something new to the tree, so there's one less suffix to
517       // add.
518       SuffixesToAdd--;
519 
520       if (Active.Node->isRoot()) {
521         if (Active.Len > 0) {
522           Active.Len--;
523           Active.Idx = EndIdx - SuffixesToAdd + 1;
524         }
525       } else {
526         // Start the next phase at the next smallest suffix.
527         Active.Node = Active.Node->Link;
528       }
529     }
530 
531     return SuffixesToAdd;
532   }
533 
534 public:
535   /// Construct a suffix tree from a sequence of unsigned integers.
536   ///
537   /// \param Str The string to construct the suffix tree for.
538   SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
539     Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
540     Root->IsInTree = true;
541     Active.Node = Root;
542     LeafVector = std::vector<SuffixTreeNode *>(Str.size());
543 
544     // Keep track of the number of suffixes we have to add of the current
545     // prefix.
546     size_t SuffixesToAdd = 0;
547     Active.Node = Root;
548 
549     // Construct the suffix tree iteratively on each prefix of the string.
550     // PfxEndIdx is the end index of the current prefix.
551     // End is one past the last element in the string.
552     for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
553       SuffixesToAdd++;
554       LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
555       SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
556     }
557 
558     // Set the suffix indices of each leaf.
559     assert(Root && "Root node can't be nullptr!");
560     setSuffixIndices(*Root, 0);
561   }
562 };
563 
564 /// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings.
565 struct InstructionMapper {
566 
567   /// \brief The next available integer to assign to a \p MachineInstr that
568   /// cannot be outlined.
569   ///
570   /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
571   unsigned IllegalInstrNumber = -3;
572 
573   /// \brief The next available integer to assign to a \p MachineInstr that can
574   /// be outlined.
575   unsigned LegalInstrNumber = 0;
576 
577   /// Correspondence from \p MachineInstrs to unsigned integers.
578   DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
579       InstructionIntegerMap;
580 
581   /// Corresponcence from unsigned integers to \p MachineInstrs.
582   /// Inverse of \p InstructionIntegerMap.
583   DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
584 
585   /// The vector of unsigned integers that the module is mapped to.
586   std::vector<unsigned> UnsignedVec;
587 
588   /// \brief Stores the location of the instruction associated with the integer
589   /// at index i in \p UnsignedVec for each index i.
590   std::vector<MachineBasicBlock::iterator> InstrList;
591 
592   /// \brief Maps \p *It to a legal integer.
593   ///
594   /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
595   /// \p IntegerInstructionMap, and \p LegalInstrNumber.
596   ///
597   /// \returns The integer that \p *It was mapped to.
598   unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
599 
600     // Get the integer for this instruction or give it the current
601     // LegalInstrNumber.
602     InstrList.push_back(It);
603     MachineInstr &MI = *It;
604     bool WasInserted;
605     DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
606         ResultIt;
607     std::tie(ResultIt, WasInserted) =
608         InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
609     unsigned MINumber = ResultIt->second;
610 
611     // There was an insertion.
612     if (WasInserted) {
613       LegalInstrNumber++;
614       IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
615     }
616 
617     UnsignedVec.push_back(MINumber);
618 
619     // Make sure we don't overflow or use any integers reserved by the DenseMap.
620     if (LegalInstrNumber >= IllegalInstrNumber)
621       report_fatal_error("Instruction mapping overflow!");
622 
623     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
624            "Tried to assign DenseMap tombstone or empty key to instruction.");
625     assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
626            "Tried to assign DenseMap tombstone or empty key to instruction.");
627 
628     return MINumber;
629   }
630 
631   /// Maps \p *It to an illegal integer.
632   ///
633   /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
634   ///
635   /// \returns The integer that \p *It was mapped to.
636   unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
637     unsigned MINumber = IllegalInstrNumber;
638 
639     InstrList.push_back(It);
640     UnsignedVec.push_back(IllegalInstrNumber);
641     IllegalInstrNumber--;
642 
643     assert(LegalInstrNumber < IllegalInstrNumber &&
644            "Instruction mapping overflow!");
645 
646     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
647            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
648 
649     assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
650            "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
651 
652     return MINumber;
653   }
654 
655   /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
656   /// and appends it to \p UnsignedVec and \p InstrList.
657   ///
658   /// Two instructions are assigned the same integer if they are identical.
659   /// If an instruction is deemed unsafe to outline, then it will be assigned an
660   /// unique integer. The resulting mapping is placed into a suffix tree and
661   /// queried for candidates.
662   ///
663   /// \param MBB The \p MachineBasicBlock to be translated into integers.
664   /// \param TRI \p TargetRegisterInfo for the module.
665   /// \param TII \p TargetInstrInfo for the module.
666   void convertToUnsignedVec(MachineBasicBlock &MBB,
667                             const TargetRegisterInfo &TRI,
668                             const TargetInstrInfo &TII) {
669     for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
670          It++) {
671 
672       // Keep track of where this instruction is in the module.
673       switch (TII.getOutliningType(*It)) {
674       case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
675         mapToIllegalUnsigned(It);
676         break;
677 
678       case TargetInstrInfo::MachineOutlinerInstrType::Legal:
679         mapToLegalUnsigned(It);
680         break;
681 
682       case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
683         break;
684       }
685     }
686 
687     // After we're done every insertion, uniquely terminate this part of the
688     // "string". This makes sure we won't match across basic block or function
689     // boundaries since the "end" is encoded uniquely and thus appears in no
690     // repeated substring.
691     InstrList.push_back(MBB.end());
692     UnsignedVec.push_back(IllegalInstrNumber);
693     IllegalInstrNumber--;
694   }
695 
696   InstructionMapper() {
697     // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
698     // changed.
699     assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
700            "DenseMapInfo<unsigned>'s empty key isn't -1!");
701     assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
702            "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
703   }
704 };
705 
706 /// \brief An interprocedural pass which finds repeated sequences of
707 /// instructions and replaces them with calls to functions.
708 ///
709 /// Each instruction is mapped to an unsigned integer and placed in a string.
710 /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
711 /// is then repeatedly queried for repeated sequences of instructions. Each
712 /// non-overlapping repeated sequence is then placed in its own
713 /// \p MachineFunction and each instance is then replaced with a call to that
714 /// function.
715 struct MachineOutliner : public ModulePass {
716 
717   static char ID;
718 
719   StringRef getPassName() const override { return "Machine Outliner"; }
720 
721   void getAnalysisUsage(AnalysisUsage &AU) const override {
722     AU.addRequired<MachineModuleInfo>();
723     AU.addPreserved<MachineModuleInfo>();
724     AU.setPreservesAll();
725     ModulePass::getAnalysisUsage(AU);
726   }
727 
728   MachineOutliner() : ModulePass(ID) {
729     initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
730   }
731 
732   /// Find all repeated substrings that satisfy the outlining cost model.
733   ///
734   /// If a substring appears at least twice, then it must be represented by
735   /// an internal node which appears in at least two suffixes. Each suffix is
736   /// represented by a leaf node. To do this, we visit each internal node in
737   /// the tree, using the leaf children of each internal node. If an internal
738   /// node represents a beneficial substring, then we use each of its leaf
739   /// children to find the locations of its substring.
740   ///
741   /// \param ST A suffix tree to query.
742   /// \param TII TargetInstrInfo for the target.
743   /// \param Mapper Contains outlining mapping information.
744   /// \param[out] CandidateList Filled with candidates representing each
745   /// beneficial substring.
746   /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
747   /// type of candidate.
748   ///
749   /// \returns The length of the longest candidate found.
750   size_t findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
751                         InstructionMapper &Mapper,
752                         std::vector<Candidate> &CandidateList,
753                         std::vector<OutlinedFunction> &FunctionList);
754 
755   /// \brief Replace the sequences of instructions represented by the
756   /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
757   /// described in \p FunctionList.
758   ///
759   /// \param M The module we are outlining from.
760   /// \param CandidateList A list of candidates to be outlined.
761   /// \param FunctionList A list of functions to be inserted into the module.
762   /// \param Mapper Contains the instruction mappings for the module.
763   bool outline(Module &M, const ArrayRef<Candidate> &CandidateList,
764                std::vector<OutlinedFunction> &FunctionList,
765                InstructionMapper &Mapper);
766 
767   /// Creates a function for \p OF and inserts it into the module.
768   MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
769                                           InstructionMapper &Mapper);
770 
771   /// Find potential outlining candidates and store them in \p CandidateList.
772   ///
773   /// For each type of potential candidate, also build an \p OutlinedFunction
774   /// struct containing the information to build the function for that
775   /// candidate.
776   ///
777   /// \param[out] CandidateList Filled with outlining candidates for the module.
778   /// \param[out] FunctionList Filled with functions corresponding to each type
779   /// of \p Candidate.
780   /// \param ST The suffix tree for the module.
781   /// \param TII TargetInstrInfo for the module.
782   ///
783   /// \returns The length of the longest candidate found. 0 if there are none.
784   unsigned buildCandidateList(std::vector<Candidate> &CandidateList,
785                               std::vector<OutlinedFunction> &FunctionList,
786                               SuffixTree &ST, InstructionMapper &Mapper,
787                               const TargetInstrInfo &TII);
788 
789   /// \brief Remove any overlapping candidates that weren't handled by the
790   /// suffix tree's pruning method.
791   ///
792   /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
793   /// If a short candidate is chosen for outlining, then a longer candidate
794   /// which has that short candidate as a suffix is chosen, the tree's pruning
795   /// method will not find it. Thus, we need to prune before outlining as well.
796   ///
797   /// \param[in,out] CandidateList A list of outlining candidates.
798   /// \param[in,out] FunctionList A list of functions to be outlined.
799   /// \param Mapper Contains instruction mapping info for outlining.
800   /// \param MaxCandidateLen The length of the longest candidate.
801   /// \param TII TargetInstrInfo for the module.
802   void pruneOverlaps(std::vector<Candidate> &CandidateList,
803                      std::vector<OutlinedFunction> &FunctionList,
804                      InstructionMapper &Mapper, unsigned MaxCandidateLen,
805                      const TargetInstrInfo &TII);
806 
807   /// Construct a suffix tree on the instructions in \p M and outline repeated
808   /// strings from that tree.
809   bool runOnModule(Module &M) override;
810 };
811 
812 } // Anonymous namespace.
813 
814 char MachineOutliner::ID = 0;
815 
816 namespace llvm {
817 ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
818 } // namespace llvm
819 
820 INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
821                 false)
822 
823 size_t
824 MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
825                                 InstructionMapper &Mapper,
826                                 std::vector<Candidate> &CandidateList,
827                                 std::vector<OutlinedFunction> &FunctionList) {
828 
829   CandidateList.clear();
830   FunctionList.clear();
831   size_t FnIdx = 0;
832   size_t MaxLen = 0;
833 
834   // FIXME: Visit internal nodes instead of leaves.
835   for (SuffixTreeNode *Leaf : ST.LeafVector) {
836     assert(Leaf && "Leaves in LeafVector cannot be null!");
837     if (!Leaf->IsInTree)
838       continue;
839 
840     assert(Leaf->Parent && "All leaves must have parents!");
841     SuffixTreeNode &Parent = *(Leaf->Parent);
842 
843     // If it doesn't appear enough, or we already outlined from it, skip it.
844     if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
845       continue;
846 
847     // Figure out if this candidate is beneficial.
848     size_t StringLen = Leaf->ConcatLen - Leaf->size();
849 
850     // Too short to be beneficial; skip it.
851     // FIXME: This isn't necessarily true for, say, X86. If we factor in
852     // instruction lengths we need more information than this.
853     if (StringLen < 2)
854       continue;
855 
856     size_t CallOverhead = 0;
857     size_t SequenceOverhead = StringLen;
858 
859     // If this is a beneficial class of candidate, then every one is stored in
860     // this vector.
861     std::vector<Candidate> CandidatesForRepeatedSeq;
862 
863     // Used for getOutliningFrameOverhead.
864     // FIXME: CandidatesForRepeatedSeq and this should be combined.
865     std::vector<
866         std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
867         CandidateClass;
868 
869     // Figure out the call overhead for each instance of the sequence.
870     for (auto &ChildPair : Parent.Children) {
871       SuffixTreeNode *M = ChildPair.second;
872 
873       if (M && M->IsInTree && M->isLeaf()) {
874         // Each sequence is over [StartIt, EndIt].
875         MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx];
876         MachineBasicBlock::iterator EndIt =
877             Mapper.InstrList[M->SuffixIdx + StringLen - 1];
878 
879         // Get the overhead for calling a function for this sequence and any
880         // target-specified data for how to construct the call.
881         std::pair<size_t, unsigned> CallOverheadPair =
882             TII.getOutliningCallOverhead(StartIt, EndIt);
883         CallOverhead += CallOverheadPair.first;
884         CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx,
885                                               CallOverheadPair.second);
886         CandidateClass.emplace_back(std::make_pair(StartIt, EndIt));
887 
888         // Never visit this leaf again.
889         M->IsInTree = false;
890       }
891     }
892 
893     std::pair<size_t, unsigned> FrameOverheadPair =
894         TII.getOutliningFrameOverhead(CandidateClass);
895     size_t FrameOverhead = FrameOverheadPair.first;
896 
897     size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead;
898     size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount;
899 
900     // Is it better to outline this candidate than not?
901     if (NotOutliningCost <= OutliningCost) {
902       // Outlining this candidate would take more instructions than not
903       // outlining.
904       // Emit a remark explaining why we didn't outline this candidate.
905       std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C =
906           CandidateClass[0];
907       MachineOptimizationRemarkEmitter MORE(
908           *(C.first->getParent()->getParent()), nullptr);
909       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
910                                         C.first->getDebugLoc(),
911                                         C.first->getParent());
912       R << "Did not outline " << NV("Length", StringLen) << " instructions"
913         << " from " << NV("NumOccurrences", CandidateClass.size())
914         << " locations."
915         << " Instructions from outlining all occurrences ("
916         << NV("OutliningCost", OutliningCost) << ")"
917         << " >= Unoutlined instruction count ("
918         << NV("NotOutliningCost", NotOutliningCost) << ")"
919         << " (Also found at: ";
920 
921       // Tell the user the other places the candidate was found.
922       for (size_t i = 1, e = CandidateClass.size(); i < e; i++) {
923         R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
924                 CandidateClass[i].first->getDebugLoc());
925         if (i != e - 1)
926           R << ", ";
927       }
928 
929       R << ")";
930       MORE.emit(R);
931 
932       // Move to the next candidate.
933       continue;
934     }
935 
936     size_t Benefit = NotOutliningCost - OutliningCost;
937 
938     if (StringLen > MaxLen)
939       MaxLen = StringLen;
940 
941     // At this point, the candidate class is seen as beneficial. Set their
942     // benefit values and save them in the candidate list.
943     for (Candidate &C : CandidatesForRepeatedSeq) {
944       C.Benefit = Benefit;
945       CandidateList.push_back(C);
946     }
947 
948     // Save the function for the new candidate sequence.
949     std::vector<unsigned> CandidateSequence;
950     for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
951       CandidateSequence.push_back(ST.Str[i]);
952 
953     FunctionList.emplace_back(FnIdx, CandidatesForRepeatedSeq.size(),
954                               CandidateSequence, Benefit,
955                               FrameOverheadPair.second);
956 
957     // Move to the next function.
958     FnIdx++;
959     Parent.IsInTree = false;
960   }
961 
962   return MaxLen;
963 }
964 
965 void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
966                                     std::vector<OutlinedFunction> &FunctionList,
967                                     InstructionMapper &Mapper,
968                                     unsigned MaxCandidateLen,
969                                     const TargetInstrInfo &TII) {
970   // TODO: Experiment with interval trees or other interval-checking structures
971   // to lower the time complexity of this function.
972   // TODO: Can we do better than the simple greedy choice?
973   // Check for overlaps in the range.
974   // This is O(MaxCandidateLen * CandidateList.size()).
975   for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
976        It++) {
977     Candidate &C1 = *It;
978     OutlinedFunction &F1 = FunctionList[C1.FunctionIdx];
979 
980     // If we removed this candidate, skip it.
981     if (!C1.InCandidateList)
982       continue;
983 
984     // Is it still worth it to outline C1?
985     if (F1.Benefit < 1 || F1.OccurrenceCount < 2) {
986       assert(F1.OccurrenceCount > 0 &&
987              "Can't remove OutlinedFunction with no occurrences!");
988       F1.OccurrenceCount--;
989       C1.InCandidateList = false;
990       continue;
991     }
992 
993     // The minimum start index of any candidate that could overlap with this
994     // one.
995     unsigned FarthestPossibleIdx = 0;
996 
997     // Either the index is 0, or it's at most MaxCandidateLen indices away.
998     if (C1.StartIdx > MaxCandidateLen)
999       FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen;
1000 
1001     // Compare against the candidates in the list that start at at most
1002     // FarthestPossibleIdx indices away from C1. There are at most
1003     // MaxCandidateLen of these.
1004     for (auto Sit = It + 1; Sit != Et; Sit++) {
1005       Candidate &C2 = *Sit;
1006       OutlinedFunction &F2 = FunctionList[C2.FunctionIdx];
1007 
1008       // Is this candidate too far away to overlap?
1009       if (C2.StartIdx < FarthestPossibleIdx)
1010         break;
1011 
1012       // Did we already remove this candidate in a previous step?
1013       if (!C2.InCandidateList)
1014         continue;
1015 
1016       // Is the function beneficial to outline?
1017       if (F2.OccurrenceCount < 2 || F2.Benefit < 1) {
1018         // If not, remove this candidate and move to the next one.
1019         assert(F2.OccurrenceCount > 0 &&
1020                "Can't remove OutlinedFunction with no occurrences!");
1021         F2.OccurrenceCount--;
1022         C2.InCandidateList = false;
1023         continue;
1024       }
1025 
1026       size_t C2End = C2.StartIdx + C2.Len - 1;
1027 
1028       // Do C1 and C2 overlap?
1029       //
1030       // Not overlapping:
1031       // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
1032       //
1033       // We sorted our candidate list so C2Start <= C1Start. We know that
1034       // C2End > C2Start since each candidate has length >= 2. Therefore, all we
1035       // have to check is C2End < C2Start to see if we overlap.
1036       if (C2End < C1.StartIdx)
1037         continue;
1038 
1039       // C1 and C2 overlap.
1040       // We need to choose the better of the two.
1041       //
1042       // Approximate this by picking the one which would have saved us the
1043       // most instructions before any pruning.
1044       if (C1.Benefit >= C2.Benefit) {
1045 
1046         // C1 is better, so remove C2 and update C2's OutlinedFunction to
1047         // reflect the removal.
1048         assert(F2.OccurrenceCount > 0 &&
1049                "Can't remove OutlinedFunction with no occurrences!");
1050         F2.OccurrenceCount--;
1051 
1052         // Remove the call overhead from the removed sequence.
1053         MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx];
1054         MachineBasicBlock::iterator EndIt =
1055             Mapper.InstrList[C2.StartIdx + C2.Len - 1];
1056 
1057         F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first;
1058         // Add back one instance of the sequence.
1059 
1060         if (F2.Sequence.size() > F2.Benefit)
1061           F2.Benefit = 0;
1062         else
1063           F2.Benefit -= F2.Sequence.size();
1064 
1065         C2.InCandidateList = false;
1066 
1067         DEBUG(dbgs() << "- Removed C2. \n";
1068               dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount
1069                      << "\n";
1070               dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";);
1071 
1072       } else {
1073         // C2 is better, so remove C1 and update C1's OutlinedFunction to
1074         // reflect the removal.
1075         assert(F1.OccurrenceCount > 0 &&
1076                "Can't remove OutlinedFunction with no occurrences!");
1077         F1.OccurrenceCount--;
1078 
1079         // Remove the call overhead from the removed sequence.
1080         MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx];
1081         MachineBasicBlock::iterator EndIt =
1082             Mapper.InstrList[C1.StartIdx + C1.Len - 1];
1083 
1084         F1.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first;
1085 
1086         // Add back one instance of the sequence.
1087         if (F1.Sequence.size() > F1.Benefit)
1088           F1.Benefit = 0;
1089         else
1090           F1.Benefit -= F1.Sequence.size();
1091 
1092         C1.InCandidateList = false;
1093 
1094         DEBUG(dbgs() << "- Removed C1. \n";
1095               dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount
1096                      << "\n";
1097               dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";);
1098 
1099         // C1 is out, so we don't have to compare it against anyone else.
1100         break;
1101       }
1102     }
1103   }
1104 }
1105 
1106 unsigned
1107 MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList,
1108                                     std::vector<OutlinedFunction> &FunctionList,
1109                                     SuffixTree &ST, InstructionMapper &Mapper,
1110                                     const TargetInstrInfo &TII) {
1111 
1112   std::vector<unsigned> CandidateSequence; // Current outlining candidate.
1113   size_t MaxCandidateLen = 0;              // Length of the longest candidate.
1114 
1115   MaxCandidateLen =
1116       findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
1117 
1118   // Sort the candidates in decending order. This will simplify the outlining
1119   // process when we have to remove the candidates from the mapping by
1120   // allowing us to cut them out without keeping track of an offset.
1121   std::stable_sort(CandidateList.begin(), CandidateList.end());
1122 
1123   return MaxCandidateLen;
1124 }
1125 
1126 MachineFunction *
1127 MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
1128                                         InstructionMapper &Mapper) {
1129 
1130   // Create the function name. This should be unique. For now, just hash the
1131   // module name and include it in the function name plus the number of this
1132   // function.
1133   std::ostringstream NameStream;
1134   NameStream << "OUTLINED_FUNCTION_" << OF.Name;
1135 
1136   // Create the function using an IR-level function.
1137   LLVMContext &C = M.getContext();
1138   Function *F = dyn_cast<Function>(
1139       M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
1140   assert(F && "Function was null!");
1141 
1142   // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
1143   // which gives us better results when we outline from linkonceodr functions.
1144   F->setLinkage(GlobalValue::PrivateLinkage);
1145   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1146 
1147   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
1148   IRBuilder<> Builder(EntryBB);
1149   Builder.CreateRetVoid();
1150 
1151   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1152   MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
1153   MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
1154   const TargetSubtargetInfo &STI = MF.getSubtarget();
1155   const TargetInstrInfo &TII = *STI.getInstrInfo();
1156 
1157   // Insert the new function into the module.
1158   MF.insert(MF.begin(), &MBB);
1159 
1160   TII.insertOutlinerPrologue(MBB, MF, OF.FrameClass);
1161 
1162   // Copy over the instructions for the function using the integer mappings in
1163   // its sequence.
1164   for (unsigned Str : OF.Sequence) {
1165     MachineInstr *NewMI =
1166         MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
1167     NewMI->dropMemRefs();
1168 
1169     // Don't keep debug information for outlined instructions.
1170     // FIXME: This means outlined functions are currently undebuggable.
1171     NewMI->setDebugLoc(DebugLoc());
1172     MBB.insert(MBB.end(), NewMI);
1173   }
1174 
1175   TII.insertOutlinerEpilogue(MBB, MF, OF.FrameClass);
1176 
1177   return &MF;
1178 }
1179 
1180 bool MachineOutliner::outline(Module &M,
1181                               const ArrayRef<Candidate> &CandidateList,
1182                               std::vector<OutlinedFunction> &FunctionList,
1183                               InstructionMapper &Mapper) {
1184 
1185   bool OutlinedSomething = false;
1186 
1187   // Replace the candidates with calls to their respective outlined functions.
1188   for (const Candidate &C : CandidateList) {
1189 
1190     // Was the candidate removed during pruneOverlaps?
1191     if (!C.InCandidateList)
1192       continue;
1193 
1194     // If not, then look at its OutlinedFunction.
1195     OutlinedFunction &OF = FunctionList[C.FunctionIdx];
1196 
1197     // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
1198     if (OF.OccurrenceCount < 2 || OF.Benefit < 1)
1199       continue;
1200 
1201     // If not, then outline it.
1202     assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
1203     MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent();
1204     MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx];
1205     unsigned EndIdx = C.StartIdx + C.Len - 1;
1206 
1207     assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
1208     MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
1209     assert(EndIt != MBB->end() && "EndIt out of bounds!");
1210 
1211     EndIt++; // Erase needs one past the end index.
1212 
1213     // Does this candidate have a function yet?
1214     if (!OF.MF) {
1215       OF.MF = createOutlinedFunction(M, OF, Mapper);
1216       FunctionsCreated++;
1217     }
1218 
1219     MachineFunction *MF = OF.MF;
1220     const TargetSubtargetInfo &STI = MF->getSubtarget();
1221     const TargetInstrInfo &TII = *STI.getInstrInfo();
1222 
1223     // Insert a call to the new function and erase the old sequence.
1224     TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.CallClass);
1225     StartIt = Mapper.InstrList[C.StartIdx];
1226     MBB->erase(StartIt, EndIt);
1227 
1228     OutlinedSomething = true;
1229 
1230     // Statistics.
1231     NumOutlined++;
1232   }
1233 
1234   DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
1235 
1236   return OutlinedSomething;
1237 }
1238 
1239 bool MachineOutliner::runOnModule(Module &M) {
1240 
1241   // Is there anything in the module at all?
1242   if (M.empty())
1243     return false;
1244 
1245   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1246   const TargetSubtargetInfo &STI =
1247       MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
1248   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
1249   const TargetInstrInfo *TII = STI.getInstrInfo();
1250 
1251   InstructionMapper Mapper;
1252 
1253   // Build instruction mappings for each function in the module.
1254   for (Function &F : M) {
1255     MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
1256 
1257     // Is the function empty? Safe to outline from?
1258     if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF))
1259       continue;
1260 
1261     // If it is, look at each MachineBasicBlock in the function.
1262     for (MachineBasicBlock &MBB : MF) {
1263 
1264       // Is there anything in MBB?
1265       if (MBB.empty())
1266         continue;
1267 
1268       // If yes, map it.
1269       Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
1270     }
1271   }
1272 
1273   // Construct a suffix tree, use it to find candidates, and then outline them.
1274   SuffixTree ST(Mapper.UnsignedVec);
1275   std::vector<Candidate> CandidateList;
1276   std::vector<OutlinedFunction> FunctionList;
1277 
1278   // Find all of the outlining candidates.
1279   unsigned MaxCandidateLen =
1280       buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
1281 
1282   // Remove candidates that overlap with other candidates.
1283   pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
1284 
1285   // Outline each of the candidates and return true if something was outlined.
1286   return outline(M, CandidateList, FunctionList, Mapper);
1287 }
1288