1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 // Implementation for the IROutliner which is used by the IROutliner Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/IROutliner.h"
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
17 #include "llvm/Analysis/TargetTransformInfo.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/Dominators.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/PassManager.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Pass.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Transforms/IPO.h"
28 #include <map>
29 #include <set>
30 #include <vector>
31 
32 #define DEBUG_TYPE "iroutliner"
33 
34 using namespace llvm;
35 using namespace IRSimilarity;
36 
37 // A command flag to be used for debugging to exclude branches from similarity
38 // matching and outlining.
39 namespace llvm {
40 extern cl::opt<bool> DisableBranches;
41 } // namespace llvm
42 
43 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
44 // functions. This is false by default because the linker can dedupe linkonceodr
45 // functions. Since the outliner is confined to a single module (modulo LTO),
46 // this is off by default. It should, however, be the default behavior in
47 // LTO.
48 static cl::opt<bool> EnableLinkOnceODRIROutlining(
49     "enable-linkonceodr-ir-outlining", cl::Hidden,
50     cl::desc("Enable the IR outliner on linkonceodr functions"),
51     cl::init(false));
52 
53 // This is a debug option to test small pieces of code to ensure that outlining
54 // works correctly.
55 static cl::opt<bool> NoCostModel(
56     "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
57     cl::desc("Debug option to outline greedily, without restriction that "
58              "calculated benefit outweighs cost"));
59 
60 /// The OutlinableGroup holds all the overarching information for outlining
61 /// a set of regions that are structurally similar to one another, such as the
62 /// types of the overall function, the output blocks, the sets of stores needed
63 /// and a list of the different regions. This information is used in the
64 /// deduplication of extracted regions with the same structure.
65 struct OutlinableGroup {
66   /// The sections that could be outlined
67   std::vector<OutlinableRegion *> Regions;
68 
69   /// The argument types for the function created as the overall function to
70   /// replace the extracted function for each region.
71   std::vector<Type *> ArgumentTypes;
72   /// The FunctionType for the overall function.
73   FunctionType *OutlinedFunctionType = nullptr;
74   /// The Function for the collective overall function.
75   Function *OutlinedFunction = nullptr;
76 
77   /// Flag for whether we should not consider this group of OutlinableRegions
78   /// for extraction.
79   bool IgnoreGroup = false;
80 
81   /// The return blocks for the overall function.
82   DenseMap<Value *, BasicBlock *> EndBBs;
83 
84   /// The PHIBlocks with their corresponding return block based on the return
85   /// value as the key.
86   DenseMap<Value *, BasicBlock *> PHIBlocks;
87 
88   /// A set containing the different GVN store sets needed. Each array contains
89   /// a sorted list of the different values that need to be stored into output
90   /// registers.
91   DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
92 
93   /// Flag for whether the \ref ArgumentTypes have been defined after the
94   /// extraction of the first region.
95   bool InputTypesSet = false;
96 
97   /// The number of input values in \ref ArgumentTypes.  Anything after this
98   /// index in ArgumentTypes is an output argument.
99   unsigned NumAggregateInputs = 0;
100 
101   /// The mapping of the canonical numbering of the values in outlined sections
102   /// to specific arguments.
103   DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
104 
105   /// The number of branches in the region target a basic block that is outside
106   /// of the region.
107   unsigned BranchesToOutside = 0;
108 
109   /// The number of instructions that will be outlined by extracting \ref
110   /// Regions.
111   InstructionCost Benefit = 0;
112   /// The number of added instructions needed for the outlining of the \ref
113   /// Regions.
114   InstructionCost Cost = 0;
115 
116   /// The argument that needs to be marked with the swifterr attribute.  If not
117   /// needed, there is no value.
118   Optional<unsigned> SwiftErrorArgument;
119 
120   /// For the \ref Regions, we look at every Value.  If it is a constant,
121   /// we check whether it is the same in Region.
122   ///
123   /// \param [in,out] NotSame contains the global value numbers where the
124   /// constant is not always the same, and must be passed in as an argument.
125   void findSameConstants(DenseSet<unsigned> &NotSame);
126 
127   /// For the regions, look at each set of GVN stores needed and account for
128   /// each combination.  Add an argument to the argument types if there is
129   /// more than one combination.
130   ///
131   /// \param [in] M - The module we are outlining from.
132   void collectGVNStoreSets(Module &M);
133 };
134 
135 /// Move the contents of \p SourceBB to before the last instruction of \p
136 /// TargetBB.
137 /// \param SourceBB - the BasicBlock to pull Instructions from.
138 /// \param TargetBB - the BasicBlock to put Instruction into.
139 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
140   for (Instruction &I : llvm::make_early_inc_range(SourceBB))
141     I.moveBefore(TargetBB, TargetBB.end());
142 }
143 
144 /// A function to sort the keys of \p Map, which must be a mapping of constant
145 /// values to basic blocks and return it in \p SortedKeys
146 ///
147 /// \param SortedKeys - The vector the keys will be return in and sorted.
148 /// \param Map - The DenseMap containing keys to sort.
149 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
150                                   DenseMap<Value *, BasicBlock *> &Map) {
151   for (auto &VtoBB : Map)
152     SortedKeys.push_back(VtoBB.first);
153 
154   stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
155     const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
156     const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
157     assert(RHSC && "Not a constant integer in return value?");
158     assert(LHSC && "Not a constant integer in return value?");
159 
160     return LHSC->getLimitedValue() < RHSC->getLimitedValue();
161   });
162 }
163 
164 Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
165                                                   Value *V) {
166   Optional<unsigned> GVN = Candidate->getGVN(V);
167   assert(GVN.hasValue() && "No GVN for incoming value");
168   Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
169   Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
170   Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
171   return FoundValueOpt.getValueOr(nullptr);
172 }
173 
174 void OutlinableRegion::splitCandidate() {
175   assert(!CandidateSplit && "Candidate already split!");
176 
177   Instruction *BackInst = Candidate->backInstruction();
178 
179   Instruction *EndInst = nullptr;
180   // Check whether the last instruction is a terminator, if it is, we do
181   // not split on the following instruction. We leave the block as it is.  We
182   // also check that this is not the last instruction in the Module, otherwise
183   // the check for whether the current following instruction matches the
184   // previously recorded instruction will be incorrect.
185   if (!BackInst->isTerminator() ||
186       BackInst->getParent() != &BackInst->getFunction()->back()) {
187     EndInst = Candidate->end()->Inst;
188     assert(EndInst && "Expected an end instruction?");
189   }
190 
191   // We check if the current instruction following the last instruction in the
192   // region is the same as the recorded instruction following the last
193   // instruction. If they do not match, there could be problems in rewriting
194   // the program after outlining, so we ignore it.
195   if (!BackInst->isTerminator() &&
196       EndInst != BackInst->getNextNonDebugInstruction())
197     return;
198 
199   Instruction *StartInst = (*Candidate->begin()).Inst;
200   assert(StartInst && "Expected a start instruction?");
201   StartBB = StartInst->getParent();
202   PrevBB = StartBB;
203 
204   // The basic block gets split like so:
205   // block:                 block:
206   //   inst1                  inst1
207   //   inst2                  inst2
208   //   region1               br block_to_outline
209   //   region2              block_to_outline:
210   //   region3          ->    region1
211   //   region4                region2
212   //   inst3                  region3
213   //   inst4                  region4
214   //                          br block_after_outline
215   //                        block_after_outline:
216   //                          inst3
217   //                          inst4
218 
219   std::string OriginalName = PrevBB->getName().str();
220 
221   StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
222   PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
223 
224   CandidateSplit = true;
225   if (!BackInst->isTerminator()) {
226     EndBB = EndInst->getParent();
227     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
228     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
229     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
230     return;
231   }
232 
233   EndBB = BackInst->getParent();
234   EndsInBranch = true;
235   FollowBB = nullptr;
236 }
237 
238 void OutlinableRegion::reattachCandidate() {
239   assert(CandidateSplit && "Candidate is not split!");
240 
241   // The basic block gets reattached like so:
242   // block:                        block:
243   //   inst1                         inst1
244   //   inst2                         inst2
245   //   br block_to_outline           region1
246   // block_to_outline:        ->     region2
247   //   region1                       region3
248   //   region2                       region4
249   //   region3                       inst3
250   //   region4                       inst4
251   //   br block_after_outline
252   // block_after_outline:
253   //   inst3
254   //   inst4
255   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
256 
257   // StartBB should only have one predecessor since we put an unconditional
258   // branch at the end of PrevBB when we split the BasicBlock.
259   PrevBB = StartBB->getSinglePredecessor();
260   assert(PrevBB != nullptr &&
261          "No Predecessor for the region start basic block!");
262 
263   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
264   PrevBB->getTerminator()->eraseFromParent();
265 
266   moveBBContents(*StartBB, *PrevBB);
267 
268   BasicBlock *PlacementBB = PrevBB;
269   if (StartBB != EndBB)
270     PlacementBB = EndBB;
271   if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
272     assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
273     assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
274     PlacementBB->getTerminator()->eraseFromParent();
275     moveBBContents(*FollowBB, *PlacementBB);
276     PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
277     FollowBB->eraseFromParent();
278   }
279 
280   PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
281   StartBB->eraseFromParent();
282 
283   // Make sure to save changes back to the StartBB.
284   StartBB = PrevBB;
285   EndBB = nullptr;
286   PrevBB = nullptr;
287   FollowBB = nullptr;
288 
289   CandidateSplit = false;
290 }
291 
292 /// Find whether \p V matches the Constants previously found for the \p GVN.
293 ///
294 /// \param V - The value to check for consistency.
295 /// \param GVN - The global value number assigned to \p V.
296 /// \param GVNToConstant - The mapping of global value number to Constants.
297 /// \returns true if the Value matches the Constant mapped to by V and false if
298 /// it \p V is a Constant but does not match.
299 /// \returns None if \p V is not a Constant.
300 static Optional<bool>
301 constantMatches(Value *V, unsigned GVN,
302                 DenseMap<unsigned, Constant *> &GVNToConstant) {
303   // See if we have a constants
304   Constant *CST = dyn_cast<Constant>(V);
305   if (!CST)
306     return None;
307 
308   // Holds a mapping from a global value number to a Constant.
309   DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
310   bool Inserted;
311 
312 
313   // If we have a constant, try to make a new entry in the GVNToConstant.
314   std::tie(GVNToConstantIt, Inserted) =
315       GVNToConstant.insert(std::make_pair(GVN, CST));
316   // If it was found and is not equal, it is not the same. We do not
317   // handle this case yet, and exit early.
318   if (Inserted || (GVNToConstantIt->second == CST))
319     return true;
320 
321   return false;
322 }
323 
324 InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
325   InstructionCost Benefit = 0;
326 
327   // Estimate the benefit of outlining a specific sections of the program.  We
328   // delegate mostly this task to the TargetTransformInfo so that if the target
329   // has specific changes, we can have a more accurate estimate.
330 
331   // However, getInstructionCost delegates the code size calculation for
332   // arithmetic instructions to getArithmeticInstrCost in
333   // include/Analysis/TargetTransformImpl.h, where it always estimates that the
334   // code size for a division and remainder instruction to be equal to 4, and
335   // everything else to 1.  This is not an accurate representation of the
336   // division instruction for targets that have a native division instruction.
337   // To be overly conservative, we only add 1 to the number of instructions for
338   // each division instruction.
339   for (IRInstructionData &ID : *Candidate) {
340     Instruction *I = ID.Inst;
341     switch (I->getOpcode()) {
342     case Instruction::FDiv:
343     case Instruction::FRem:
344     case Instruction::SDiv:
345     case Instruction::SRem:
346     case Instruction::UDiv:
347     case Instruction::URem:
348       Benefit += 1;
349       break;
350     default:
351       Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
352       break;
353     }
354   }
355 
356   return Benefit;
357 }
358 
359 /// Find whether \p Region matches the global value numbering to Constant
360 /// mapping found so far.
361 ///
362 /// \param Region - The OutlinableRegion we are checking for constants
363 /// \param GVNToConstant - The mapping of global value number to Constants.
364 /// \param NotSame - The set of global value numbers that do not have the same
365 /// constant in each region.
366 /// \returns true if all Constants are the same in every use of a Constant in \p
367 /// Region and false if not
368 static bool
369 collectRegionsConstants(OutlinableRegion &Region,
370                         DenseMap<unsigned, Constant *> &GVNToConstant,
371                         DenseSet<unsigned> &NotSame) {
372   bool ConstantsTheSame = true;
373 
374   IRSimilarityCandidate &C = *Region.Candidate;
375   for (IRInstructionData &ID : C) {
376 
377     // Iterate over the operands in an instruction. If the global value number,
378     // assigned by the IRSimilarityCandidate, has been seen before, we check if
379     // the the number has been found to be not the same value in each instance.
380     for (Value *V : ID.OperVals) {
381       Optional<unsigned> GVNOpt = C.getGVN(V);
382       assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
383       unsigned GVN = GVNOpt.getValue();
384 
385       // Check if this global value has been found to not be the same already.
386       if (NotSame.contains(GVN)) {
387         if (isa<Constant>(V))
388           ConstantsTheSame = false;
389         continue;
390       }
391 
392       // If it has been the same so far, we check the value for if the
393       // associated Constant value match the previous instances of the same
394       // global value number.  If the global value does not map to a Constant,
395       // it is considered to not be the same value.
396       Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
397       if (ConstantMatches.hasValue()) {
398         if (ConstantMatches.getValue())
399           continue;
400         else
401           ConstantsTheSame = false;
402       }
403 
404       // While this value is a register, it might not have been previously,
405       // make sure we don't already have a constant mapped to this global value
406       // number.
407       if (GVNToConstant.find(GVN) != GVNToConstant.end())
408         ConstantsTheSame = false;
409 
410       NotSame.insert(GVN);
411     }
412   }
413 
414   return ConstantsTheSame;
415 }
416 
417 void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
418   DenseMap<unsigned, Constant *> GVNToConstant;
419 
420   for (OutlinableRegion *Region : Regions)
421     collectRegionsConstants(*Region, GVNToConstant, NotSame);
422 }
423 
424 void OutlinableGroup::collectGVNStoreSets(Module &M) {
425   for (OutlinableRegion *OS : Regions)
426     OutputGVNCombinations.insert(OS->GVNStores);
427 
428   // We are adding an extracted argument to decide between which output path
429   // to use in the basic block.  It is used in a switch statement and only
430   // needs to be an integer.
431   if (OutputGVNCombinations.size() > 1)
432     ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
433 }
434 
435 /// Get the subprogram if it exists for one of the outlined regions.
436 ///
437 /// \param [in] Group - The set of regions to find a subprogram for.
438 /// \returns the subprogram if it exists, or nullptr.
439 static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
440   for (OutlinableRegion *OS : Group.Regions)
441     if (Function *F = OS->Call->getFunction())
442       if (DISubprogram *SP = F->getSubprogram())
443         return SP;
444 
445   return nullptr;
446 }
447 
448 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
449                                      unsigned FunctionNameSuffix) {
450   assert(!Group.OutlinedFunction && "Function is already defined!");
451 
452   Type *RetTy = Type::getVoidTy(M.getContext());
453   // All extracted functions _should_ have the same return type at this point
454   // since the similarity identifier ensures that all branches outside of the
455   // region occur in the same place.
456 
457   // NOTE: Should we ever move to the model that uses a switch at every point
458   // needed, meaning that we could branch within the region or out, it is
459   // possible that we will need to switch to using the most general case all of
460   // the time.
461   for (OutlinableRegion *R : Group.Regions) {
462     Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
463     if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
464         (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
465       RetTy = ExtractedFuncType;
466   }
467 
468   Group.OutlinedFunctionType = FunctionType::get(
469       RetTy, Group.ArgumentTypes, false);
470 
471   // These functions will only be called from within the same module, so
472   // we can set an internal linkage.
473   Group.OutlinedFunction = Function::Create(
474       Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
475       "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
476 
477   // Transfer the swifterr attribute to the correct function parameter.
478   if (Group.SwiftErrorArgument.hasValue())
479     Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
480                                          Attribute::SwiftError);
481 
482   Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
483   Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
484 
485   // If there's a DISubprogram associated with this outlined function, then
486   // emit debug info for the outlined function.
487   if (DISubprogram *SP = getSubprogramOrNull(Group)) {
488     Function *F = Group.OutlinedFunction;
489     // We have a DISubprogram. Get its DICompileUnit.
490     DICompileUnit *CU = SP->getUnit();
491     DIBuilder DB(M, true, CU);
492     DIFile *Unit = SP->getFile();
493     Mangler Mg;
494     // Get the mangled name of the function for the linkage name.
495     std::string Dummy;
496     llvm::raw_string_ostream MangledNameStream(Dummy);
497     Mg.getNameWithPrefix(MangledNameStream, F, false);
498 
499     DISubprogram *OutlinedSP = DB.createFunction(
500         Unit /* Context */, F->getName(), MangledNameStream.str(),
501         Unit /* File */,
502         0 /* Line 0 is reserved for compiler-generated code. */,
503         DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
504         0, /* Line 0 is reserved for compiler-generated code. */
505         DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
506         /* Outlined code is optimized code by definition. */
507         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
508 
509     // Don't add any new variables to the subprogram.
510     DB.finalizeSubprogram(OutlinedSP);
511 
512     // Attach subprogram to the function.
513     F->setSubprogram(OutlinedSP);
514     // We're done with the DIBuilder.
515     DB.finalize();
516   }
517 
518   return Group.OutlinedFunction;
519 }
520 
521 /// Move each BasicBlock in \p Old to \p New.
522 ///
523 /// \param [in] Old - The function to move the basic blocks from.
524 /// \param [in] New - The function to move the basic blocks to.
525 /// \param [out] NewEnds - The return blocks of the new overall function.
526 static void moveFunctionData(Function &Old, Function &New,
527                              DenseMap<Value *, BasicBlock *> &NewEnds) {
528   for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
529     CurrBB.removeFromParent();
530     CurrBB.insertInto(&New);
531     Instruction *I = CurrBB.getTerminator();
532 
533     // For each block we find a return instruction is, it is a potential exit
534     // path for the function.  We keep track of each block based on the return
535     // value here.
536     if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
537       NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
538 
539     std::vector<Instruction *> DebugInsts;
540 
541     for (Instruction &Val : CurrBB) {
542       // We must handle the scoping of called functions differently than
543       // other outlined instructions.
544       if (!isa<CallInst>(&Val)) {
545         // Remove the debug information for outlined functions.
546         Val.setDebugLoc(DebugLoc());
547         continue;
548       }
549 
550       // From this point we are only handling call instructions.
551       CallInst *CI = cast<CallInst>(&Val);
552 
553       // We add any debug statements here, to be removed after.  Since the
554       // instructions originate from many different locations in the program,
555       // it will cause incorrect reporting from a debugger if we keep the
556       // same debug instructions.
557       if (isa<DbgInfoIntrinsic>(CI)) {
558         DebugInsts.push_back(&Val);
559         continue;
560       }
561 
562       // Edit the scope of called functions inside of outlined functions.
563       if (DISubprogram *SP = New.getSubprogram()) {
564         DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
565         Val.setDebugLoc(DI);
566       }
567     }
568 
569     for (Instruction *I : DebugInsts)
570       I->eraseFromParent();
571   }
572 
573   assert(NewEnds.size() > 0 && "No return instruction for new function?");
574 }
575 
576 /// Find the the constants that will need to be lifted into arguments
577 /// as they are not the same in each instance of the region.
578 ///
579 /// \param [in] C - The IRSimilarityCandidate containing the region we are
580 /// analyzing.
581 /// \param [in] NotSame - The set of global value numbers that do not have a
582 /// single Constant across all OutlinableRegions similar to \p C.
583 /// \param [out] Inputs - The list containing the global value numbers of the
584 /// arguments needed for the region of code.
585 static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
586                           std::vector<unsigned> &Inputs) {
587   DenseSet<unsigned> Seen;
588   // Iterate over the instructions, and find what constants will need to be
589   // extracted into arguments.
590   for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
591        IDIt != EndIDIt; IDIt++) {
592     for (Value *V : (*IDIt).OperVals) {
593       // Since these are stored before any outlining, they will be in the
594       // global value numbering.
595       unsigned GVN = C.getGVN(V).getValue();
596       if (isa<Constant>(V))
597         if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
598           Inputs.push_back(GVN);
599           Seen.insert(GVN);
600         }
601     }
602   }
603 }
604 
605 /// Find the GVN for the inputs that have been found by the CodeExtractor.
606 ///
607 /// \param [in] C - The IRSimilarityCandidate containing the region we are
608 /// analyzing.
609 /// \param [in] CurrentInputs - The set of inputs found by the
610 /// CodeExtractor.
611 /// \param [in] OutputMappings - The mapping of values that have been replaced
612 /// by a new output value.
613 /// \param [out] EndInputNumbers - The global value numbers for the extracted
614 /// arguments.
615 static void mapInputsToGVNs(IRSimilarityCandidate &C,
616                             SetVector<Value *> &CurrentInputs,
617                             const DenseMap<Value *, Value *> &OutputMappings,
618                             std::vector<unsigned> &EndInputNumbers) {
619   // Get the Global Value Number for each input.  We check if the Value has been
620   // replaced by a different value at output, and use the original value before
621   // replacement.
622   for (Value *Input : CurrentInputs) {
623     assert(Input && "Have a nullptr as an input");
624     if (OutputMappings.find(Input) != OutputMappings.end())
625       Input = OutputMappings.find(Input)->second;
626     assert(C.getGVN(Input).hasValue() &&
627            "Could not find a numbering for the given input");
628     EndInputNumbers.push_back(C.getGVN(Input).getValue());
629   }
630 }
631 
632 /// Find the original value for the \p ArgInput values if any one of them was
633 /// replaced during a previous extraction.
634 ///
635 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
636 /// \param [in] OutputMappings - The mapping of values that have been replaced
637 /// by a new output value.
638 /// \param [out] RemappedArgInputs - The remapped values according to
639 /// \p OutputMappings that will be extracted.
640 static void
641 remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
642                      const DenseMap<Value *, Value *> &OutputMappings,
643                      SetVector<Value *> &RemappedArgInputs) {
644   // Get the global value number for each input that will be extracted as an
645   // argument by the code extractor, remapping if needed for reloaded values.
646   for (Value *Input : ArgInputs) {
647     if (OutputMappings.find(Input) != OutputMappings.end())
648       Input = OutputMappings.find(Input)->second;
649     RemappedArgInputs.insert(Input);
650   }
651 }
652 
653 /// Find the input GVNs and the output values for a region of Instructions.
654 /// Using the code extractor, we collect the inputs to the extracted function.
655 ///
656 /// The \p Region can be identified as needing to be ignored in this function.
657 /// It should be checked whether it should be ignored after a call to this
658 /// function.
659 ///
660 /// \param [in,out] Region - The region of code to be analyzed.
661 /// \param [out] InputGVNs - The global value numbers for the extracted
662 /// arguments.
663 /// \param [in] NotSame - The global value numbers in the region that do not
664 /// have the same constant value in the regions structurally similar to
665 /// \p Region.
666 /// \param [in] OutputMappings - The mapping of values that have been replaced
667 /// by a new output value after extraction.
668 /// \param [out] ArgInputs - The values of the inputs to the extracted function.
669 /// \param [out] Outputs - The set of values extracted by the CodeExtractor
670 /// as outputs.
671 static void getCodeExtractorArguments(
672     OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
673     DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
674     SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
675   IRSimilarityCandidate &C = *Region.Candidate;
676 
677   // OverallInputs are the inputs to the region found by the CodeExtractor,
678   // SinkCands and HoistCands are used by the CodeExtractor to find sunken
679   // allocas of values whose lifetimes are contained completely within the
680   // outlined region. PremappedInputs are the arguments found by the
681   // CodeExtractor, removing conditions such as sunken allocas, but that
682   // may need to be remapped due to the extracted output values replacing
683   // the original values. We use DummyOutputs for this first run of finding
684   // inputs and outputs since the outputs could change during findAllocas,
685   // the correct set of extracted outputs will be in the final Outputs ValueSet.
686   SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
687       DummyOutputs;
688 
689   // Use the code extractor to get the inputs and outputs, without sunken
690   // allocas or removing llvm.assumes.
691   CodeExtractor *CE = Region.CE;
692   CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
693   assert(Region.StartBB && "Region must have a start BasicBlock!");
694   Function *OrigF = Region.StartBB->getParent();
695   CodeExtractorAnalysisCache CEAC(*OrigF);
696   BasicBlock *Dummy = nullptr;
697 
698   // The region may be ineligible due to VarArgs in the parent function. In this
699   // case we ignore the region.
700   if (!CE->isEligible()) {
701     Region.IgnoreRegion = true;
702     return;
703   }
704 
705   // Find if any values are going to be sunk into the function when extracted
706   CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
707   CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
708 
709   // TODO: Support regions with sunken allocas: values whose lifetimes are
710   // contained completely within the outlined region.  These are not guaranteed
711   // to be the same in every region, so we must elevate them all to arguments
712   // when they appear.  If these values are not equal, it means there is some
713   // Input in OverallInputs that was removed for ArgInputs.
714   if (OverallInputs.size() != PremappedInputs.size()) {
715     Region.IgnoreRegion = true;
716     return;
717   }
718 
719   findConstants(C, NotSame, InputGVNs);
720 
721   mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
722 
723   remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
724                        ArgInputs);
725 
726   // Sort the GVNs, since we now have constants included in the \ref InputGVNs
727   // we need to make sure they are in a deterministic order.
728   stable_sort(InputGVNs);
729 }
730 
731 /// Look over the inputs and map each input argument to an argument in the
732 /// overall function for the OutlinableRegions.  This creates a way to replace
733 /// the arguments of the extracted function with the arguments of the new
734 /// overall function.
735 ///
736 /// \param [in,out] Region - The region of code to be analyzed.
737 /// \param [in] InputGVNs - The global value numbering of the input values
738 /// collected.
739 /// \param [in] ArgInputs - The values of the arguments to the extracted
740 /// function.
741 static void
742 findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
743                                         std::vector<unsigned> &InputGVNs,
744                                         SetVector<Value *> &ArgInputs) {
745 
746   IRSimilarityCandidate &C = *Region.Candidate;
747   OutlinableGroup &Group = *Region.Parent;
748 
749   // This counts the argument number in the overall function.
750   unsigned TypeIndex = 0;
751 
752   // This counts the argument number in the extracted function.
753   unsigned OriginalIndex = 0;
754 
755   // Find the mapping of the extracted arguments to the arguments for the
756   // overall function. Since there may be extra arguments in the overall
757   // function to account for the extracted constants, we have two different
758   // counters as we find extracted arguments, and as we come across overall
759   // arguments.
760 
761   // Additionally, in our first pass, for the first extracted function,
762   // we find argument locations for the canonical value numbering.  This
763   // numbering overrides any discovered location for the extracted code.
764   for (unsigned InputVal : InputGVNs) {
765     Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
766     assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
767     unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
768 
769     Optional<Value *> InputOpt = C.fromGVN(InputVal);
770     assert(InputOpt.hasValue() && "Global value number not found?");
771     Value *Input = InputOpt.getValue();
772 
773     DenseMap<unsigned, unsigned>::iterator AggArgIt =
774         Group.CanonicalNumberToAggArg.find(CanonicalNumber);
775 
776     if (!Group.InputTypesSet) {
777       Group.ArgumentTypes.push_back(Input->getType());
778       // If the input value has a swifterr attribute, make sure to mark the
779       // argument in the overall function.
780       if (Input->isSwiftError()) {
781         assert(
782             !Group.SwiftErrorArgument.hasValue() &&
783             "Argument already marked with swifterr for this OutlinableGroup!");
784         Group.SwiftErrorArgument = TypeIndex;
785       }
786     }
787 
788     // Check if we have a constant. If we do add it to the overall argument
789     // number to Constant map for the region, and continue to the next input.
790     if (Constant *CST = dyn_cast<Constant>(Input)) {
791       if (AggArgIt != Group.CanonicalNumberToAggArg.end())
792         Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
793       else {
794         Group.CanonicalNumberToAggArg.insert(
795             std::make_pair(CanonicalNumber, TypeIndex));
796         Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
797       }
798       TypeIndex++;
799       continue;
800     }
801 
802     // It is not a constant, we create the mapping from extracted argument list
803     // to the overall argument list, using the canonical location, if it exists.
804     assert(ArgInputs.count(Input) && "Input cannot be found!");
805 
806     if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
807       if (OriginalIndex != AggArgIt->second)
808         Region.ChangedArgOrder = true;
809       Region.ExtractedArgToAgg.insert(
810           std::make_pair(OriginalIndex, AggArgIt->second));
811       Region.AggArgToExtracted.insert(
812           std::make_pair(AggArgIt->second, OriginalIndex));
813     } else {
814       Group.CanonicalNumberToAggArg.insert(
815           std::make_pair(CanonicalNumber, TypeIndex));
816       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
817       Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
818     }
819     OriginalIndex++;
820     TypeIndex++;
821   }
822 
823   // If the function type definitions for the OutlinableGroup holding the region
824   // have not been set, set the length of the inputs here.  We should have the
825   // same inputs for all of the different regions contained in the
826   // OutlinableGroup since they are all structurally similar to one another.
827   if (!Group.InputTypesSet) {
828     Group.NumAggregateInputs = TypeIndex;
829     Group.InputTypesSet = true;
830   }
831 
832   Region.NumExtractedInputs = OriginalIndex;
833 }
834 
835 /// Create a mapping of the output arguments for the \p Region to the output
836 /// arguments of the overall outlined function.
837 ///
838 /// \param [in,out] Region - The region of code to be analyzed.
839 /// \param [in] Outputs - The values found by the code extractor.
840 static void
841 findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
842                                           SetVector<Value *> &Outputs) {
843   OutlinableGroup &Group = *Region.Parent;
844   IRSimilarityCandidate &C = *Region.Candidate;
845 
846   SmallVector<BasicBlock *> BE;
847   DenseSet<BasicBlock *> BBSet;
848   C.getBasicBlocks(BBSet, BE);
849 
850   // Find the exits to the region.
851   SmallPtrSet<BasicBlock *, 1> Exits;
852   for (BasicBlock *Block : BE)
853     for (BasicBlock *Succ : successors(Block))
854       if (!BBSet.contains(Succ))
855         Exits.insert(Succ);
856 
857   // After determining which blocks exit to PHINodes, we add these PHINodes to
858   // the set of outputs to be processed.  We also check the incoming values of
859   // the PHINodes for whether they should no longer be considered outputs.
860   for (BasicBlock *ExitBB : Exits) {
861     for (PHINode &PN : ExitBB->phis()) {
862       // Find all incoming values from the outlining region.
863       SmallVector<unsigned, 2> IncomingVals;
864       for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
865         if (BBSet.contains(PN.getIncomingBlock(Idx)))
866           IncomingVals.push_back(Idx);
867 
868       // Do not process PHI if there is one (or fewer) predecessor from region.
869       if (IncomingVals.size() <= 1)
870         continue;
871 
872       Region.IgnoreRegion = true;
873       return;
874     }
875   }
876 
877   // This counts the argument number in the extracted function.
878   unsigned OriginalIndex = Region.NumExtractedInputs;
879 
880   // This counts the argument number in the overall function.
881   unsigned TypeIndex = Group.NumAggregateInputs;
882   bool TypeFound;
883   DenseSet<unsigned> AggArgsUsed;
884 
885   // Iterate over the output types and identify if there is an aggregate pointer
886   // type whose base type matches the current output type. If there is, we mark
887   // that we will use this output register for this value. If not we add another
888   // type to the overall argument type list. We also store the GVNs used for
889   // stores to identify which values will need to be moved into an special
890   // block that holds the stores to the output registers.
891   for (Value *Output : Outputs) {
892     TypeFound = false;
893     // We can do this since it is a result value, and will have a number
894     // that is necessarily the same. BUT if in the future, the instructions
895     // do not have to be in same order, but are functionally the same, we will
896     // have to use a different scheme, as one-to-one correspondence is not
897     // guaranteed.
898     unsigned GlobalValue = C.getGVN(Output).getValue();
899     unsigned ArgumentSize = Group.ArgumentTypes.size();
900 
901     for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
902       if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
903         continue;
904 
905       if (AggArgsUsed.contains(Jdx))
906         continue;
907 
908       TypeFound = true;
909       AggArgsUsed.insert(Jdx);
910       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
911       Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
912       Region.GVNStores.push_back(GlobalValue);
913       break;
914     }
915 
916     // We were unable to find an unused type in the output type set that matches
917     // the output, so we add a pointer type to the argument types of the overall
918     // function to handle this output and create a mapping to it.
919     if (!TypeFound) {
920       Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
921       AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
922       Region.ExtractedArgToAgg.insert(
923           std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
924       Region.AggArgToExtracted.insert(
925           std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
926       Region.GVNStores.push_back(GlobalValue);
927     }
928 
929     stable_sort(Region.GVNStores);
930     OriginalIndex++;
931     TypeIndex++;
932   }
933 }
934 
935 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
936                                       DenseSet<unsigned> &NotSame) {
937   std::vector<unsigned> Inputs;
938   SetVector<Value *> ArgInputs, Outputs;
939 
940   getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
941                             Outputs);
942 
943   if (Region.IgnoreRegion)
944     return;
945 
946   // Map the inputs found by the CodeExtractor to the arguments found for
947   // the overall function.
948   findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
949 
950   // Map the outputs found by the CodeExtractor to the arguments found for
951   // the overall function.
952   findExtractedOutputToOverallOutputMapping(Region, Outputs);
953 }
954 
955 /// Replace the extracted function in the Region with a call to the overall
956 /// function constructed from the deduplicated similar regions, replacing and
957 /// remapping the values passed to the extracted function as arguments to the
958 /// new arguments of the overall function.
959 ///
960 /// \param [in] M - The module to outline from.
961 /// \param [in] Region - The regions of extracted code to be replaced with a new
962 /// function.
963 /// \returns a call instruction with the replaced function.
964 CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
965   std::vector<Value *> NewCallArgs;
966   DenseMap<unsigned, unsigned>::iterator ArgPair;
967 
968   OutlinableGroup &Group = *Region.Parent;
969   CallInst *Call = Region.Call;
970   assert(Call && "Call to replace is nullptr?");
971   Function *AggFunc = Group.OutlinedFunction;
972   assert(AggFunc && "Function to replace with is nullptr?");
973 
974   // If the arguments are the same size, there are not values that need to be
975   // made into an argument, the argument ordering has not been change, or
976   // different output registers to handle.  We can simply replace the called
977   // function in this case.
978   if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
979     LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
980                       << *AggFunc << " with same number of arguments\n");
981     Call->setCalledFunction(AggFunc);
982     return Call;
983   }
984 
985   // We have a different number of arguments than the new function, so
986   // we need to use our previously mappings off extracted argument to overall
987   // function argument, and constants to overall function argument to create the
988   // new argument list.
989   for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
990 
991     if (AggArgIdx == AggFunc->arg_size() - 1 &&
992         Group.OutputGVNCombinations.size() > 1) {
993       // If we are on the last argument, and we need to differentiate between
994       // output blocks, add an integer to the argument list to determine
995       // what block to take
996       LLVM_DEBUG(dbgs() << "Set switch block argument to "
997                         << Region.OutputBlockNum << "\n");
998       NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
999                                              Region.OutputBlockNum));
1000       continue;
1001     }
1002 
1003     ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1004     if (ArgPair != Region.AggArgToExtracted.end()) {
1005       Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1006       // If we found the mapping from the extracted function to the overall
1007       // function, we simply add it to the argument list.  We use the same
1008       // value, it just needs to honor the new order of arguments.
1009       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1010                         << *ArgumentValue << "\n");
1011       NewCallArgs.push_back(ArgumentValue);
1012       continue;
1013     }
1014 
1015     // If it is a constant, we simply add it to the argument list as a value.
1016     if (Region.AggArgToConstant.find(AggArgIdx) !=
1017         Region.AggArgToConstant.end()) {
1018       Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1019       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1020                         << *CST << "\n");
1021       NewCallArgs.push_back(CST);
1022       continue;
1023     }
1024 
1025     // Add a nullptr value if the argument is not found in the extracted
1026     // function.  If we cannot find a value, it means it is not in use
1027     // for the region, so we should not pass anything to it.
1028     LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1029     NewCallArgs.push_back(ConstantPointerNull::get(
1030         static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1031   }
1032 
1033   LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1034                     << *AggFunc << " with new set of arguments\n");
1035   // Create the new call instruction and erase the old one.
1036   Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1037                           Call);
1038 
1039   // It is possible that the call to the outlined function is either the first
1040   // instruction is in the new block, the last instruction, or both.  If either
1041   // of these is the case, we need to make sure that we replace the instruction
1042   // in the IRInstructionData struct with the new call.
1043   CallInst *OldCall = Region.Call;
1044   if (Region.NewFront->Inst == OldCall)
1045     Region.NewFront->Inst = Call;
1046   if (Region.NewBack->Inst == OldCall)
1047     Region.NewBack->Inst = Call;
1048 
1049   // Transfer any debug information.
1050   Call->setDebugLoc(Region.Call->getDebugLoc());
1051   // Since our output may determine which branch we go to, we make sure to
1052   // propogate this new call value through the module.
1053   OldCall->replaceAllUsesWith(Call);
1054 
1055   // Remove the old instruction.
1056   OldCall->eraseFromParent();
1057   Region.Call = Call;
1058 
1059   // Make sure that the argument in the new function has the SwiftError
1060   // argument.
1061   if (Group.SwiftErrorArgument.hasValue())
1062     Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
1063                        Attribute::SwiftError);
1064 
1065   return Call;
1066 }
1067 
1068 // Within an extracted function, replace the argument uses of the extracted
1069 // region with the arguments of the function for an OutlinableGroup.
1070 //
1071 /// \param [in] Region - The region of extracted code to be changed.
1072 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1073 /// region.
1074 /// \param [in] FirstFunction - A flag to indicate whether we are using this
1075 /// function to define the overall outlined function for all the regions, or
1076 /// if we are operating on one of the following regions.
1077 static void
1078 replaceArgumentUses(OutlinableRegion &Region,
1079                     DenseMap<Value *, BasicBlock *> &OutputBBs,
1080                     bool FirstFunction = false) {
1081   OutlinableGroup &Group = *Region.Parent;
1082   assert(Region.ExtractedFunction && "Region has no extracted function?");
1083 
1084   Function *DominatingFunction = Region.ExtractedFunction;
1085   if (FirstFunction)
1086     DominatingFunction = Group.OutlinedFunction;
1087   DominatorTree DT(*DominatingFunction);
1088 
1089   for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1090        ArgIdx++) {
1091     assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
1092                Region.ExtractedArgToAgg.end() &&
1093            "No mapping from extracted to outlined?");
1094     unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1095     Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1096     Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1097     // The argument is an input, so we can simply replace it with the overall
1098     // argument value
1099     if (ArgIdx < Region.NumExtractedInputs) {
1100       LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1101                         << *Region.ExtractedFunction << " with " << *AggArg
1102                         << " in function " << *Group.OutlinedFunction << "\n");
1103       Arg->replaceAllUsesWith(AggArg);
1104       continue;
1105     }
1106 
1107     // If we are replacing an output, we place the store value in its own
1108     // block inside the overall function before replacing the use of the output
1109     // in the function.
1110     assert(Arg->hasOneUse() && "Output argument can only have one use");
1111     User *InstAsUser = Arg->user_back();
1112     assert(InstAsUser && "User is nullptr!");
1113 
1114     Instruction *I = cast<Instruction>(InstAsUser);
1115     BasicBlock *BB = I->getParent();
1116     SmallVector<BasicBlock *, 4> Descendants;
1117     DT.getDescendants(BB, Descendants);
1118     bool EdgeAdded = false;
1119     if (Descendants.size() == 0) {
1120       EdgeAdded = true;
1121       DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1122       DT.getDescendants(BB, Descendants);
1123     }
1124 
1125     // Iterate over the following blocks, looking for return instructions,
1126     // if we find one, find the corresponding output block for the return value
1127     // and move our store instruction there.
1128     for (BasicBlock *DescendBB : Descendants) {
1129       ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1130       if (!RI)
1131         continue;
1132       Value *RetVal = RI->getReturnValue();
1133       auto VBBIt = OutputBBs.find(RetVal);
1134       assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1135 
1136       // If this is storing a PHINode, we must make sure it is included in the
1137       // overall function.
1138       StoreInst *SI = cast<StoreInst>(I);
1139 
1140       Value *ValueOperand = SI->getValueOperand();
1141 
1142       StoreInst *NewI = cast<StoreInst>(I->clone());
1143       NewI->setDebugLoc(DebugLoc());
1144       BasicBlock *OutputBB = VBBIt->second;
1145       OutputBB->getInstList().push_back(NewI);
1146       LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1147                         << *OutputBB << "\n");
1148 
1149       if (FirstFunction)
1150         continue;
1151       Value *CorrVal =
1152           Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1153       assert(CorrVal && "Value is nullptr?");
1154       NewI->setOperand(0, CorrVal);
1155     }
1156 
1157     // If we added an edge for basic blocks without a predecessor, we remove it
1158     // here.
1159     if (EdgeAdded)
1160       DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1161     I->eraseFromParent();
1162 
1163     LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1164                       << *Region.ExtractedFunction << " with " << *AggArg
1165                       << " in function " << *Group.OutlinedFunction << "\n");
1166     Arg->replaceAllUsesWith(AggArg);
1167   }
1168 }
1169 
1170 /// Within an extracted function, replace the constants that need to be lifted
1171 /// into arguments with the actual argument.
1172 ///
1173 /// \param Region [in] - The region of extracted code to be changed.
1174 void replaceConstants(OutlinableRegion &Region) {
1175   OutlinableGroup &Group = *Region.Parent;
1176   // Iterate over the constants that need to be elevated into arguments
1177   for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1178     unsigned AggArgIdx = Const.first;
1179     Function *OutlinedFunction = Group.OutlinedFunction;
1180     assert(OutlinedFunction && "Overall Function is not defined?");
1181     Constant *CST = Const.second;
1182     Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1183     // Identify the argument it will be elevated to, and replace instances of
1184     // that constant in the function.
1185 
1186     // TODO: If in the future constants do not have one global value number,
1187     // i.e. a constant 1 could be mapped to several values, this check will
1188     // have to be more strict.  It cannot be using only replaceUsesWithIf.
1189 
1190     LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1191                       << " in function " << *OutlinedFunction << " with "
1192                       << *Arg << "\n");
1193     CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1194       if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1195         return I->getFunction() == OutlinedFunction;
1196       return false;
1197     });
1198   }
1199 }
1200 
1201 /// It is possible that there is a basic block that already performs the same
1202 /// stores. This returns a duplicate block, if it exists
1203 ///
1204 /// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1205 /// \param OutputStoreBBs [in] The existing output blocks.
1206 /// \returns an optional value with the number output block if there is a match.
1207 Optional<unsigned> findDuplicateOutputBlock(
1208     DenseMap<Value *, BasicBlock *> &OutputBBs,
1209     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1210 
1211   bool Mismatch = false;
1212   unsigned MatchingNum = 0;
1213   // We compare the new set output blocks to the other sets of output blocks.
1214   // If they are the same number, and have identical instructions, they are
1215   // considered to be the same.
1216   for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1217     Mismatch = false;
1218     for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1219       DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
1220           OutputBBs.find(VToB.first);
1221       if (OutputBBIt == OutputBBs.end()) {
1222         Mismatch = true;
1223         break;
1224       }
1225 
1226       BasicBlock *CompBB = VToB.second;
1227       BasicBlock *OutputBB = OutputBBIt->second;
1228       if (CompBB->size() - 1 != OutputBB->size()) {
1229         Mismatch = true;
1230         break;
1231       }
1232 
1233       BasicBlock::iterator NIt = OutputBB->begin();
1234       for (Instruction &I : *CompBB) {
1235         if (isa<BranchInst>(&I))
1236           continue;
1237 
1238         if (!I.isIdenticalTo(&(*NIt))) {
1239           Mismatch = true;
1240           break;
1241         }
1242 
1243         NIt++;
1244       }
1245     }
1246 
1247     if (!Mismatch)
1248       return MatchingNum;
1249 
1250     MatchingNum++;
1251   }
1252 
1253   return None;
1254 }
1255 
1256 /// Remove empty output blocks from the outlined region.
1257 ///
1258 /// \param BlocksToPrune - Mapping of return values output blocks for the \p
1259 /// Region.
1260 /// \param Region - The OutlinableRegion we are analyzing.
1261 static bool
1262 analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
1263                             OutlinableRegion &Region) {
1264   bool AllRemoved = true;
1265   Value *RetValueForBB;
1266   BasicBlock *NewBB;
1267   SmallVector<Value *, 4> ToRemove;
1268   // Iterate over the output blocks created in the outlined section.
1269   for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1270     RetValueForBB = VtoBB.first;
1271     NewBB = VtoBB.second;
1272 
1273     // If there are no instructions, we remove it from the module, and also
1274     // mark the value for removal from the return value to output block mapping.
1275     if (NewBB->size() == 0) {
1276       NewBB->eraseFromParent();
1277       ToRemove.push_back(RetValueForBB);
1278       continue;
1279     }
1280 
1281     // Mark that we could not remove all the blocks since they were not all
1282     // empty.
1283     AllRemoved = false;
1284   }
1285 
1286   // Remove the return value from the mapping.
1287   for (Value *V : ToRemove)
1288     BlocksToPrune.erase(V);
1289 
1290   // Mark the region as having the no output scheme.
1291   if (AllRemoved)
1292     Region.OutputBlockNum = -1;
1293 
1294   return AllRemoved;
1295 }
1296 
1297 /// For the outlined section, move needed the StoreInsts for the output
1298 /// registers into their own block. Then, determine if there is a duplicate
1299 /// output block already created.
1300 ///
1301 /// \param [in] OG - The OutlinableGroup of regions to be outlined.
1302 /// \param [in] Region - The OutlinableRegion that is being analyzed.
1303 /// \param [in,out] OutputBBs - the blocks that stores for this region will be
1304 /// placed in.
1305 /// \param [in] EndBBs - the final blocks of the extracted function.
1306 /// \param [in] OutputMappings - OutputMappings the mapping of values that have
1307 /// been replaced by a new output value.
1308 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1309 static void alignOutputBlockWithAggFunc(
1310     OutlinableGroup &OG, OutlinableRegion &Region,
1311     DenseMap<Value *, BasicBlock *> &OutputBBs,
1312     DenseMap<Value *, BasicBlock *> &EndBBs,
1313     const DenseMap<Value *, Value *> &OutputMappings,
1314     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1315   // If none of the output blocks have any instructions, this means that we do
1316   // not have to determine if it matches any of the other output schemes, and we
1317   // don't have to do anything else.
1318   if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
1319     return;
1320 
1321   // Determine is there is a duplicate set of blocks.
1322   Optional<unsigned> MatchingBB =
1323       findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
1324 
1325   // If there is, we remove the new output blocks.  If it does not,
1326   // we add it to our list of sets of output blocks.
1327   if (MatchingBB.hasValue()) {
1328     LLVM_DEBUG(dbgs() << "Set output block for region in function"
1329                       << Region.ExtractedFunction << " to "
1330                       << MatchingBB.getValue());
1331 
1332     Region.OutputBlockNum = MatchingBB.getValue();
1333     for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
1334       VtoBB.second->eraseFromParent();
1335     return;
1336   }
1337 
1338   Region.OutputBlockNum = OutputStoreBBs.size();
1339 
1340   Value *RetValueForBB;
1341   BasicBlock *NewBB;
1342   OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1343   for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
1344     RetValueForBB = VtoBB.first;
1345     NewBB = VtoBB.second;
1346     DenseMap<Value *, BasicBlock *>::iterator VBBIt =
1347         EndBBs.find(RetValueForBB);
1348     LLVM_DEBUG(dbgs() << "Create output block for region in"
1349                       << Region.ExtractedFunction << " to "
1350                       << *NewBB);
1351     BranchInst::Create(VBBIt->second, NewBB);
1352     OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
1353   }
1354 }
1355 
1356 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
1357 /// before creating a basic block for each \p NewMap, and inserting into the new
1358 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
1359 ///
1360 /// \param OldMap [in] - The mapping to base the new mapping off of.
1361 /// \param NewMap [out] - The output mapping using the keys of \p OldMap.
1362 /// \param ParentFunc [in] - The function to put the new basic block in.
1363 /// \param BaseName [in] - The start of the BasicBlock names to be appended to
1364 /// by an index value.
1365 static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
1366                                        DenseMap<Value *, BasicBlock *> &NewMap,
1367                                        Function *ParentFunc, Twine BaseName) {
1368   unsigned Idx = 0;
1369   std::vector<Value *> SortedKeys;
1370 
1371   getSortedConstantKeys(SortedKeys, OldMap);
1372 
1373   for (Value *RetVal : SortedKeys) {
1374     BasicBlock *NewBB = BasicBlock::Create(
1375         ParentFunc->getContext(),
1376         Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
1377         ParentFunc);
1378     NewMap.insert(std::make_pair(RetVal, NewBB));
1379   }
1380 }
1381 
1382 /// Create the switch statement for outlined function to differentiate between
1383 /// all the output blocks.
1384 ///
1385 /// For the outlined section, determine if an outlined block already exists that
1386 /// matches the needed stores for the extracted section.
1387 /// \param [in] M - The module we are outlining from.
1388 /// \param [in] OG - The group of regions to be outlined.
1389 /// \param [in] EndBBs - The final blocks of the extracted function.
1390 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1391 void createSwitchStatement(
1392     Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
1393     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1394   // We only need the switch statement if there is more than one store
1395   // combination.
1396   if (OG.OutputGVNCombinations.size() > 1) {
1397     Function *AggFunc = OG.OutlinedFunction;
1398     // Create a final block for each different return block.
1399     DenseMap<Value *, BasicBlock *> ReturnBBs;
1400     createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
1401 
1402     for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
1403       std::pair<Value *, BasicBlock *> &OutputBlock =
1404           *OG.EndBBs.find(RetBlockPair.first);
1405       BasicBlock *ReturnBlock = RetBlockPair.second;
1406       BasicBlock *EndBB = OutputBlock.second;
1407       Instruction *Term = EndBB->getTerminator();
1408       // Move the return value to the final block instead of the original exit
1409       // stub.
1410       Term->moveBefore(*ReturnBlock, ReturnBlock->end());
1411       // Put the switch statement in the old end basic block for the function
1412       // with a fall through to the new return block.
1413       LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
1414                         << OutputStoreBBs.size() << "\n");
1415       SwitchInst *SwitchI =
1416           SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
1417                              ReturnBlock, OutputStoreBBs.size(), EndBB);
1418 
1419       unsigned Idx = 0;
1420       for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
1421         DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
1422             OutputStoreBB.find(OutputBlock.first);
1423 
1424         if (OSBBIt == OutputStoreBB.end())
1425           continue;
1426 
1427         BasicBlock *BB = OSBBIt->second;
1428         SwitchI->addCase(
1429             ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
1430         Term = BB->getTerminator();
1431         Term->setSuccessor(0, ReturnBlock);
1432         Idx++;
1433       }
1434     }
1435     return;
1436   }
1437 
1438   // If there needs to be stores, move them from the output blocks to their
1439   // corresponding ending block.
1440   if (OutputStoreBBs.size() == 1) {
1441     LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
1442                       << *OG.OutlinedFunction << "\n");
1443     DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
1444     for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
1445       DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
1446           EndBBs.find(VBPair.first);
1447       assert(EndBBIt != EndBBs.end() && "Could not find end block");
1448       BasicBlock *EndBB = EndBBIt->second;
1449       BasicBlock *OutputBB = VBPair.second;
1450       Instruction *Term = OutputBB->getTerminator();
1451       Term->eraseFromParent();
1452       Term = EndBB->getTerminator();
1453       moveBBContents(*OutputBB, *EndBB);
1454       Term->moveBefore(*EndBB, EndBB->end());
1455       OutputBB->eraseFromParent();
1456     }
1457   }
1458 }
1459 
1460 /// Fill the new function that will serve as the replacement function for all of
1461 /// the extracted regions of a certain structure from the first region in the
1462 /// list of regions.  Replace this first region's extracted function with the
1463 /// new overall function.
1464 ///
1465 /// \param [in] M - The module we are outlining from.
1466 /// \param [in] CurrentGroup - The group of regions to be outlined.
1467 /// \param [in,out] OutputStoreBBs - The output blocks for each different
1468 /// set of stores needed for the different functions.
1469 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
1470 /// once outlining is complete.
1471 static void fillOverallFunction(
1472     Module &M, OutlinableGroup &CurrentGroup,
1473     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
1474     std::vector<Function *> &FuncsToRemove) {
1475   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
1476 
1477   // Move first extracted function's instructions into new function.
1478   LLVM_DEBUG(dbgs() << "Move instructions from "
1479                     << *CurrentOS->ExtractedFunction << " to instruction "
1480                     << *CurrentGroup.OutlinedFunction << "\n");
1481   moveFunctionData(*CurrentOS->ExtractedFunction,
1482                    *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
1483 
1484   // Transfer the attributes from the function to the new function.
1485   for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
1486     CurrentGroup.OutlinedFunction->addFnAttr(A);
1487 
1488   // Create a new set of output blocks for the first extracted function.
1489   DenseMap<Value *, BasicBlock *> NewBBs;
1490   createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
1491                              CurrentGroup.OutlinedFunction, "output_block_0");
1492   CurrentOS->OutputBlockNum = 0;
1493 
1494   replaceArgumentUses(*CurrentOS, NewBBs, true);
1495   replaceConstants(*CurrentOS);
1496 
1497   // We first identify if any output blocks are empty, if they are we remove
1498   // them. We then create a branch instruction to the basic block to the return
1499   // block for the function for each non empty output block.
1500   if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
1501     OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1502     for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
1503       DenseMap<Value *, BasicBlock *>::iterator VBBIt =
1504           CurrentGroup.EndBBs.find(VToBB.first);
1505       BasicBlock *EndBB = VBBIt->second;
1506       BranchInst::Create(EndBB, VToBB.second);
1507       OutputStoreBBs.back().insert(VToBB);
1508     }
1509   }
1510 
1511   // Replace the call to the extracted function with the outlined function.
1512   CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1513 
1514   // We only delete the extracted functions at the end since we may need to
1515   // reference instructions contained in them for mapping purposes.
1516   FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1517 }
1518 
1519 void IROutliner::deduplicateExtractedSections(
1520     Module &M, OutlinableGroup &CurrentGroup,
1521     std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
1522   createFunction(M, CurrentGroup, OutlinedFunctionNum);
1523 
1524   std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
1525 
1526   OutlinableRegion *CurrentOS;
1527 
1528   fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
1529 
1530   std::vector<Value *> SortedKeys;
1531   for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
1532     CurrentOS = CurrentGroup.Regions[Idx];
1533     AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
1534                                                *CurrentOS->ExtractedFunction);
1535 
1536     // Create a set of BasicBlocks, one for each return block, to hold the
1537     // needed store instructions.
1538     DenseMap<Value *, BasicBlock *> NewBBs;
1539     createAndInsertBasicBlocks(
1540         CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
1541         "output_block_" + Twine(static_cast<unsigned>(Idx)));
1542 
1543     replaceArgumentUses(*CurrentOS, NewBBs);
1544     alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
1545                                 CurrentGroup.EndBBs, OutputMappings,
1546                                 OutputStoreBBs);
1547 
1548     CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1549     FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1550   }
1551 
1552   // Create a switch statement to handle the different output schemes.
1553   createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
1554 
1555   OutlinedFunctionNum++;
1556 }
1557 
1558 /// Checks that the next instruction in the InstructionDataList matches the
1559 /// next instruction in the module.  If they do not, there could be the
1560 /// possibility that extra code has been inserted, and we must ignore it.
1561 ///
1562 /// \param ID - The IRInstructionData to check the next instruction of.
1563 /// \returns true if the InstructionDataList and actual instruction match.
1564 static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
1565   // We check if there is a discrepancy between the InstructionDataList
1566   // and the actual next instruction in the module.  If there is, it means
1567   // that an extra instruction was added, likely by the CodeExtractor.
1568 
1569   // Since we do not have any similarity data about this particular
1570   // instruction, we cannot confidently outline it, and must discard this
1571   // candidate.
1572   IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
1573   Instruction *NextIDLInst = NextIDIt->Inst;
1574   Instruction *NextModuleInst = nullptr;
1575   if (!ID.Inst->isTerminator())
1576     NextModuleInst = ID.Inst->getNextNonDebugInstruction();
1577   else if (NextIDLInst != nullptr)
1578     NextModuleInst =
1579         &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
1580 
1581   if (NextIDLInst && NextIDLInst != NextModuleInst)
1582     return false;
1583 
1584   return true;
1585 }
1586 
1587 bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
1588     const OutlinableRegion &Region) {
1589   IRSimilarityCandidate *IRSC = Region.Candidate;
1590   unsigned StartIdx = IRSC->getStartIdx();
1591   unsigned EndIdx = IRSC->getEndIdx();
1592 
1593   // A check to make sure that we are not about to attempt to outline something
1594   // that has already been outlined.
1595   for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1596     if (Outlined.contains(Idx))
1597       return false;
1598 
1599   // We check if the recorded instruction matches the actual next instruction,
1600   // if it does not, we fix it in the InstructionDataList.
1601   if (!Region.Candidate->backInstruction()->isTerminator()) {
1602     Instruction *NewEndInst =
1603         Region.Candidate->backInstruction()->getNextNonDebugInstruction();
1604     assert(NewEndInst && "Next instruction is a nullptr?");
1605     if (Region.Candidate->end()->Inst != NewEndInst) {
1606       IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1607       IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
1608           IRInstructionData(*NewEndInst,
1609                             InstructionClassifier.visit(*NewEndInst), *IDL);
1610 
1611       // Insert the first IRInstructionData of the new region after the
1612       // last IRInstructionData of the IRSimilarityCandidate.
1613       IDL->insert(Region.Candidate->end(), *NewEndIRID);
1614     }
1615   }
1616 
1617   return none_of(*IRSC, [this](IRInstructionData &ID) {
1618     if (!nextIRInstructionDataMatchesNextInst(ID))
1619       return true;
1620 
1621     return !this->InstructionClassifier.visit(ID.Inst);
1622   });
1623 }
1624 
1625 void IROutliner::pruneIncompatibleRegions(
1626     std::vector<IRSimilarityCandidate> &CandidateVec,
1627     OutlinableGroup &CurrentGroup) {
1628   bool PreviouslyOutlined;
1629 
1630   // Sort from beginning to end, so the IRSimilarityCandidates are in order.
1631   stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
1632                                const IRSimilarityCandidate &RHS) {
1633     return LHS.getStartIdx() < RHS.getStartIdx();
1634   });
1635 
1636   IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
1637   // Since outlining a call and a branch instruction will be the same as only
1638   // outlinining a call instruction, we ignore it as a space saving.
1639   if (FirstCandidate.getLength() == 2) {
1640     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
1641         isa<BranchInst>(FirstCandidate.back()->Inst))
1642         return;
1643   }
1644 
1645   unsigned CurrentEndIdx = 0;
1646   for (IRSimilarityCandidate &IRSC : CandidateVec) {
1647     PreviouslyOutlined = false;
1648     unsigned StartIdx = IRSC.getStartIdx();
1649     unsigned EndIdx = IRSC.getEndIdx();
1650 
1651     for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1652       if (Outlined.contains(Idx)) {
1653         PreviouslyOutlined = true;
1654         break;
1655       }
1656 
1657     if (PreviouslyOutlined)
1658       continue;
1659 
1660     // Check over the instructions, and if the basic block has its address
1661     // taken for use somewhere else, we do not outline that block.
1662     bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
1663       return ID.Inst->getParent()->hasAddressTaken();
1664     });
1665 
1666     if (BBHasAddressTaken)
1667       continue;
1668 
1669     if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
1670         !OutlineFromLinkODRs)
1671       continue;
1672 
1673     // Greedily prune out any regions that will overlap with already chosen
1674     // regions.
1675     if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
1676       continue;
1677 
1678     bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
1679       if (!nextIRInstructionDataMatchesNextInst(ID))
1680         return true;
1681 
1682       return !this->InstructionClassifier.visit(ID.Inst);
1683     });
1684 
1685     if (BadInst)
1686       continue;
1687 
1688     OutlinableRegion *OS = new (RegionAllocator.Allocate())
1689         OutlinableRegion(IRSC, CurrentGroup);
1690     CurrentGroup.Regions.push_back(OS);
1691 
1692     CurrentEndIdx = EndIdx;
1693   }
1694 }
1695 
1696 InstructionCost
1697 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
1698   InstructionCost RegionBenefit = 0;
1699   for (OutlinableRegion *Region : CurrentGroup.Regions) {
1700     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1701     // We add the number of instructions in the region to the benefit as an
1702     // estimate as to how much will be removed.
1703     RegionBenefit += Region->getBenefit(TTI);
1704     LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
1705                       << " saved instructions to overfall benefit.\n");
1706   }
1707 
1708   return RegionBenefit;
1709 }
1710 
1711 InstructionCost
1712 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
1713   InstructionCost OverallCost = 0;
1714   for (OutlinableRegion *Region : CurrentGroup.Regions) {
1715     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1716 
1717     // Each output incurs a load after the call, so we add that to the cost.
1718     for (unsigned OutputGVN : Region->GVNStores) {
1719       Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
1720       assert(OV.hasValue() && "Could not find value for GVN?");
1721       Value *V = OV.getValue();
1722       InstructionCost LoadCost =
1723           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
1724                               TargetTransformInfo::TCK_CodeSize);
1725 
1726       LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
1727                         << " instructions to cost for output of type "
1728                         << *V->getType() << "\n");
1729       OverallCost += LoadCost;
1730     }
1731   }
1732 
1733   return OverallCost;
1734 }
1735 
1736 /// Find the extra instructions needed to handle any output values for the
1737 /// region.
1738 ///
1739 /// \param [in] M - The Module to outline from.
1740 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
1741 /// \param [in] TTI - The TargetTransformInfo used to collect information for
1742 /// new instruction costs.
1743 /// \returns the additional cost to handle the outputs.
1744 static InstructionCost findCostForOutputBlocks(Module &M,
1745                                                OutlinableGroup &CurrentGroup,
1746                                                TargetTransformInfo &TTI) {
1747   InstructionCost OutputCost = 0;
1748   unsigned NumOutputBranches = 0;
1749 
1750   IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
1751   DenseSet<BasicBlock *> CandidateBlocks;
1752   Candidate.getBasicBlocks(CandidateBlocks);
1753 
1754   // Count the number of different output branches that point to blocks outside
1755   // of the region.
1756   DenseSet<BasicBlock *> FoundBlocks;
1757   for (IRInstructionData &ID : Candidate) {
1758     if (!isa<BranchInst>(ID.Inst))
1759       continue;
1760 
1761     for (Value *V : ID.OperVals) {
1762       BasicBlock *BB = static_cast<BasicBlock *>(V);
1763       DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
1764       if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
1765         continue;
1766       FoundBlocks.insert(BB);
1767       NumOutputBranches++;
1768     }
1769   }
1770 
1771   CurrentGroup.BranchesToOutside = NumOutputBranches;
1772 
1773   for (const ArrayRef<unsigned> &OutputUse :
1774        CurrentGroup.OutputGVNCombinations) {
1775     for (unsigned GVN : OutputUse) {
1776       Optional<Value *> OV = Candidate.fromGVN(GVN);
1777       assert(OV.hasValue() && "Could not find value for GVN?");
1778       Value *V = OV.getValue();
1779       InstructionCost StoreCost =
1780           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
1781                               TargetTransformInfo::TCK_CodeSize);
1782 
1783       // An instruction cost is added for each store set that needs to occur for
1784       // various output combinations inside the function, plus a branch to
1785       // return to the exit block.
1786       LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
1787                         << " instructions to cost for output of type "
1788                         << *V->getType() << "\n");
1789       OutputCost += StoreCost * NumOutputBranches;
1790     }
1791 
1792     InstructionCost BranchCost =
1793         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
1794     LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
1795                       << " a branch instruction\n");
1796     OutputCost += BranchCost * NumOutputBranches;
1797   }
1798 
1799   // If there is more than one output scheme, we must have a comparison and
1800   // branch for each different item in the switch statement.
1801   if (CurrentGroup.OutputGVNCombinations.size() > 1) {
1802     InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
1803         Instruction::ICmp, Type::getInt32Ty(M.getContext()),
1804         Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
1805         TargetTransformInfo::TCK_CodeSize);
1806     InstructionCost BranchCost =
1807         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
1808 
1809     unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
1810     InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
1811 
1812     LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
1813                       << " instructions for each switch case for each different"
1814                       << " output path in a function\n");
1815     OutputCost += TotalCost * NumOutputBranches;
1816   }
1817 
1818   return OutputCost;
1819 }
1820 
1821 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
1822   InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
1823   CurrentGroup.Benefit += RegionBenefit;
1824   LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
1825 
1826   InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
1827   CurrentGroup.Cost += OutputReloadCost;
1828   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1829 
1830   InstructionCost AverageRegionBenefit =
1831       RegionBenefit / CurrentGroup.Regions.size();
1832   unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
1833   unsigned NumRegions = CurrentGroup.Regions.size();
1834   TargetTransformInfo &TTI =
1835       getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
1836 
1837   // We add one region to the cost once, to account for the instructions added
1838   // inside of the newly created function.
1839   LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
1840                     << " instructions to cost for body of new function.\n");
1841   CurrentGroup.Cost += AverageRegionBenefit;
1842   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1843 
1844   // For each argument, we must add an instruction for loading the argument
1845   // out of the register and into a value inside of the newly outlined function.
1846   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1847                     << " instructions to cost for each argument in the new"
1848                     << " function.\n");
1849   CurrentGroup.Cost +=
1850       OverallArgumentNum * TargetTransformInfo::TCC_Basic;
1851   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1852 
1853   // Each argument needs to either be loaded into a register or onto the stack.
1854   // Some arguments will only be loaded into the stack once the argument
1855   // registers are filled.
1856   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1857                     << " instructions to cost for each argument in the new"
1858                     << " function " << NumRegions << " times for the "
1859                     << "needed argument handling at the call site.\n");
1860   CurrentGroup.Cost +=
1861       2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
1862   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1863 
1864   CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
1865   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1866 }
1867 
1868 void IROutliner::updateOutputMapping(OutlinableRegion &Region,
1869                                      ArrayRef<Value *> Outputs,
1870                                      LoadInst *LI) {
1871   // For and load instructions following the call
1872   Value *Operand = LI->getPointerOperand();
1873   Optional<unsigned> OutputIdx = None;
1874   // Find if the operand it is an output register.
1875   for (unsigned ArgIdx = Region.NumExtractedInputs;
1876        ArgIdx < Region.Call->arg_size(); ArgIdx++) {
1877     if (Operand == Region.Call->getArgOperand(ArgIdx)) {
1878       OutputIdx = ArgIdx - Region.NumExtractedInputs;
1879       break;
1880     }
1881   }
1882 
1883   // If we found an output register, place a mapping of the new value
1884   // to the original in the mapping.
1885   if (!OutputIdx.hasValue())
1886     return;
1887 
1888   if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
1889       OutputMappings.end()) {
1890     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
1891                       << *Outputs[OutputIdx.getValue()] << "\n");
1892     OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
1893   } else {
1894     Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
1895     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
1896                       << *Outputs[OutputIdx.getValue()] << "\n");
1897     OutputMappings.insert(std::make_pair(LI, Orig));
1898   }
1899 }
1900 
1901 bool IROutliner::extractSection(OutlinableRegion &Region) {
1902   SetVector<Value *> ArgInputs, Outputs, SinkCands;
1903   assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
1904   BasicBlock *InitialStart = Region.StartBB;
1905   Function *OrigF = Region.StartBB->getParent();
1906   CodeExtractorAnalysisCache CEAC(*OrigF);
1907   Region.ExtractedFunction =
1908       Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
1909 
1910   // If the extraction was successful, find the BasicBlock, and reassign the
1911   // OutlinableRegion blocks
1912   if (!Region.ExtractedFunction) {
1913     LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
1914                       << "\n");
1915     Region.reattachCandidate();
1916     return false;
1917   }
1918 
1919   // Get the block containing the called branch, and reassign the blocks as
1920   // necessary.  If the original block still exists, it is because we ended on
1921   // a branch instruction, and so we move the contents into the block before
1922   // and assign the previous block correctly.
1923   User *InstAsUser = Region.ExtractedFunction->user_back();
1924   BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
1925   Region.PrevBB = RewrittenBB->getSinglePredecessor();
1926   assert(Region.PrevBB && "PrevBB is nullptr?");
1927   if (Region.PrevBB == InitialStart) {
1928     BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
1929     Instruction *BI = NewPrev->getTerminator();
1930     BI->eraseFromParent();
1931     moveBBContents(*InitialStart, *NewPrev);
1932     Region.PrevBB = NewPrev;
1933     InitialStart->eraseFromParent();
1934   }
1935 
1936   Region.StartBB = RewrittenBB;
1937   Region.EndBB = RewrittenBB;
1938 
1939   // The sequences of outlinable regions has now changed.  We must fix the
1940   // IRInstructionDataList for consistency.  Although they may not be illegal
1941   // instructions, they should not be compared with anything else as they
1942   // should not be outlined in this round.  So marking these as illegal is
1943   // allowed.
1944   IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1945   Instruction *BeginRewritten = &*RewrittenBB->begin();
1946   Instruction *EndRewritten = &*RewrittenBB->begin();
1947   Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
1948       *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
1949   Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
1950       *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
1951 
1952   // Insert the first IRInstructionData of the new region in front of the
1953   // first IRInstructionData of the IRSimilarityCandidate.
1954   IDL->insert(Region.Candidate->begin(), *Region.NewFront);
1955   // Insert the first IRInstructionData of the new region after the
1956   // last IRInstructionData of the IRSimilarityCandidate.
1957   IDL->insert(Region.Candidate->end(), *Region.NewBack);
1958   // Remove the IRInstructionData from the IRSimilarityCandidate.
1959   IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
1960 
1961   assert(RewrittenBB != nullptr &&
1962          "Could not find a predecessor after extraction!");
1963 
1964   // Iterate over the new set of instructions to find the new call
1965   // instruction.
1966   for (Instruction &I : *RewrittenBB)
1967     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1968       if (Region.ExtractedFunction == CI->getCalledFunction())
1969         Region.Call = CI;
1970     } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
1971       updateOutputMapping(Region, Outputs.getArrayRef(), LI);
1972   Region.reattachCandidate();
1973   return true;
1974 }
1975 
1976 unsigned IROutliner::doOutline(Module &M) {
1977   // Find the possible similarity sections.
1978   InstructionClassifier.EnableBranches = !DisableBranches;
1979   IRSimilarityIdentifier &Identifier = getIRSI(M);
1980   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
1981 
1982   // Sort them by size of extracted sections
1983   unsigned OutlinedFunctionNum = 0;
1984   // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
1985   // to sort them by the potential number of instructions to be outlined
1986   if (SimilarityCandidates.size() > 1)
1987     llvm::stable_sort(SimilarityCandidates,
1988                       [](const std::vector<IRSimilarityCandidate> &LHS,
1989                          const std::vector<IRSimilarityCandidate> &RHS) {
1990                         return LHS[0].getLength() * LHS.size() >
1991                                RHS[0].getLength() * RHS.size();
1992                       });
1993   // Creating OutlinableGroups for each SimilarityCandidate to be used in
1994   // each of the following for loops to avoid making an allocator.
1995   std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
1996 
1997   DenseSet<unsigned> NotSame;
1998   std::vector<OutlinableGroup *> NegativeCostGroups;
1999   std::vector<OutlinableRegion *> OutlinedRegions;
2000   // Iterate over the possible sets of similarity.
2001   unsigned PotentialGroupIdx = 0;
2002   for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2003     OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2004 
2005     // Remove entries that were previously outlined
2006     pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2007 
2008     // We pruned the number of regions to 0 to 1, meaning that it's not worth
2009     // trying to outlined since there is no compatible similar instance of this
2010     // code.
2011     if (CurrentGroup.Regions.size() < 2)
2012       continue;
2013 
2014     // Determine if there are any values that are the same constant throughout
2015     // each section in the set.
2016     NotSame.clear();
2017     CurrentGroup.findSameConstants(NotSame);
2018 
2019     if (CurrentGroup.IgnoreGroup)
2020       continue;
2021 
2022     // Create a CodeExtractor for each outlinable region. Identify inputs and
2023     // outputs for each section using the code extractor and create the argument
2024     // types for the Aggregate Outlining Function.
2025     OutlinedRegions.clear();
2026     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2027       // Break the outlinable region out of its parent BasicBlock into its own
2028       // BasicBlocks (see function implementation).
2029       OS->splitCandidate();
2030 
2031       // There's a chance that when the region is split, extra instructions are
2032       // added to the region. This makes the region no longer viable
2033       // to be split, so we ignore it for outlining.
2034       if (!OS->CandidateSplit)
2035         continue;
2036 
2037       SmallVector<BasicBlock *> BE;
2038       DenseSet<BasicBlock *> BBSet;
2039       OS->Candidate->getBasicBlocks(BBSet, BE);
2040       OS->CE = new (ExtractorAllocator.Allocate())
2041           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2042                         false, "outlined");
2043       findAddInputsOutputs(M, *OS, NotSame);
2044       if (!OS->IgnoreRegion)
2045         OutlinedRegions.push_back(OS);
2046 
2047       // We recombine the blocks together now that we have gathered all the
2048       // needed information.
2049       OS->reattachCandidate();
2050     }
2051 
2052     CurrentGroup.Regions = std::move(OutlinedRegions);
2053 
2054     if (CurrentGroup.Regions.empty())
2055       continue;
2056 
2057     CurrentGroup.collectGVNStoreSets(M);
2058 
2059     if (CostModel)
2060       findCostBenefit(M, CurrentGroup);
2061 
2062     // If we are adhering to the cost model, skip those groups where the cost
2063     // outweighs the benefits.
2064     if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2065       OptimizationRemarkEmitter &ORE =
2066           getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2067       ORE.emit([&]() {
2068         IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2069         OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2070                                    C->frontInstruction());
2071         R << "did not outline "
2072           << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2073           << " regions due to estimated increase of "
2074           << ore::NV("InstructionIncrease",
2075                      CurrentGroup.Cost - CurrentGroup.Benefit)
2076           << " instructions at locations ";
2077         interleave(
2078             CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2079             [&R](OutlinableRegion *Region) {
2080               R << ore::NV(
2081                   "DebugLoc",
2082                   Region->Candidate->frontInstruction()->getDebugLoc());
2083             },
2084             [&R]() { R << " "; });
2085         return R;
2086       });
2087       continue;
2088     }
2089 
2090     NegativeCostGroups.push_back(&CurrentGroup);
2091   }
2092 
2093   ExtractorAllocator.DestroyAll();
2094 
2095   if (NegativeCostGroups.size() > 1)
2096     stable_sort(NegativeCostGroups,
2097                 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2098                   return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2099                 });
2100 
2101   std::vector<Function *> FuncsToRemove;
2102   for (OutlinableGroup *CG : NegativeCostGroups) {
2103     OutlinableGroup &CurrentGroup = *CG;
2104 
2105     OutlinedRegions.clear();
2106     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2107       // We check whether our region is compatible with what has already been
2108       // outlined, and whether we need to ignore this item.
2109       if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2110         continue;
2111       OutlinedRegions.push_back(Region);
2112     }
2113 
2114     if (OutlinedRegions.size() < 2)
2115       continue;
2116 
2117     // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2118     // we are still outlining enough regions to make up for the added cost.
2119     CurrentGroup.Regions = std::move(OutlinedRegions);
2120     if (CostModel) {
2121       CurrentGroup.Benefit = 0;
2122       CurrentGroup.Cost = 0;
2123       findCostBenefit(M, CurrentGroup);
2124       if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2125         continue;
2126     }
2127     OutlinedRegions.clear();
2128     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2129       Region->splitCandidate();
2130       if (!Region->CandidateSplit)
2131         continue;
2132       OutlinedRegions.push_back(Region);
2133     }
2134 
2135     CurrentGroup.Regions = std::move(OutlinedRegions);
2136     if (CurrentGroup.Regions.size() < 2) {
2137       for (OutlinableRegion *R : CurrentGroup.Regions)
2138         R->reattachCandidate();
2139       continue;
2140     }
2141 
2142     LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2143                       << " and benefit " << CurrentGroup.Benefit << "\n");
2144 
2145     // Create functions out of all the sections, and mark them as outlined.
2146     OutlinedRegions.clear();
2147     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2148       SmallVector<BasicBlock *> BE;
2149       DenseSet<BasicBlock *> BBSet;
2150       OS->Candidate->getBasicBlocks(BBSet, BE);
2151       OS->CE = new (ExtractorAllocator.Allocate())
2152           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2153                         false, "outlined");
2154       bool FunctionOutlined = extractSection(*OS);
2155       if (FunctionOutlined) {
2156         unsigned StartIdx = OS->Candidate->getStartIdx();
2157         unsigned EndIdx = OS->Candidate->getEndIdx();
2158         for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2159           Outlined.insert(Idx);
2160 
2161         OutlinedRegions.push_back(OS);
2162       }
2163     }
2164 
2165     LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2166                       << " with benefit " << CurrentGroup.Benefit
2167                       << " and cost " << CurrentGroup.Cost << "\n");
2168 
2169     CurrentGroup.Regions = std::move(OutlinedRegions);
2170 
2171     if (CurrentGroup.Regions.empty())
2172       continue;
2173 
2174     OptimizationRemarkEmitter &ORE =
2175         getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2176     ORE.emit([&]() {
2177       IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2178       OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2179       R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2180         << " regions with decrease of "
2181         << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2182         << " instructions at locations ";
2183       interleave(
2184           CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2185           [&R](OutlinableRegion *Region) {
2186             R << ore::NV("DebugLoc",
2187                          Region->Candidate->frontInstruction()->getDebugLoc());
2188           },
2189           [&R]() { R << " "; });
2190       return R;
2191     });
2192 
2193     deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2194                                  OutlinedFunctionNum);
2195   }
2196 
2197   for (Function *F : FuncsToRemove)
2198     F->eraseFromParent();
2199 
2200   return OutlinedFunctionNum;
2201 }
2202 
2203 bool IROutliner::run(Module &M) {
2204   CostModel = !NoCostModel;
2205   OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2206 
2207   return doOutline(M) > 0;
2208 }
2209 
2210 // Pass Manager Boilerplate
2211 namespace {
2212 class IROutlinerLegacyPass : public ModulePass {
2213 public:
2214   static char ID;
2215   IROutlinerLegacyPass() : ModulePass(ID) {
2216     initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
2217   }
2218 
2219   void getAnalysisUsage(AnalysisUsage &AU) const override {
2220     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2221     AU.addRequired<TargetTransformInfoWrapperPass>();
2222     AU.addRequired<IRSimilarityIdentifierWrapperPass>();
2223   }
2224 
2225   bool runOnModule(Module &M) override;
2226 };
2227 } // namespace
2228 
2229 bool IROutlinerLegacyPass::runOnModule(Module &M) {
2230   if (skipModule(M))
2231     return false;
2232 
2233   std::unique_ptr<OptimizationRemarkEmitter> ORE;
2234   auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2235     ORE.reset(new OptimizationRemarkEmitter(&F));
2236     return *ORE.get();
2237   };
2238 
2239   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
2240     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2241   };
2242 
2243   auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
2244     return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
2245   };
2246 
2247   return IROutliner(GTTI, GIRSI, GORE).run(M);
2248 }
2249 
2250 PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
2251   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2252 
2253   std::function<TargetTransformInfo &(Function &)> GTTI =
2254       [&FAM](Function &F) -> TargetTransformInfo & {
2255     return FAM.getResult<TargetIRAnalysis>(F);
2256   };
2257 
2258   std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
2259       [&AM](Module &M) -> IRSimilarityIdentifier & {
2260     return AM.getResult<IRSimilarityAnalysis>(M);
2261   };
2262 
2263   std::unique_ptr<OptimizationRemarkEmitter> ORE;
2264   std::function<OptimizationRemarkEmitter &(Function &)> GORE =
2265       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2266     ORE.reset(new OptimizationRemarkEmitter(&F));
2267     return *ORE.get();
2268   };
2269 
2270   if (IROutliner(GTTI, GIRSI, GORE).run(M))
2271     return PreservedAnalyses::none();
2272   return PreservedAnalyses::all();
2273 }
2274 
2275 char IROutlinerLegacyPass::ID = 0;
2276 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2277                       false)
2278 INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
2279 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
2280 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2281 INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2282                     false)
2283 
2284 ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
2285