1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 // Implementation for the IROutliner which is used by the IROutliner Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/IROutliner.h"
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
17 #include "llvm/Analysis/TargetTransformInfo.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/DIBuilder.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/DebugInfoMetadata.h"
22 #include "llvm/IR/Dominators.h"
23 #include "llvm/IR/Mangler.h"
24 #include "llvm/IR/PassManager.h"
25 #include "llvm/InitializePasses.h"
26 #include "llvm/Pass.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Transforms/IPO.h"
29 #include <set>
30 #include <vector>
31 
32 #define DEBUG_TYPE "iroutliner"
33 
34 using namespace llvm;
35 using namespace IRSimilarity;
36 
37 // A command flag to be used for debugging to exclude branches from similarity
38 // matching and outlining.
39 namespace llvm {
40 extern cl::opt<bool> DisableBranches;
41 
42 // A command flag to be used for debugging to indirect calls from similarity
43 // matching and outlining.
44 extern cl::opt<bool> DisableIndirectCalls;
45 
46 // A command flag to be used for debugging to exclude intrinsics from similarity
47 // matching and outlining.
48 extern cl::opt<bool> DisableIntrinsics;
49 
50 } // namespace llvm
51 
52 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
53 // functions. This is false by default because the linker can dedupe linkonceodr
54 // functions. Since the outliner is confined to a single module (modulo LTO),
55 // this is off by default. It should, however, be the default behavior in
56 // LTO.
57 static cl::opt<bool> EnableLinkOnceODRIROutlining(
58     "enable-linkonceodr-ir-outlining", cl::Hidden,
59     cl::desc("Enable the IR outliner on linkonceodr functions"),
60     cl::init(false));
61 
62 // This is a debug option to test small pieces of code to ensure that outlining
63 // works correctly.
64 static cl::opt<bool> NoCostModel(
65     "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
66     cl::desc("Debug option to outline greedily, without restriction that "
67              "calculated benefit outweighs cost"));
68 
69 /// The OutlinableGroup holds all the overarching information for outlining
70 /// a set of regions that are structurally similar to one another, such as the
71 /// types of the overall function, the output blocks, the sets of stores needed
72 /// and a list of the different regions. This information is used in the
73 /// deduplication of extracted regions with the same structure.
74 struct OutlinableGroup {
75   /// The sections that could be outlined
76   std::vector<OutlinableRegion *> Regions;
77 
78   /// The argument types for the function created as the overall function to
79   /// replace the extracted function for each region.
80   std::vector<Type *> ArgumentTypes;
81   /// The FunctionType for the overall function.
82   FunctionType *OutlinedFunctionType = nullptr;
83   /// The Function for the collective overall function.
84   Function *OutlinedFunction = nullptr;
85 
86   /// Flag for whether we should not consider this group of OutlinableRegions
87   /// for extraction.
88   bool IgnoreGroup = false;
89 
90   /// The return blocks for the overall function.
91   DenseMap<Value *, BasicBlock *> EndBBs;
92 
93   /// The PHIBlocks with their corresponding return block based on the return
94   /// value as the key.
95   DenseMap<Value *, BasicBlock *> PHIBlocks;
96 
97   /// A set containing the different GVN store sets needed. Each array contains
98   /// a sorted list of the different values that need to be stored into output
99   /// registers.
100   DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
101 
102   /// Flag for whether the \ref ArgumentTypes have been defined after the
103   /// extraction of the first region.
104   bool InputTypesSet = false;
105 
106   /// The number of input values in \ref ArgumentTypes.  Anything after this
107   /// index in ArgumentTypes is an output argument.
108   unsigned NumAggregateInputs = 0;
109 
110   /// The mapping of the canonical numbering of the values in outlined sections
111   /// to specific arguments.
112   DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
113 
114   /// The number of branches in the region target a basic block that is outside
115   /// of the region.
116   unsigned BranchesToOutside = 0;
117 
118   /// Tracker counting backwards from the highest unsigned value possible to
119   /// avoid conflicting with the GVNs of assigned values.  We start at -3 since
120   /// -2 and -1 are assigned by the DenseMap.
121   unsigned PHINodeGVNTracker = -3;
122 
123   DenseMap<unsigned,
124            std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
125       PHINodeGVNToGVNs;
126   DenseMap<hash_code, unsigned> GVNsToPHINodeGVN;
127 
128   /// The number of instructions that will be outlined by extracting \ref
129   /// Regions.
130   InstructionCost Benefit = 0;
131   /// The number of added instructions needed for the outlining of the \ref
132   /// Regions.
133   InstructionCost Cost = 0;
134 
135   /// The argument that needs to be marked with the swifterr attribute.  If not
136   /// needed, there is no value.
137   Optional<unsigned> SwiftErrorArgument;
138 
139   /// For the \ref Regions, we look at every Value.  If it is a constant,
140   /// we check whether it is the same in Region.
141   ///
142   /// \param [in,out] NotSame contains the global value numbers where the
143   /// constant is not always the same, and must be passed in as an argument.
144   void findSameConstants(DenseSet<unsigned> &NotSame);
145 
146   /// For the regions, look at each set of GVN stores needed and account for
147   /// each combination.  Add an argument to the argument types if there is
148   /// more than one combination.
149   ///
150   /// \param [in] M - The module we are outlining from.
151   void collectGVNStoreSets(Module &M);
152 };
153 
154 /// Move the contents of \p SourceBB to before the last instruction of \p
155 /// TargetBB.
156 /// \param SourceBB - the BasicBlock to pull Instructions from.
157 /// \param TargetBB - the BasicBlock to put Instruction into.
158 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
159   for (Instruction &I : llvm::make_early_inc_range(SourceBB))
160     I.moveBefore(TargetBB, TargetBB.end());
161 }
162 
163 /// A function to sort the keys of \p Map, which must be a mapping of constant
164 /// values to basic blocks and return it in \p SortedKeys
165 ///
166 /// \param SortedKeys - The vector the keys will be return in and sorted.
167 /// \param Map - The DenseMap containing keys to sort.
168 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
169                                   DenseMap<Value *, BasicBlock *> &Map) {
170   for (auto &VtoBB : Map)
171     SortedKeys.push_back(VtoBB.first);
172 
173   stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
174     const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
175     const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
176     assert(RHSC && "Not a constant integer in return value?");
177     assert(LHSC && "Not a constant integer in return value?");
178 
179     return LHSC->getLimitedValue() < RHSC->getLimitedValue();
180   });
181 }
182 
183 Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
184                                                   Value *V) {
185   Optional<unsigned> GVN = Candidate->getGVN(V);
186   assert(GVN.hasValue() && "No GVN for incoming value");
187   Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
188   Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
189   Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
190   return FoundValueOpt.getValueOr(nullptr);
191 }
192 
193 BasicBlock *
194 OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
195                                            BasicBlock *BB) {
196   Instruction *FirstNonPHI = BB->getFirstNonPHI();
197   assert(FirstNonPHI && "block is empty?");
198   Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
199   if (!CorrespondingVal)
200     return nullptr;
201   BasicBlock *CorrespondingBlock =
202       cast<Instruction>(CorrespondingVal)->getParent();
203   return CorrespondingBlock;
204 }
205 
206 /// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
207 /// in \p Included to branch to BasicBlock \p Replace if they currently branch
208 /// to the BasicBlock \p Find.  This is used to fix up the incoming basic blocks
209 /// when PHINodes are included in outlined regions.
210 ///
211 /// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
212 /// checked.
213 /// \param Find - The successor block to be replaced.
214 /// \param Replace - The new succesor block to branch to.
215 /// \param Included - The set of blocks about to be outlined.
216 static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
217                                       BasicBlock *Replace,
218                                       DenseSet<BasicBlock *> &Included) {
219   for (PHINode &PN : PHIBlock->phis()) {
220     for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
221          ++Idx) {
222       // Check if the incoming block is included in the set of blocks being
223       // outlined.
224       BasicBlock *Incoming = PN.getIncomingBlock(Idx);
225       if (!Included.contains(Incoming))
226         continue;
227 
228       BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
229       assert(BI && "Not a branch instruction?");
230       // Look over the branching instructions into this block to see if we
231       // used to branch to Find in this outlined block.
232       for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
233            Succ++) {
234         // If we have found the block to replace, we do so here.
235         if (BI->getSuccessor(Succ) != Find)
236           continue;
237         BI->setSuccessor(Succ, Replace);
238       }
239     }
240   }
241 }
242 
243 
244 void OutlinableRegion::splitCandidate() {
245   assert(!CandidateSplit && "Candidate already split!");
246 
247   Instruction *BackInst = Candidate->backInstruction();
248 
249   Instruction *EndInst = nullptr;
250   // Check whether the last instruction is a terminator, if it is, we do
251   // not split on the following instruction. We leave the block as it is.  We
252   // also check that this is not the last instruction in the Module, otherwise
253   // the check for whether the current following instruction matches the
254   // previously recorded instruction will be incorrect.
255   if (!BackInst->isTerminator() ||
256       BackInst->getParent() != &BackInst->getFunction()->back()) {
257     EndInst = Candidate->end()->Inst;
258     assert(EndInst && "Expected an end instruction?");
259   }
260 
261   // We check if the current instruction following the last instruction in the
262   // region is the same as the recorded instruction following the last
263   // instruction. If they do not match, there could be problems in rewriting
264   // the program after outlining, so we ignore it.
265   if (!BackInst->isTerminator() &&
266       EndInst != BackInst->getNextNonDebugInstruction())
267     return;
268 
269   Instruction *StartInst = (*Candidate->begin()).Inst;
270   assert(StartInst && "Expected a start instruction?");
271   StartBB = StartInst->getParent();
272   PrevBB = StartBB;
273 
274   DenseSet<BasicBlock *> BBSet;
275   Candidate->getBasicBlocks(BBSet);
276 
277   // We iterate over the instructions in the region, if we find a PHINode, we
278   // check if there are predecessors outside of the region, if there are,
279   // we ignore this region since we are unable to handle the severing of the
280   // phi node right now.
281 
282   // TODO: Handle extraneous inputs for PHINodes through variable number of
283   // inputs, similar to how outputs are handled.
284   BasicBlock::iterator It = StartInst->getIterator();
285   EndBB = BackInst->getParent();
286   BasicBlock *IBlock;
287   bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
288   while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
289     unsigned NumPredsOutsideRegion = 0;
290     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
291       if (!BBSet.contains(PN->getIncomingBlock(i))) {
292         ++NumPredsOutsideRegion;
293         continue;
294       }
295 
296       // We must consider the case there the incoming block to the PHINode is
297       // the same as the final block of the OutlinableRegion.  If this is the
298       // case, the branch from this block must also be outlined to be valid.
299       IBlock = PN->getIncomingBlock(i);
300       if (IBlock == EndBB && EndBBTermAndBackInstDifferent)
301         ++NumPredsOutsideRegion;
302     }
303 
304     if (NumPredsOutsideRegion > 1)
305       return;
306 
307     It++;
308   }
309 
310   // If the region starts with a PHINode, but is not the initial instruction of
311   // the BasicBlock, we ignore this region for now.
312   if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
313     return;
314 
315   // If the region ends with a PHINode, but does not contain all of the phi node
316   // instructions of the region, we ignore it for now.
317   if (isa<PHINode>(BackInst) &&
318       BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
319     return;
320 
321   // The basic block gets split like so:
322   // block:                 block:
323   //   inst1                  inst1
324   //   inst2                  inst2
325   //   region1               br block_to_outline
326   //   region2              block_to_outline:
327   //   region3          ->    region1
328   //   region4                region2
329   //   inst3                  region3
330   //   inst4                  region4
331   //                          br block_after_outline
332   //                        block_after_outline:
333   //                          inst3
334   //                          inst4
335 
336   std::string OriginalName = PrevBB->getName().str();
337 
338   StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
339   PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
340 
341   CandidateSplit = true;
342   if (!BackInst->isTerminator()) {
343     EndBB = EndInst->getParent();
344     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
345     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
346     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
347   } else {
348     EndBB = BackInst->getParent();
349     EndsInBranch = true;
350     FollowBB = nullptr;
351   }
352 
353   // Refind the basic block set.
354   BBSet.clear();
355   Candidate->getBasicBlocks(BBSet);
356   // For the phi nodes in the new starting basic block of the region, we
357   // reassign the targets of the basic blocks branching instructions.
358   replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
359   if (FollowBB)
360     replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
361 }
362 
363 void OutlinableRegion::reattachCandidate() {
364   assert(CandidateSplit && "Candidate is not split!");
365 
366   // The basic block gets reattached like so:
367   // block:                        block:
368   //   inst1                         inst1
369   //   inst2                         inst2
370   //   br block_to_outline           region1
371   // block_to_outline:        ->     region2
372   //   region1                       region3
373   //   region2                       region4
374   //   region3                       inst3
375   //   region4                       inst4
376   //   br block_after_outline
377   // block_after_outline:
378   //   inst3
379   //   inst4
380   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
381 
382   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
383   PrevBB->getTerminator()->eraseFromParent();
384 
385   // If we reattaching after outlining, we iterate over the phi nodes to
386   // the initial block, and reassign the branch instructions of the incoming
387   // blocks to the block we are remerging into.
388   if (!ExtractedFunction) {
389     DenseSet<BasicBlock *> BBSet;
390     Candidate->getBasicBlocks(BBSet);
391 
392     replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
393     if (!EndsInBranch)
394       replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
395   }
396 
397   moveBBContents(*StartBB, *PrevBB);
398 
399   BasicBlock *PlacementBB = PrevBB;
400   if (StartBB != EndBB)
401     PlacementBB = EndBB;
402   if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
403     assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
404     assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
405     PlacementBB->getTerminator()->eraseFromParent();
406     moveBBContents(*FollowBB, *PlacementBB);
407     PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
408     FollowBB->eraseFromParent();
409   }
410 
411   PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
412   StartBB->eraseFromParent();
413 
414   // Make sure to save changes back to the StartBB.
415   StartBB = PrevBB;
416   EndBB = nullptr;
417   PrevBB = nullptr;
418   FollowBB = nullptr;
419 
420   CandidateSplit = false;
421 }
422 
423 /// Find whether \p V matches the Constants previously found for the \p GVN.
424 ///
425 /// \param V - The value to check for consistency.
426 /// \param GVN - The global value number assigned to \p V.
427 /// \param GVNToConstant - The mapping of global value number to Constants.
428 /// \returns true if the Value matches the Constant mapped to by V and false if
429 /// it \p V is a Constant but does not match.
430 /// \returns None if \p V is not a Constant.
431 static Optional<bool>
432 constantMatches(Value *V, unsigned GVN,
433                 DenseMap<unsigned, Constant *> &GVNToConstant) {
434   // See if we have a constants
435   Constant *CST = dyn_cast<Constant>(V);
436   if (!CST)
437     return None;
438 
439   // Holds a mapping from a global value number to a Constant.
440   DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
441   bool Inserted;
442 
443 
444   // If we have a constant, try to make a new entry in the GVNToConstant.
445   std::tie(GVNToConstantIt, Inserted) =
446       GVNToConstant.insert(std::make_pair(GVN, CST));
447   // If it was found and is not equal, it is not the same. We do not
448   // handle this case yet, and exit early.
449   if (Inserted || (GVNToConstantIt->second == CST))
450     return true;
451 
452   return false;
453 }
454 
455 InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
456   InstructionCost Benefit = 0;
457 
458   // Estimate the benefit of outlining a specific sections of the program.  We
459   // delegate mostly this task to the TargetTransformInfo so that if the target
460   // has specific changes, we can have a more accurate estimate.
461 
462   // However, getInstructionCost delegates the code size calculation for
463   // arithmetic instructions to getArithmeticInstrCost in
464   // include/Analysis/TargetTransformImpl.h, where it always estimates that the
465   // code size for a division and remainder instruction to be equal to 4, and
466   // everything else to 1.  This is not an accurate representation of the
467   // division instruction for targets that have a native division instruction.
468   // To be overly conservative, we only add 1 to the number of instructions for
469   // each division instruction.
470   for (IRInstructionData &ID : *Candidate) {
471     Instruction *I = ID.Inst;
472     switch (I->getOpcode()) {
473     case Instruction::FDiv:
474     case Instruction::FRem:
475     case Instruction::SDiv:
476     case Instruction::SRem:
477     case Instruction::UDiv:
478     case Instruction::URem:
479       Benefit += 1;
480       break;
481     default:
482       Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
483       break;
484     }
485   }
486 
487   return Benefit;
488 }
489 
490 /// Check the \p OutputMappings structure for value \p Input, if it exists
491 /// it has been used as an output for outlining, and has been renamed, and we
492 /// return the new value, otherwise, we return the same value.
493 ///
494 /// \param OutputMappings [in] - The mapping of values to their renamed value
495 /// after being used as an output for an outlined region.
496 /// \param Input [in] - The value to find the remapped value of, if it exists.
497 /// \return The remapped value if it has been renamed, and the same value if has
498 /// not.
499 static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
500                                 Value *Input) {
501   DenseMap<Value *, Value *>::const_iterator OutputMapping =
502       OutputMappings.find(Input);
503   if (OutputMapping != OutputMappings.end())
504     return OutputMapping->second;
505   return Input;
506 }
507 
508 /// Find whether \p Region matches the global value numbering to Constant
509 /// mapping found so far.
510 ///
511 /// \param Region - The OutlinableRegion we are checking for constants
512 /// \param GVNToConstant - The mapping of global value number to Constants.
513 /// \param NotSame - The set of global value numbers that do not have the same
514 /// constant in each region.
515 /// \returns true if all Constants are the same in every use of a Constant in \p
516 /// Region and false if not
517 static bool
518 collectRegionsConstants(OutlinableRegion &Region,
519                         DenseMap<unsigned, Constant *> &GVNToConstant,
520                         DenseSet<unsigned> &NotSame) {
521   bool ConstantsTheSame = true;
522 
523   IRSimilarityCandidate &C = *Region.Candidate;
524   for (IRInstructionData &ID : C) {
525 
526     // Iterate over the operands in an instruction. If the global value number,
527     // assigned by the IRSimilarityCandidate, has been seen before, we check if
528     // the the number has been found to be not the same value in each instance.
529     for (Value *V : ID.OperVals) {
530       Optional<unsigned> GVNOpt = C.getGVN(V);
531       assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
532       unsigned GVN = GVNOpt.getValue();
533 
534       // Check if this global value has been found to not be the same already.
535       if (NotSame.contains(GVN)) {
536         if (isa<Constant>(V))
537           ConstantsTheSame = false;
538         continue;
539       }
540 
541       // If it has been the same so far, we check the value for if the
542       // associated Constant value match the previous instances of the same
543       // global value number.  If the global value does not map to a Constant,
544       // it is considered to not be the same value.
545       Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
546       if (ConstantMatches.hasValue()) {
547         if (ConstantMatches.getValue())
548           continue;
549         else
550           ConstantsTheSame = false;
551       }
552 
553       // While this value is a register, it might not have been previously,
554       // make sure we don't already have a constant mapped to this global value
555       // number.
556       if (GVNToConstant.find(GVN) != GVNToConstant.end())
557         ConstantsTheSame = false;
558 
559       NotSame.insert(GVN);
560     }
561   }
562 
563   return ConstantsTheSame;
564 }
565 
566 void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
567   DenseMap<unsigned, Constant *> GVNToConstant;
568 
569   for (OutlinableRegion *Region : Regions)
570     collectRegionsConstants(*Region, GVNToConstant, NotSame);
571 }
572 
573 void OutlinableGroup::collectGVNStoreSets(Module &M) {
574   for (OutlinableRegion *OS : Regions)
575     OutputGVNCombinations.insert(OS->GVNStores);
576 
577   // We are adding an extracted argument to decide between which output path
578   // to use in the basic block.  It is used in a switch statement and only
579   // needs to be an integer.
580   if (OutputGVNCombinations.size() > 1)
581     ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
582 }
583 
584 /// Get the subprogram if it exists for one of the outlined regions.
585 ///
586 /// \param [in] Group - The set of regions to find a subprogram for.
587 /// \returns the subprogram if it exists, or nullptr.
588 static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
589   for (OutlinableRegion *OS : Group.Regions)
590     if (Function *F = OS->Call->getFunction())
591       if (DISubprogram *SP = F->getSubprogram())
592         return SP;
593 
594   return nullptr;
595 }
596 
597 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
598                                      unsigned FunctionNameSuffix) {
599   assert(!Group.OutlinedFunction && "Function is already defined!");
600 
601   Type *RetTy = Type::getVoidTy(M.getContext());
602   // All extracted functions _should_ have the same return type at this point
603   // since the similarity identifier ensures that all branches outside of the
604   // region occur in the same place.
605 
606   // NOTE: Should we ever move to the model that uses a switch at every point
607   // needed, meaning that we could branch within the region or out, it is
608   // possible that we will need to switch to using the most general case all of
609   // the time.
610   for (OutlinableRegion *R : Group.Regions) {
611     Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
612     if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
613         (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
614       RetTy = ExtractedFuncType;
615   }
616 
617   Group.OutlinedFunctionType = FunctionType::get(
618       RetTy, Group.ArgumentTypes, false);
619 
620   // These functions will only be called from within the same module, so
621   // we can set an internal linkage.
622   Group.OutlinedFunction = Function::Create(
623       Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
624       "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
625 
626   // Transfer the swifterr attribute to the correct function parameter.
627   if (Group.SwiftErrorArgument.hasValue())
628     Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
629                                          Attribute::SwiftError);
630 
631   Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
632   Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
633 
634   // If there's a DISubprogram associated with this outlined function, then
635   // emit debug info for the outlined function.
636   if (DISubprogram *SP = getSubprogramOrNull(Group)) {
637     Function *F = Group.OutlinedFunction;
638     // We have a DISubprogram. Get its DICompileUnit.
639     DICompileUnit *CU = SP->getUnit();
640     DIBuilder DB(M, true, CU);
641     DIFile *Unit = SP->getFile();
642     Mangler Mg;
643     // Get the mangled name of the function for the linkage name.
644     std::string Dummy;
645     llvm::raw_string_ostream MangledNameStream(Dummy);
646     Mg.getNameWithPrefix(MangledNameStream, F, false);
647 
648     DISubprogram *OutlinedSP = DB.createFunction(
649         Unit /* Context */, F->getName(), MangledNameStream.str(),
650         Unit /* File */,
651         0 /* Line 0 is reserved for compiler-generated code. */,
652         DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
653         0, /* Line 0 is reserved for compiler-generated code. */
654         DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
655         /* Outlined code is optimized code by definition. */
656         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
657 
658     // Don't add any new variables to the subprogram.
659     DB.finalizeSubprogram(OutlinedSP);
660 
661     // Attach subprogram to the function.
662     F->setSubprogram(OutlinedSP);
663     // We're done with the DIBuilder.
664     DB.finalize();
665   }
666 
667   return Group.OutlinedFunction;
668 }
669 
670 /// Move each BasicBlock in \p Old to \p New.
671 ///
672 /// \param [in] Old - The function to move the basic blocks from.
673 /// \param [in] New - The function to move the basic blocks to.
674 /// \param [out] NewEnds - The return blocks of the new overall function.
675 static void moveFunctionData(Function &Old, Function &New,
676                              DenseMap<Value *, BasicBlock *> &NewEnds) {
677   for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
678     CurrBB.removeFromParent();
679     CurrBB.insertInto(&New);
680     Instruction *I = CurrBB.getTerminator();
681 
682     // For each block we find a return instruction is, it is a potential exit
683     // path for the function.  We keep track of each block based on the return
684     // value here.
685     if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
686       NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
687 
688     std::vector<Instruction *> DebugInsts;
689 
690     for (Instruction &Val : CurrBB) {
691       // We must handle the scoping of called functions differently than
692       // other outlined instructions.
693       if (!isa<CallInst>(&Val)) {
694         // Remove the debug information for outlined functions.
695         Val.setDebugLoc(DebugLoc());
696 
697         // Loop info metadata may contain line locations. Update them to have no
698         // value in the new subprogram since the outlined code could be from
699         // several locations.
700         auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
701           if (DISubprogram *SP = New.getSubprogram())
702             if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
703               return DILocation::get(New.getContext(), Loc->getLine(),
704                                      Loc->getColumn(), SP, nullptr);
705           return MD;
706         };
707         updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
708         continue;
709       }
710 
711       // From this point we are only handling call instructions.
712       CallInst *CI = cast<CallInst>(&Val);
713 
714       // We add any debug statements here, to be removed after.  Since the
715       // instructions originate from many different locations in the program,
716       // it will cause incorrect reporting from a debugger if we keep the
717       // same debug instructions.
718       if (isa<DbgInfoIntrinsic>(CI)) {
719         DebugInsts.push_back(&Val);
720         continue;
721       }
722 
723       // Edit the scope of called functions inside of outlined functions.
724       if (DISubprogram *SP = New.getSubprogram()) {
725         DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
726         Val.setDebugLoc(DI);
727       }
728     }
729 
730     for (Instruction *I : DebugInsts)
731       I->eraseFromParent();
732   }
733 }
734 
735 /// Find the the constants that will need to be lifted into arguments
736 /// as they are not the same in each instance of the region.
737 ///
738 /// \param [in] C - The IRSimilarityCandidate containing the region we are
739 /// analyzing.
740 /// \param [in] NotSame - The set of global value numbers that do not have a
741 /// single Constant across all OutlinableRegions similar to \p C.
742 /// \param [out] Inputs - The list containing the global value numbers of the
743 /// arguments needed for the region of code.
744 static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
745                           std::vector<unsigned> &Inputs) {
746   DenseSet<unsigned> Seen;
747   // Iterate over the instructions, and find what constants will need to be
748   // extracted into arguments.
749   for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
750        IDIt != EndIDIt; IDIt++) {
751     for (Value *V : (*IDIt).OperVals) {
752       // Since these are stored before any outlining, they will be in the
753       // global value numbering.
754       unsigned GVN = C.getGVN(V).getValue();
755       if (isa<Constant>(V))
756         if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
757           Inputs.push_back(GVN);
758           Seen.insert(GVN);
759         }
760     }
761   }
762 }
763 
764 /// Find the GVN for the inputs that have been found by the CodeExtractor.
765 ///
766 /// \param [in] C - The IRSimilarityCandidate containing the region we are
767 /// analyzing.
768 /// \param [in] CurrentInputs - The set of inputs found by the
769 /// CodeExtractor.
770 /// \param [in] OutputMappings - The mapping of values that have been replaced
771 /// by a new output value.
772 /// \param [out] EndInputNumbers - The global value numbers for the extracted
773 /// arguments.
774 static void mapInputsToGVNs(IRSimilarityCandidate &C,
775                             SetVector<Value *> &CurrentInputs,
776                             const DenseMap<Value *, Value *> &OutputMappings,
777                             std::vector<unsigned> &EndInputNumbers) {
778   // Get the Global Value Number for each input.  We check if the Value has been
779   // replaced by a different value at output, and use the original value before
780   // replacement.
781   for (Value *Input : CurrentInputs) {
782     assert(Input && "Have a nullptr as an input");
783     if (OutputMappings.find(Input) != OutputMappings.end())
784       Input = OutputMappings.find(Input)->second;
785     assert(C.getGVN(Input).hasValue() &&
786            "Could not find a numbering for the given input");
787     EndInputNumbers.push_back(C.getGVN(Input).getValue());
788   }
789 }
790 
791 /// Find the original value for the \p ArgInput values if any one of them was
792 /// replaced during a previous extraction.
793 ///
794 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
795 /// \param [in] OutputMappings - The mapping of values that have been replaced
796 /// by a new output value.
797 /// \param [out] RemappedArgInputs - The remapped values according to
798 /// \p OutputMappings that will be extracted.
799 static void
800 remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
801                      const DenseMap<Value *, Value *> &OutputMappings,
802                      SetVector<Value *> &RemappedArgInputs) {
803   // Get the global value number for each input that will be extracted as an
804   // argument by the code extractor, remapping if needed for reloaded values.
805   for (Value *Input : ArgInputs) {
806     if (OutputMappings.find(Input) != OutputMappings.end())
807       Input = OutputMappings.find(Input)->second;
808     RemappedArgInputs.insert(Input);
809   }
810 }
811 
812 /// Find the input GVNs and the output values for a region of Instructions.
813 /// Using the code extractor, we collect the inputs to the extracted function.
814 ///
815 /// The \p Region can be identified as needing to be ignored in this function.
816 /// It should be checked whether it should be ignored after a call to this
817 /// function.
818 ///
819 /// \param [in,out] Region - The region of code to be analyzed.
820 /// \param [out] InputGVNs - The global value numbers for the extracted
821 /// arguments.
822 /// \param [in] NotSame - The global value numbers in the region that do not
823 /// have the same constant value in the regions structurally similar to
824 /// \p Region.
825 /// \param [in] OutputMappings - The mapping of values that have been replaced
826 /// by a new output value after extraction.
827 /// \param [out] ArgInputs - The values of the inputs to the extracted function.
828 /// \param [out] Outputs - The set of values extracted by the CodeExtractor
829 /// as outputs.
830 static void getCodeExtractorArguments(
831     OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
832     DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
833     SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
834   IRSimilarityCandidate &C = *Region.Candidate;
835 
836   // OverallInputs are the inputs to the region found by the CodeExtractor,
837   // SinkCands and HoistCands are used by the CodeExtractor to find sunken
838   // allocas of values whose lifetimes are contained completely within the
839   // outlined region. PremappedInputs are the arguments found by the
840   // CodeExtractor, removing conditions such as sunken allocas, but that
841   // may need to be remapped due to the extracted output values replacing
842   // the original values. We use DummyOutputs for this first run of finding
843   // inputs and outputs since the outputs could change during findAllocas,
844   // the correct set of extracted outputs will be in the final Outputs ValueSet.
845   SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
846       DummyOutputs;
847 
848   // Use the code extractor to get the inputs and outputs, without sunken
849   // allocas or removing llvm.assumes.
850   CodeExtractor *CE = Region.CE;
851   CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
852   assert(Region.StartBB && "Region must have a start BasicBlock!");
853   Function *OrigF = Region.StartBB->getParent();
854   CodeExtractorAnalysisCache CEAC(*OrigF);
855   BasicBlock *Dummy = nullptr;
856 
857   // The region may be ineligible due to VarArgs in the parent function. In this
858   // case we ignore the region.
859   if (!CE->isEligible()) {
860     Region.IgnoreRegion = true;
861     return;
862   }
863 
864   // Find if any values are going to be sunk into the function when extracted
865   CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
866   CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
867 
868   // TODO: Support regions with sunken allocas: values whose lifetimes are
869   // contained completely within the outlined region.  These are not guaranteed
870   // to be the same in every region, so we must elevate them all to arguments
871   // when they appear.  If these values are not equal, it means there is some
872   // Input in OverallInputs that was removed for ArgInputs.
873   if (OverallInputs.size() != PremappedInputs.size()) {
874     Region.IgnoreRegion = true;
875     return;
876   }
877 
878   findConstants(C, NotSame, InputGVNs);
879 
880   mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
881 
882   remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
883                        ArgInputs);
884 
885   // Sort the GVNs, since we now have constants included in the \ref InputGVNs
886   // we need to make sure they are in a deterministic order.
887   stable_sort(InputGVNs);
888 }
889 
890 /// Look over the inputs and map each input argument to an argument in the
891 /// overall function for the OutlinableRegions.  This creates a way to replace
892 /// the arguments of the extracted function with the arguments of the new
893 /// overall function.
894 ///
895 /// \param [in,out] Region - The region of code to be analyzed.
896 /// \param [in] InputGVNs - The global value numbering of the input values
897 /// collected.
898 /// \param [in] ArgInputs - The values of the arguments to the extracted
899 /// function.
900 static void
901 findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
902                                         std::vector<unsigned> &InputGVNs,
903                                         SetVector<Value *> &ArgInputs) {
904 
905   IRSimilarityCandidate &C = *Region.Candidate;
906   OutlinableGroup &Group = *Region.Parent;
907 
908   // This counts the argument number in the overall function.
909   unsigned TypeIndex = 0;
910 
911   // This counts the argument number in the extracted function.
912   unsigned OriginalIndex = 0;
913 
914   // Find the mapping of the extracted arguments to the arguments for the
915   // overall function. Since there may be extra arguments in the overall
916   // function to account for the extracted constants, we have two different
917   // counters as we find extracted arguments, and as we come across overall
918   // arguments.
919 
920   // Additionally, in our first pass, for the first extracted function,
921   // we find argument locations for the canonical value numbering.  This
922   // numbering overrides any discovered location for the extracted code.
923   for (unsigned InputVal : InputGVNs) {
924     Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
925     assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
926     unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
927 
928     Optional<Value *> InputOpt = C.fromGVN(InputVal);
929     assert(InputOpt.hasValue() && "Global value number not found?");
930     Value *Input = InputOpt.getValue();
931 
932     DenseMap<unsigned, unsigned>::iterator AggArgIt =
933         Group.CanonicalNumberToAggArg.find(CanonicalNumber);
934 
935     if (!Group.InputTypesSet) {
936       Group.ArgumentTypes.push_back(Input->getType());
937       // If the input value has a swifterr attribute, make sure to mark the
938       // argument in the overall function.
939       if (Input->isSwiftError()) {
940         assert(
941             !Group.SwiftErrorArgument.hasValue() &&
942             "Argument already marked with swifterr for this OutlinableGroup!");
943         Group.SwiftErrorArgument = TypeIndex;
944       }
945     }
946 
947     // Check if we have a constant. If we do add it to the overall argument
948     // number to Constant map for the region, and continue to the next input.
949     if (Constant *CST = dyn_cast<Constant>(Input)) {
950       if (AggArgIt != Group.CanonicalNumberToAggArg.end())
951         Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
952       else {
953         Group.CanonicalNumberToAggArg.insert(
954             std::make_pair(CanonicalNumber, TypeIndex));
955         Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
956       }
957       TypeIndex++;
958       continue;
959     }
960 
961     // It is not a constant, we create the mapping from extracted argument list
962     // to the overall argument list, using the canonical location, if it exists.
963     assert(ArgInputs.count(Input) && "Input cannot be found!");
964 
965     if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
966       if (OriginalIndex != AggArgIt->second)
967         Region.ChangedArgOrder = true;
968       Region.ExtractedArgToAgg.insert(
969           std::make_pair(OriginalIndex, AggArgIt->second));
970       Region.AggArgToExtracted.insert(
971           std::make_pair(AggArgIt->second, OriginalIndex));
972     } else {
973       Group.CanonicalNumberToAggArg.insert(
974           std::make_pair(CanonicalNumber, TypeIndex));
975       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
976       Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
977     }
978     OriginalIndex++;
979     TypeIndex++;
980   }
981 
982   // If the function type definitions for the OutlinableGroup holding the region
983   // have not been set, set the length of the inputs here.  We should have the
984   // same inputs for all of the different regions contained in the
985   // OutlinableGroup since they are all structurally similar to one another.
986   if (!Group.InputTypesSet) {
987     Group.NumAggregateInputs = TypeIndex;
988     Group.InputTypesSet = true;
989   }
990 
991   Region.NumExtractedInputs = OriginalIndex;
992 }
993 
994 /// Check if the \p V has any uses outside of the region other than \p PN.
995 ///
996 /// \param V [in] - The value to check.
997 /// \param PHILoc [in] - The location in the PHINode of \p V.
998 /// \param PN [in] - The PHINode using \p V.
999 /// \param Exits [in] - The potential blocks we exit to from the outlined
1000 /// region.
1001 /// \param BlocksInRegion [in] - The basic blocks contained in the region.
1002 /// \returns true if \p V has any use soutside its region other than \p PN.
1003 static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1004                             SmallPtrSet<BasicBlock *, 1> &Exits,
1005                             DenseSet<BasicBlock *> &BlocksInRegion) {
1006   // We check to see if the value is used by the PHINode from some other
1007   // predecessor not included in the region.  If it is, we make sure
1008   // to keep it as an output.
1009   if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
1010              [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1011                return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1012                        !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1013              }))
1014     return true;
1015 
1016   // Check if the value is used by any other instructions outside the region.
1017   return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1018     Instruction *I = dyn_cast<Instruction>(U);
1019     if (!I)
1020       return false;
1021 
1022     // If the use of the item is inside the region, we skip it.  Uses
1023     // inside the region give us useful information about how the item could be
1024     // used as an output.
1025     BasicBlock *Parent = I->getParent();
1026     if (BlocksInRegion.contains(Parent))
1027       return false;
1028 
1029     // If it's not a PHINode then we definitely know the use matters.  This
1030     // output value will not completely combined with another item in a PHINode
1031     // as it is directly reference by another non-phi instruction
1032     if (!isa<PHINode>(I))
1033       return true;
1034 
1035     // If we have a PHINode outside one of the exit locations, then it
1036     // can be considered an outside use as well.  If there is a PHINode
1037     // contained in the Exit where this values use matters, it will be
1038     // caught when we analyze that PHINode.
1039     if (!Exits.contains(Parent))
1040       return true;
1041 
1042     return false;
1043   });
1044 }
1045 
1046 /// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1047 /// considered outputs. A PHINodes is an output when more than one incoming
1048 /// value has been marked by the CodeExtractor as an output.
1049 ///
1050 /// \param CurrentExitFromRegion [in] - The block to analyze.
1051 /// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1052 /// region.
1053 /// \param RegionBlocks [in] - The basic blocks in the region.
1054 /// \param Outputs [in, out] - The existing outputs for the region, we may add
1055 /// PHINodes to this as we find that they replace output values.
1056 /// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1057 /// totally replaced  by a PHINode.
1058 /// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1059 /// in PHINodes, but have other uses, and should still be considered outputs.
1060 static void analyzeExitPHIsForOutputUses(
1061     BasicBlock *CurrentExitFromRegion,
1062     SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1063     DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1064     DenseSet<Value *> &OutputsReplacedByPHINode,
1065     DenseSet<Value *> &OutputsWithNonPhiUses) {
1066   for (PHINode &PN : CurrentExitFromRegion->phis()) {
1067     // Find all incoming values from the outlining region.
1068     SmallVector<unsigned, 2> IncomingVals;
1069     for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1070       if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1071         IncomingVals.push_back(I);
1072 
1073     // Do not process PHI if there are no predecessors from region.
1074     unsigned NumIncomingVals = IncomingVals.size();
1075     if (NumIncomingVals == 0)
1076       continue;
1077 
1078     // If there is one predecessor, we mark it as a value that needs to be kept
1079     // as an output.
1080     if (NumIncomingVals == 1) {
1081       Value *V = PN.getIncomingValue(*IncomingVals.begin());
1082       OutputsWithNonPhiUses.insert(V);
1083       OutputsReplacedByPHINode.erase(V);
1084       continue;
1085     }
1086 
1087     // This PHINode will be used as an output value, so we add it to our list.
1088     Outputs.insert(&PN);
1089 
1090     // Not all of the incoming values should be ignored as other inputs and
1091     // outputs may have uses in outlined region.  If they have other uses
1092     // outside of the single PHINode we should not skip over it.
1093     for (unsigned Idx : IncomingVals) {
1094       Value *V = PN.getIncomingValue(Idx);
1095       if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1096         OutputsWithNonPhiUses.insert(V);
1097         OutputsReplacedByPHINode.erase(V);
1098         continue;
1099       }
1100       if (!OutputsWithNonPhiUses.contains(V))
1101         OutputsReplacedByPHINode.insert(V);
1102     }
1103   }
1104 }
1105 
1106 // Represents the type for the unsigned number denoting the output number for
1107 // phi node, along with the canonical number for the exit block.
1108 using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1109 // The list of canonical numbers for the incoming values to a PHINode.
1110 using CanonList = SmallVector<unsigned, 2>;
1111 // The pair type representing the set of canonical values being combined in the
1112 // PHINode, along with the location data for the PHINode.
1113 using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1114 
1115 /// Encode \p PND as an integer for easy lookup based on the argument location,
1116 /// the parent BasicBlock canonical numbering, and the canonical numbering of
1117 /// the values stored in the PHINode.
1118 ///
1119 /// \param PND - The data to hash.
1120 /// \returns The hash code of \p PND.
1121 static hash_code encodePHINodeData(PHINodeData &PND) {
1122   return llvm::hash_combine(
1123       llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
1124       llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
1125 }
1126 
1127 /// Create a special GVN for PHINodes that will be used outside of
1128 /// the region.  We create a hash code based on the Canonical number of the
1129 /// parent BasicBlock, the canonical numbering of the values stored in the
1130 /// PHINode and the aggregate argument location.  This is used to find whether
1131 /// this PHINode type has been given a canonical numbering already.  If not, we
1132 /// assign it a value and store it for later use.  The value is returned to
1133 /// identify different output schemes for the set of regions.
1134 ///
1135 /// \param Region - The region that \p PN is an output for.
1136 /// \param PN - The PHINode we are analyzing.
1137 /// \param Blocks - The blocks for the region we are analyzing.
1138 /// \param AggArgIdx - The argument \p PN will be stored into.
1139 /// \returns An optional holding the assigned canonical number, or None if
1140 /// there is some attribute of the PHINode blocking it from being used.
1141 static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1142                                            PHINode *PN,
1143                                            DenseSet<BasicBlock *> &Blocks,
1144                                            unsigned AggArgIdx) {
1145   OutlinableGroup &Group = *Region.Parent;
1146   IRSimilarityCandidate &Cand = *Region.Candidate;
1147   BasicBlock *PHIBB = PN->getParent();
1148   CanonList PHIGVNs;
1149   Value *Incoming;
1150   BasicBlock *IncomingBlock;
1151   for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1152     Incoming = PN->getIncomingValue(Idx);
1153     IncomingBlock = PN->getIncomingBlock(Idx);
1154     // If we cannot find a GVN, and the incoming block is included in the region
1155     // this means that the input to the PHINode is not included in the region we
1156     // are trying to analyze, meaning, that if it was outlined, we would be
1157     // adding an extra input.  We ignore this case for now, and so ignore the
1158     // region.
1159     Optional<unsigned> OGVN = Cand.getGVN(Incoming);
1160     if (!OGVN.hasValue() && (Blocks.find(IncomingBlock) != Blocks.end())) {
1161       Region.IgnoreRegion = true;
1162       return None;
1163     }
1164 
1165     // If the incoming block isn't in the region, we don't have to worry about
1166     // this incoming value.
1167     if (Blocks.find(IncomingBlock) == Blocks.end())
1168       continue;
1169 
1170     // Collect the canonical numbers of the values in the PHINode.
1171     unsigned GVN = OGVN.getValue();
1172     OGVN = Cand.getCanonicalNum(GVN);
1173     assert(OGVN.hasValue() && "No GVN found for incoming value?");
1174     PHIGVNs.push_back(*OGVN);
1175   }
1176 
1177   // Now that we have the GVNs for the incoming values, we are going to combine
1178   // them with the GVN of the incoming bock, and the output location of the
1179   // PHINode to generate a hash value representing this instance of the PHINode.
1180   DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
1181   DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
1182   Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1183   assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
1184 
1185   BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
1186   assert(BBGVN.hasValue() &&
1187          "Could not find canonical number for the incoming block!");
1188   // Create a pair of the exit block canonical value, and the aggregate
1189   // argument location, connected to the canonical numbers stored in the
1190   // PHINode.
1191   PHINodeData TemporaryPair =
1192       std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs);
1193   hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1194 
1195   // Look for and create a new entry in our connection between canonical
1196   // numbers for PHINodes, and the set of objects we just created.
1197   GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1198   if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1199     bool Inserted = false;
1200     std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1201         std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1202     std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1203         std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1204   }
1205 
1206   return GVNToPHIIt->second;
1207 }
1208 
1209 /// Create a mapping of the output arguments for the \p Region to the output
1210 /// arguments of the overall outlined function.
1211 ///
1212 /// \param [in,out] Region - The region of code to be analyzed.
1213 /// \param [in] Outputs - The values found by the code extractor.
1214 static void
1215 findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
1216                                           SetVector<Value *> &Outputs) {
1217   OutlinableGroup &Group = *Region.Parent;
1218   IRSimilarityCandidate &C = *Region.Candidate;
1219 
1220   SmallVector<BasicBlock *> BE;
1221   DenseSet<BasicBlock *> BlocksInRegion;
1222   C.getBasicBlocks(BlocksInRegion, BE);
1223 
1224   // Find the exits to the region.
1225   SmallPtrSet<BasicBlock *, 1> Exits;
1226   for (BasicBlock *Block : BE)
1227     for (BasicBlock *Succ : successors(Block))
1228       if (!BlocksInRegion.contains(Succ))
1229         Exits.insert(Succ);
1230 
1231   // After determining which blocks exit to PHINodes, we add these PHINodes to
1232   // the set of outputs to be processed.  We also check the incoming values of
1233   // the PHINodes for whether they should no longer be considered outputs.
1234   DenseSet<Value *> OutputsReplacedByPHINode;
1235   DenseSet<Value *> OutputsWithNonPhiUses;
1236   for (BasicBlock *ExitBB : Exits)
1237     analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1238                                  OutputsReplacedByPHINode,
1239                                  OutputsWithNonPhiUses);
1240 
1241   // This counts the argument number in the extracted function.
1242   unsigned OriginalIndex = Region.NumExtractedInputs;
1243 
1244   // This counts the argument number in the overall function.
1245   unsigned TypeIndex = Group.NumAggregateInputs;
1246   bool TypeFound;
1247   DenseSet<unsigned> AggArgsUsed;
1248 
1249   // Iterate over the output types and identify if there is an aggregate pointer
1250   // type whose base type matches the current output type. If there is, we mark
1251   // that we will use this output register for this value. If not we add another
1252   // type to the overall argument type list. We also store the GVNs used for
1253   // stores to identify which values will need to be moved into an special
1254   // block that holds the stores to the output registers.
1255   for (Value *Output : Outputs) {
1256     TypeFound = false;
1257     // We can do this since it is a result value, and will have a number
1258     // that is necessarily the same. BUT if in the future, the instructions
1259     // do not have to be in same order, but are functionally the same, we will
1260     // have to use a different scheme, as one-to-one correspondence is not
1261     // guaranteed.
1262     unsigned ArgumentSize = Group.ArgumentTypes.size();
1263 
1264     // If the output is combined in a PHINode, we make sure to skip over it.
1265     if (OutputsReplacedByPHINode.contains(Output))
1266       continue;
1267 
1268     unsigned AggArgIdx = 0;
1269     for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1270       if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
1271         continue;
1272 
1273       if (AggArgsUsed.contains(Jdx))
1274         continue;
1275 
1276       TypeFound = true;
1277       AggArgsUsed.insert(Jdx);
1278       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1279       Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1280       AggArgIdx = Jdx;
1281       break;
1282     }
1283 
1284     // We were unable to find an unused type in the output type set that matches
1285     // the output, so we add a pointer type to the argument types of the overall
1286     // function to handle this output and create a mapping to it.
1287     if (!TypeFound) {
1288       Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
1289       // Mark the new pointer type as the last value in the aggregate argument
1290       // list.
1291       unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1292       AggArgsUsed.insert(ArgTypeIdx);
1293       Region.ExtractedArgToAgg.insert(
1294           std::make_pair(OriginalIndex, ArgTypeIdx));
1295       Region.AggArgToExtracted.insert(
1296           std::make_pair(ArgTypeIdx, OriginalIndex));
1297       AggArgIdx = ArgTypeIdx;
1298     }
1299 
1300     // TODO: Adapt to the extra input from the PHINode.
1301     PHINode *PN = dyn_cast<PHINode>(Output);
1302 
1303     Optional<unsigned> GVN;
1304     if (PN && !BlocksInRegion.contains(PN->getParent())) {
1305       // Values outside the region can be combined into PHINode when we
1306       // have multiple exits. We collect both of these into a list to identify
1307       // which values are being used in the PHINode. Each list identifies a
1308       // different PHINode, and a different output. We store the PHINode as it's
1309       // own canonical value.  These canonical values are also dependent on the
1310       // output argument it is saved to.
1311 
1312       // If two PHINodes have the same canonical values, but different aggregate
1313       // argument locations, then they will have distinct Canonical Values.
1314       GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1315       if (!GVN.hasValue())
1316         return;
1317     } else {
1318       // If we do not have a PHINode we use the global value numbering for the
1319       // output value, to find the canonical number to add to the set of stored
1320       // values.
1321       GVN = C.getGVN(Output);
1322       GVN = C.getCanonicalNum(*GVN);
1323     }
1324 
1325     // Each region has a potentially unique set of outputs.  We save which
1326     // values are output in a list of canonical values so we can differentiate
1327     // among the different store schemes.
1328     Region.GVNStores.push_back(*GVN);
1329 
1330     OriginalIndex++;
1331     TypeIndex++;
1332   }
1333 
1334   // We sort the stored values to make sure that we are not affected by analysis
1335   // order when determining what combination of items were stored.
1336   stable_sort(Region.GVNStores);
1337 }
1338 
1339 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1340                                       DenseSet<unsigned> &NotSame) {
1341   std::vector<unsigned> Inputs;
1342   SetVector<Value *> ArgInputs, Outputs;
1343 
1344   getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1345                             Outputs);
1346 
1347   if (Region.IgnoreRegion)
1348     return;
1349 
1350   // Map the inputs found by the CodeExtractor to the arguments found for
1351   // the overall function.
1352   findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
1353 
1354   // Map the outputs found by the CodeExtractor to the arguments found for
1355   // the overall function.
1356   findExtractedOutputToOverallOutputMapping(Region, Outputs);
1357 }
1358 
1359 /// Replace the extracted function in the Region with a call to the overall
1360 /// function constructed from the deduplicated similar regions, replacing and
1361 /// remapping the values passed to the extracted function as arguments to the
1362 /// new arguments of the overall function.
1363 ///
1364 /// \param [in] M - The module to outline from.
1365 /// \param [in] Region - The regions of extracted code to be replaced with a new
1366 /// function.
1367 /// \returns a call instruction with the replaced function.
1368 CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
1369   std::vector<Value *> NewCallArgs;
1370   DenseMap<unsigned, unsigned>::iterator ArgPair;
1371 
1372   OutlinableGroup &Group = *Region.Parent;
1373   CallInst *Call = Region.Call;
1374   assert(Call && "Call to replace is nullptr?");
1375   Function *AggFunc = Group.OutlinedFunction;
1376   assert(AggFunc && "Function to replace with is nullptr?");
1377 
1378   // If the arguments are the same size, there are not values that need to be
1379   // made into an argument, the argument ordering has not been change, or
1380   // different output registers to handle.  We can simply replace the called
1381   // function in this case.
1382   if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1383     LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1384                       << *AggFunc << " with same number of arguments\n");
1385     Call->setCalledFunction(AggFunc);
1386     return Call;
1387   }
1388 
1389   // We have a different number of arguments than the new function, so
1390   // we need to use our previously mappings off extracted argument to overall
1391   // function argument, and constants to overall function argument to create the
1392   // new argument list.
1393   for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1394 
1395     if (AggArgIdx == AggFunc->arg_size() - 1 &&
1396         Group.OutputGVNCombinations.size() > 1) {
1397       // If we are on the last argument, and we need to differentiate between
1398       // output blocks, add an integer to the argument list to determine
1399       // what block to take
1400       LLVM_DEBUG(dbgs() << "Set switch block argument to "
1401                         << Region.OutputBlockNum << "\n");
1402       NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1403                                              Region.OutputBlockNum));
1404       continue;
1405     }
1406 
1407     ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1408     if (ArgPair != Region.AggArgToExtracted.end()) {
1409       Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1410       // If we found the mapping from the extracted function to the overall
1411       // function, we simply add it to the argument list.  We use the same
1412       // value, it just needs to honor the new order of arguments.
1413       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1414                         << *ArgumentValue << "\n");
1415       NewCallArgs.push_back(ArgumentValue);
1416       continue;
1417     }
1418 
1419     // If it is a constant, we simply add it to the argument list as a value.
1420     if (Region.AggArgToConstant.find(AggArgIdx) !=
1421         Region.AggArgToConstant.end()) {
1422       Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1423       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1424                         << *CST << "\n");
1425       NewCallArgs.push_back(CST);
1426       continue;
1427     }
1428 
1429     // Add a nullptr value if the argument is not found in the extracted
1430     // function.  If we cannot find a value, it means it is not in use
1431     // for the region, so we should not pass anything to it.
1432     LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1433     NewCallArgs.push_back(ConstantPointerNull::get(
1434         static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1435   }
1436 
1437   LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1438                     << *AggFunc << " with new set of arguments\n");
1439   // Create the new call instruction and erase the old one.
1440   Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1441                           Call);
1442 
1443   // It is possible that the call to the outlined function is either the first
1444   // instruction is in the new block, the last instruction, or both.  If either
1445   // of these is the case, we need to make sure that we replace the instruction
1446   // in the IRInstructionData struct with the new call.
1447   CallInst *OldCall = Region.Call;
1448   if (Region.NewFront->Inst == OldCall)
1449     Region.NewFront->Inst = Call;
1450   if (Region.NewBack->Inst == OldCall)
1451     Region.NewBack->Inst = Call;
1452 
1453   // Transfer any debug information.
1454   Call->setDebugLoc(Region.Call->getDebugLoc());
1455   // Since our output may determine which branch we go to, we make sure to
1456   // propogate this new call value through the module.
1457   OldCall->replaceAllUsesWith(Call);
1458 
1459   // Remove the old instruction.
1460   OldCall->eraseFromParent();
1461   Region.Call = Call;
1462 
1463   // Make sure that the argument in the new function has the SwiftError
1464   // argument.
1465   if (Group.SwiftErrorArgument.hasValue())
1466     Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
1467                        Attribute::SwiftError);
1468 
1469   return Call;
1470 }
1471 
1472 /// Find or create a BasicBlock in the outlined function containing PhiBlocks
1473 /// for \p RetVal.
1474 ///
1475 /// \param Group - The OutlinableGroup containing the information about the
1476 /// overall outlined function.
1477 /// \param RetVal - The return value or exit option that we are currently
1478 /// evaluating.
1479 /// \returns The found or newly created BasicBlock to contain the needed
1480 /// PHINodes to be used as outputs.
1481 static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
1482   DenseMap<Value *, BasicBlock *>::iterator PhiBlockForRetVal,
1483       ReturnBlockForRetVal;
1484   PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1485   ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1486   assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1487          "Could not find output value!");
1488   BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1489 
1490   // Find if a PHIBlock exists for this return value already.  If it is
1491   // the first time we are analyzing this, we will not, so we record it.
1492   PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1493   if (PhiBlockForRetVal != Group.PHIBlocks.end())
1494     return PhiBlockForRetVal->second;
1495 
1496   // If we did not find a block, we create one, and insert it into the
1497   // overall function and record it.
1498   bool Inserted = false;
1499   BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1500                                             ReturnBB->getParent());
1501   std::tie(PhiBlockForRetVal, Inserted) =
1502       Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1503 
1504   // We find the predecessors of the return block in the newly created outlined
1505   // function in order to point them to the new PHIBlock rather than the already
1506   // existing return block.
1507   SmallVector<BranchInst *, 2> BranchesToChange;
1508   for (BasicBlock *Pred : predecessors(ReturnBB))
1509     BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
1510 
1511   // Now we mark the branch instructions found, and change the references of the
1512   // return block to the newly created PHIBlock.
1513   for (BranchInst *BI : BranchesToChange)
1514     for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1515       if (BI->getSuccessor(Succ) != ReturnBB)
1516         continue;
1517       BI->setSuccessor(Succ, PHIBlock);
1518     }
1519 
1520   BranchInst::Create(ReturnBB, PHIBlock);
1521 
1522   return PhiBlockForRetVal->second;
1523 }
1524 
1525 /// For the function call now representing the \p Region, find the passed value
1526 /// to that call that represents Argument \p A at the call location if the
1527 /// call has already been replaced with a call to the  overall, aggregate
1528 /// function.
1529 ///
1530 /// \param A - The Argument to get the passed value for.
1531 /// \param Region - The extracted Region corresponding to the outlined function.
1532 /// \returns The Value representing \p A at the call site.
1533 static Value *
1534 getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
1535                                            const OutlinableRegion &Region) {
1536   // If we don't need to adjust the argument number at all (since the call
1537   // has already been replaced by a call to the overall outlined function)
1538   // we can just get the specified argument.
1539   return Region.Call->getArgOperand(A->getArgNo());
1540 }
1541 
1542 /// For the function call now representing the \p Region, find the passed value
1543 /// to that call that represents Argument \p A at the call location if the
1544 /// call has only been replaced by the call to the aggregate function.
1545 ///
1546 /// \param A - The Argument to get the passed value for.
1547 /// \param Region - The extracted Region corresponding to the outlined function.
1548 /// \returns The Value representing \p A at the call site.
1549 static Value *
1550 getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
1551                                            const OutlinableRegion &Region) {
1552   unsigned ArgNum = A->getArgNo();
1553 
1554   // If it is a constant, we can look at our mapping from when we created
1555   // the outputs to figure out what the constant value is.
1556   if (Region.AggArgToConstant.count(ArgNum))
1557     return Region.AggArgToConstant.find(ArgNum)->second;
1558 
1559   // If it is not a constant, and we are not looking at the overall function, we
1560   // need to adjust which argument we are looking at.
1561   ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1562   return Region.Call->getArgOperand(ArgNum);
1563 }
1564 
1565 /// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1566 ///
1567 /// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1568 /// \param Region [in] - The OutlinableRegion containing \p PN.
1569 /// \param OutputMappings [in] - The mapping of output values from outlined
1570 /// region to their original values.
1571 /// \param CanonNums [out] - The canonical numbering for the incoming values to
1572 /// \p PN paired with their incoming block.
1573 /// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1574 /// of \p Region rather than the overall function's call.
1575 static void findCanonNumsForPHI(
1576     PHINode *PN, OutlinableRegion &Region,
1577     const DenseMap<Value *, Value *> &OutputMappings,
1578     SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1579     bool ReplacedWithOutlinedCall = true) {
1580   // Iterate over the incoming values.
1581   for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1582     Value *IVal = PN->getIncomingValue(Idx);
1583     BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1584     // If we have an argument as incoming value, we need to grab the passed
1585     // value from the call itself.
1586     if (Argument *A = dyn_cast<Argument>(IVal)) {
1587       if (ReplacedWithOutlinedCall)
1588         IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
1589       else
1590         IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
1591     }
1592 
1593     // Get the original value if it has been replaced by an output value.
1594     IVal = findOutputMapping(OutputMappings, IVal);
1595 
1596     // Find and add the canonical number for the incoming value.
1597     Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1598     assert(GVN.hasValue() && "No GVN for incoming value");
1599     Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1600     assert(CanonNum.hasValue() && "No Canonical Number for GVN");
1601     CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1602   }
1603 }
1604 
1605 /// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1606 /// in order to condense the number of instructions added to the outlined
1607 /// function.
1608 ///
1609 /// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1610 /// \param Region [in] - The OutlinableRegion containing \p PN.
1611 /// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1612 /// \p PN in.
1613 /// \param OutputMappings [in] - The mapping of output values from outlined
1614 /// region to their original values.
1615 /// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1616 /// matched.
1617 /// \return the newly found or created PHINode in \p OverallPhiBlock.
1618 static PHINode*
1619 findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
1620                        BasicBlock *OverallPhiBlock,
1621                        const DenseMap<Value *, Value *> &OutputMappings,
1622                        DenseSet<PHINode *> &UsedPHIs) {
1623   OutlinableGroup &Group = *Region.Parent;
1624 
1625 
1626   // A list of the canonical numbering assigned to each incoming value, paired
1627   // with the incoming block for the PHINode passed into this function.
1628   SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums;
1629 
1630   // We have to use the extracted function since we have merged this region into
1631   // the overall function yet.  We make sure to reassign the argument numbering
1632   // since it is possible that the argument ordering is different between the
1633   // functions.
1634   findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1635                       /* ReplacedWithOutlinedCall = */ false);
1636 
1637   OutlinableRegion *FirstRegion = Group.Regions[0];
1638 
1639   // A list of the canonical numbering assigned to each incoming value, paired
1640   // with the incoming block for the PHINode that we are currently comparing
1641   // the passed PHINode to.
1642   SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums;
1643 
1644   // Find the Canonical Numbering for each PHINode, if it matches, we replace
1645   // the uses of the PHINode we are searching for, with the found PHINode.
1646   for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1647     // If this PHINode has already been matched to another PHINode to be merged,
1648     // we skip it.
1649     if (UsedPHIs.find(&CurrPN) != UsedPHIs.end())
1650       continue;
1651 
1652     CurrentCanonNums.clear();
1653     findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1654                         /* ReplacedWithOutlinedCall = */ true);
1655 
1656     // If the list of incoming values is not the same length, then they cannot
1657     // match since there is not an analogue for each incoming value.
1658     if (PNCanonNums.size() != CurrentCanonNums.size())
1659       continue;
1660 
1661     bool FoundMatch = true;
1662 
1663     // We compare the canonical value for each incoming value in the passed
1664     // in PHINode to one already present in the outlined region.  If the
1665     // incoming values do not match, then the PHINodes do not match.
1666 
1667     // We also check to make sure that the incoming block matches as well by
1668     // finding the corresponding incoming block in the combined outlined region
1669     // for the current outlined region.
1670     for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1671       std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1672       std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1673       if (ToCompareTo.first != ToAdd.first) {
1674         FoundMatch = false;
1675         break;
1676       }
1677 
1678       BasicBlock *CorrespondingBlock =
1679           Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1680       assert(CorrespondingBlock && "Found block is nullptr");
1681       if (CorrespondingBlock != ToCompareTo.second) {
1682         FoundMatch = false;
1683         break;
1684       }
1685     }
1686 
1687     // If all incoming values and branches matched, then we can merge
1688     // into the found PHINode.
1689     if (FoundMatch) {
1690       UsedPHIs.insert(&CurrPN);
1691       return &CurrPN;
1692     }
1693   }
1694 
1695   // If we've made it here, it means we weren't able to replace the PHINode, so
1696   // we must insert it ourselves.
1697   PHINode *NewPN = cast<PHINode>(PN.clone());
1698   NewPN->insertBefore(&*OverallPhiBlock->begin());
1699   for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1700        Idx++) {
1701     Value *IncomingVal = NewPN->getIncomingValue(Idx);
1702     BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1703 
1704     // Find corresponding basic block in the overall function for the incoming
1705     // block.
1706     BasicBlock *BlockToUse =
1707         Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1708     NewPN->setIncomingBlock(Idx, BlockToUse);
1709 
1710     // If we have an argument we make sure we replace using the argument from
1711     // the correct function.
1712     if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1713       Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1714       NewPN->setIncomingValue(Idx, Val);
1715       continue;
1716     }
1717 
1718     // Find the corresponding value in the overall function.
1719     IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1720     Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1721     assert(Val && "Value is nullptr?");
1722     NewPN->setIncomingValue(Idx, Val);
1723   }
1724   return NewPN;
1725 }
1726 
1727 // Within an extracted function, replace the argument uses of the extracted
1728 // region with the arguments of the function for an OutlinableGroup.
1729 //
1730 /// \param [in] Region - The region of extracted code to be changed.
1731 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1732 /// region.
1733 /// \param [in] FirstFunction - A flag to indicate whether we are using this
1734 /// function to define the overall outlined function for all the regions, or
1735 /// if we are operating on one of the following regions.
1736 static void
1737 replaceArgumentUses(OutlinableRegion &Region,
1738                     DenseMap<Value *, BasicBlock *> &OutputBBs,
1739                     const DenseMap<Value *, Value *> &OutputMappings,
1740                     bool FirstFunction = false) {
1741   OutlinableGroup &Group = *Region.Parent;
1742   assert(Region.ExtractedFunction && "Region has no extracted function?");
1743 
1744   Function *DominatingFunction = Region.ExtractedFunction;
1745   if (FirstFunction)
1746     DominatingFunction = Group.OutlinedFunction;
1747   DominatorTree DT(*DominatingFunction);
1748   DenseSet<PHINode *> UsedPHIs;
1749 
1750   for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1751        ArgIdx++) {
1752     assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
1753                Region.ExtractedArgToAgg.end() &&
1754            "No mapping from extracted to outlined?");
1755     unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1756     Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1757     Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1758     // The argument is an input, so we can simply replace it with the overall
1759     // argument value
1760     if (ArgIdx < Region.NumExtractedInputs) {
1761       LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1762                         << *Region.ExtractedFunction << " with " << *AggArg
1763                         << " in function " << *Group.OutlinedFunction << "\n");
1764       Arg->replaceAllUsesWith(AggArg);
1765       continue;
1766     }
1767 
1768     // If we are replacing an output, we place the store value in its own
1769     // block inside the overall function before replacing the use of the output
1770     // in the function.
1771     assert(Arg->hasOneUse() && "Output argument can only have one use");
1772     User *InstAsUser = Arg->user_back();
1773     assert(InstAsUser && "User is nullptr!");
1774 
1775     Instruction *I = cast<Instruction>(InstAsUser);
1776     BasicBlock *BB = I->getParent();
1777     SmallVector<BasicBlock *, 4> Descendants;
1778     DT.getDescendants(BB, Descendants);
1779     bool EdgeAdded = false;
1780     if (Descendants.size() == 0) {
1781       EdgeAdded = true;
1782       DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1783       DT.getDescendants(BB, Descendants);
1784     }
1785 
1786     // Iterate over the following blocks, looking for return instructions,
1787     // if we find one, find the corresponding output block for the return value
1788     // and move our store instruction there.
1789     for (BasicBlock *DescendBB : Descendants) {
1790       ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1791       if (!RI)
1792         continue;
1793       Value *RetVal = RI->getReturnValue();
1794       auto VBBIt = OutputBBs.find(RetVal);
1795       assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1796 
1797       // If this is storing a PHINode, we must make sure it is included in the
1798       // overall function.
1799       StoreInst *SI = cast<StoreInst>(I);
1800 
1801       Value *ValueOperand = SI->getValueOperand();
1802 
1803       StoreInst *NewI = cast<StoreInst>(I->clone());
1804       NewI->setDebugLoc(DebugLoc());
1805       BasicBlock *OutputBB = VBBIt->second;
1806       OutputBB->getInstList().push_back(NewI);
1807       LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1808                         << *OutputBB << "\n");
1809 
1810       // If this is storing a PHINode, we must make sure it is included in the
1811       // overall function.
1812       if (!isa<PHINode>(ValueOperand) ||
1813           Region.Candidate->getGVN(ValueOperand).hasValue()) {
1814         if (FirstFunction)
1815           continue;
1816         Value *CorrVal =
1817             Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1818         assert(CorrVal && "Value is nullptr?");
1819         NewI->setOperand(0, CorrVal);
1820         continue;
1821       }
1822       PHINode *PN = cast<PHINode>(SI->getValueOperand());
1823       // If it has a value, it was not split by the code extractor, which
1824       // is what we are looking for.
1825       if (Region.Candidate->getGVN(PN).hasValue())
1826         continue;
1827 
1828       // We record the parent block for the PHINode in the Region so that
1829       // we can exclude it from checks later on.
1830       Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1831 
1832       // If this is the first function, we do not need to worry about mergiing
1833       // this with any other block in the overall outlined function, so we can
1834       // just continue.
1835       if (FirstFunction) {
1836         BasicBlock *PHIBlock = PN->getParent();
1837         Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1838         continue;
1839       }
1840 
1841       // We look for the aggregate block that contains the PHINodes leading into
1842       // this exit path. If we can't find one, we create one.
1843       BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1844 
1845       // For our PHINode, we find the combined canonical numbering, and
1846       // attempt to find a matching PHINode in the overall PHIBlock.  If we
1847       // cannot, we copy the PHINode and move it into this new block.
1848       PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1849                                               OutputMappings, UsedPHIs);
1850       NewI->setOperand(0, NewPN);
1851     }
1852 
1853     // If we added an edge for basic blocks without a predecessor, we remove it
1854     // here.
1855     if (EdgeAdded)
1856       DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1857     I->eraseFromParent();
1858 
1859     LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1860                       << *Region.ExtractedFunction << " with " << *AggArg
1861                       << " in function " << *Group.OutlinedFunction << "\n");
1862     Arg->replaceAllUsesWith(AggArg);
1863   }
1864 }
1865 
1866 /// Within an extracted function, replace the constants that need to be lifted
1867 /// into arguments with the actual argument.
1868 ///
1869 /// \param Region [in] - The region of extracted code to be changed.
1870 void replaceConstants(OutlinableRegion &Region) {
1871   OutlinableGroup &Group = *Region.Parent;
1872   // Iterate over the constants that need to be elevated into arguments
1873   for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1874     unsigned AggArgIdx = Const.first;
1875     Function *OutlinedFunction = Group.OutlinedFunction;
1876     assert(OutlinedFunction && "Overall Function is not defined?");
1877     Constant *CST = Const.second;
1878     Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1879     // Identify the argument it will be elevated to, and replace instances of
1880     // that constant in the function.
1881 
1882     // TODO: If in the future constants do not have one global value number,
1883     // i.e. a constant 1 could be mapped to several values, this check will
1884     // have to be more strict.  It cannot be using only replaceUsesWithIf.
1885 
1886     LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1887                       << " in function " << *OutlinedFunction << " with "
1888                       << *Arg << "\n");
1889     CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1890       if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1891         return I->getFunction() == OutlinedFunction;
1892       return false;
1893     });
1894   }
1895 }
1896 
1897 /// It is possible that there is a basic block that already performs the same
1898 /// stores. This returns a duplicate block, if it exists
1899 ///
1900 /// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1901 /// \param OutputStoreBBs [in] The existing output blocks.
1902 /// \returns an optional value with the number output block if there is a match.
1903 Optional<unsigned> findDuplicateOutputBlock(
1904     DenseMap<Value *, BasicBlock *> &OutputBBs,
1905     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1906 
1907   bool Mismatch = false;
1908   unsigned MatchingNum = 0;
1909   // We compare the new set output blocks to the other sets of output blocks.
1910   // If they are the same number, and have identical instructions, they are
1911   // considered to be the same.
1912   for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1913     Mismatch = false;
1914     for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1915       DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
1916           OutputBBs.find(VToB.first);
1917       if (OutputBBIt == OutputBBs.end()) {
1918         Mismatch = true;
1919         break;
1920       }
1921 
1922       BasicBlock *CompBB = VToB.second;
1923       BasicBlock *OutputBB = OutputBBIt->second;
1924       if (CompBB->size() - 1 != OutputBB->size()) {
1925         Mismatch = true;
1926         break;
1927       }
1928 
1929       BasicBlock::iterator NIt = OutputBB->begin();
1930       for (Instruction &I : *CompBB) {
1931         if (isa<BranchInst>(&I))
1932           continue;
1933 
1934         if (!I.isIdenticalTo(&(*NIt))) {
1935           Mismatch = true;
1936           break;
1937         }
1938 
1939         NIt++;
1940       }
1941     }
1942 
1943     if (!Mismatch)
1944       return MatchingNum;
1945 
1946     MatchingNum++;
1947   }
1948 
1949   return None;
1950 }
1951 
1952 /// Remove empty output blocks from the outlined region.
1953 ///
1954 /// \param BlocksToPrune - Mapping of return values output blocks for the \p
1955 /// Region.
1956 /// \param Region - The OutlinableRegion we are analyzing.
1957 static bool
1958 analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
1959                             OutlinableRegion &Region) {
1960   bool AllRemoved = true;
1961   Value *RetValueForBB;
1962   BasicBlock *NewBB;
1963   SmallVector<Value *, 4> ToRemove;
1964   // Iterate over the output blocks created in the outlined section.
1965   for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1966     RetValueForBB = VtoBB.first;
1967     NewBB = VtoBB.second;
1968 
1969     // If there are no instructions, we remove it from the module, and also
1970     // mark the value for removal from the return value to output block mapping.
1971     if (NewBB->size() == 0) {
1972       NewBB->eraseFromParent();
1973       ToRemove.push_back(RetValueForBB);
1974       continue;
1975     }
1976 
1977     // Mark that we could not remove all the blocks since they were not all
1978     // empty.
1979     AllRemoved = false;
1980   }
1981 
1982   // Remove the return value from the mapping.
1983   for (Value *V : ToRemove)
1984     BlocksToPrune.erase(V);
1985 
1986   // Mark the region as having the no output scheme.
1987   if (AllRemoved)
1988     Region.OutputBlockNum = -1;
1989 
1990   return AllRemoved;
1991 }
1992 
1993 /// For the outlined section, move needed the StoreInsts for the output
1994 /// registers into their own block. Then, determine if there is a duplicate
1995 /// output block already created.
1996 ///
1997 /// \param [in] OG - The OutlinableGroup of regions to be outlined.
1998 /// \param [in] Region - The OutlinableRegion that is being analyzed.
1999 /// \param [in,out] OutputBBs - the blocks that stores for this region will be
2000 /// placed in.
2001 /// \param [in] EndBBs - the final blocks of the extracted function.
2002 /// \param [in] OutputMappings - OutputMappings the mapping of values that have
2003 /// been replaced by a new output value.
2004 /// \param [in,out] OutputStoreBBs - The existing output blocks.
2005 static void alignOutputBlockWithAggFunc(
2006     OutlinableGroup &OG, OutlinableRegion &Region,
2007     DenseMap<Value *, BasicBlock *> &OutputBBs,
2008     DenseMap<Value *, BasicBlock *> &EndBBs,
2009     const DenseMap<Value *, Value *> &OutputMappings,
2010     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2011   // If none of the output blocks have any instructions, this means that we do
2012   // not have to determine if it matches any of the other output schemes, and we
2013   // don't have to do anything else.
2014   if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2015     return;
2016 
2017   // Determine is there is a duplicate set of blocks.
2018   Optional<unsigned> MatchingBB =
2019       findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2020 
2021   // If there is, we remove the new output blocks.  If it does not,
2022   // we add it to our list of sets of output blocks.
2023   if (MatchingBB.hasValue()) {
2024     LLVM_DEBUG(dbgs() << "Set output block for region in function"
2025                       << Region.ExtractedFunction << " to "
2026                       << MatchingBB.getValue());
2027 
2028     Region.OutputBlockNum = MatchingBB.getValue();
2029     for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2030       VtoBB.second->eraseFromParent();
2031     return;
2032   }
2033 
2034   Region.OutputBlockNum = OutputStoreBBs.size();
2035 
2036   Value *RetValueForBB;
2037   BasicBlock *NewBB;
2038   OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2039   for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2040     RetValueForBB = VtoBB.first;
2041     NewBB = VtoBB.second;
2042     DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2043         EndBBs.find(RetValueForBB);
2044     LLVM_DEBUG(dbgs() << "Create output block for region in"
2045                       << Region.ExtractedFunction << " to "
2046                       << *NewBB);
2047     BranchInst::Create(VBBIt->second, NewBB);
2048     OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2049   }
2050 }
2051 
2052 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2053 /// before creating a basic block for each \p NewMap, and inserting into the new
2054 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2055 ///
2056 /// \param OldMap [in] - The mapping to base the new mapping off of.
2057 /// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2058 /// \param ParentFunc [in] - The function to put the new basic block in.
2059 /// \param BaseName [in] - The start of the BasicBlock names to be appended to
2060 /// by an index value.
2061 static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
2062                                        DenseMap<Value *, BasicBlock *> &NewMap,
2063                                        Function *ParentFunc, Twine BaseName) {
2064   unsigned Idx = 0;
2065   std::vector<Value *> SortedKeys;
2066 
2067   getSortedConstantKeys(SortedKeys, OldMap);
2068 
2069   for (Value *RetVal : SortedKeys) {
2070     BasicBlock *NewBB = BasicBlock::Create(
2071         ParentFunc->getContext(),
2072         Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
2073         ParentFunc);
2074     NewMap.insert(std::make_pair(RetVal, NewBB));
2075   }
2076 }
2077 
2078 /// Create the switch statement for outlined function to differentiate between
2079 /// all the output blocks.
2080 ///
2081 /// For the outlined section, determine if an outlined block already exists that
2082 /// matches the needed stores for the extracted section.
2083 /// \param [in] M - The module we are outlining from.
2084 /// \param [in] OG - The group of regions to be outlined.
2085 /// \param [in] EndBBs - The final blocks of the extracted function.
2086 /// \param [in,out] OutputStoreBBs - The existing output blocks.
2087 void createSwitchStatement(
2088     Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
2089     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2090   // We only need the switch statement if there is more than one store
2091   // combination, or there is more than one set of output blocks.  The first
2092   // will occur when we store different sets of values for two different
2093   // regions.  The second will occur when we have two outputs that are combined
2094   // in a PHINode outside of the region in one outlined instance, and are used
2095   // seaparately in another. This will create the same set of OutputGVNs, but
2096   // will generate two different output schemes.
2097   if (OG.OutputGVNCombinations.size() > 1) {
2098     Function *AggFunc = OG.OutlinedFunction;
2099     // Create a final block for each different return block.
2100     DenseMap<Value *, BasicBlock *> ReturnBBs;
2101     createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2102 
2103     for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2104       std::pair<Value *, BasicBlock *> &OutputBlock =
2105           *OG.EndBBs.find(RetBlockPair.first);
2106       BasicBlock *ReturnBlock = RetBlockPair.second;
2107       BasicBlock *EndBB = OutputBlock.second;
2108       Instruction *Term = EndBB->getTerminator();
2109       // Move the return value to the final block instead of the original exit
2110       // stub.
2111       Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2112       // Put the switch statement in the old end basic block for the function
2113       // with a fall through to the new return block.
2114       LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2115                         << OutputStoreBBs.size() << "\n");
2116       SwitchInst *SwitchI =
2117           SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2118                              ReturnBlock, OutputStoreBBs.size(), EndBB);
2119 
2120       unsigned Idx = 0;
2121       for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2122         DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
2123             OutputStoreBB.find(OutputBlock.first);
2124 
2125         if (OSBBIt == OutputStoreBB.end())
2126           continue;
2127 
2128         BasicBlock *BB = OSBBIt->second;
2129         SwitchI->addCase(
2130             ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2131         Term = BB->getTerminator();
2132         Term->setSuccessor(0, ReturnBlock);
2133         Idx++;
2134       }
2135     }
2136     return;
2137   }
2138 
2139   assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2140 
2141   // If there needs to be stores, move them from the output blocks to their
2142   // corresponding ending block.  We do not check that the OutputGVNCombinations
2143   // is equal to 1 here since that could just been the case where there are 0
2144   // outputs. Instead, we check whether there is more than one set of output
2145   // blocks since this is the only case where we would have to move the
2146   // stores, and erase the extraneous blocks.
2147   if (OutputStoreBBs.size() == 1) {
2148     LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2149                       << *OG.OutlinedFunction << "\n");
2150     DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2151     for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2152       DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
2153           EndBBs.find(VBPair.first);
2154       assert(EndBBIt != EndBBs.end() && "Could not find end block");
2155       BasicBlock *EndBB = EndBBIt->second;
2156       BasicBlock *OutputBB = VBPair.second;
2157       Instruction *Term = OutputBB->getTerminator();
2158       Term->eraseFromParent();
2159       Term = EndBB->getTerminator();
2160       moveBBContents(*OutputBB, *EndBB);
2161       Term->moveBefore(*EndBB, EndBB->end());
2162       OutputBB->eraseFromParent();
2163     }
2164   }
2165 }
2166 
2167 /// Fill the new function that will serve as the replacement function for all of
2168 /// the extracted regions of a certain structure from the first region in the
2169 /// list of regions.  Replace this first region's extracted function with the
2170 /// new overall function.
2171 ///
2172 /// \param [in] M - The module we are outlining from.
2173 /// \param [in] CurrentGroup - The group of regions to be outlined.
2174 /// \param [in,out] OutputStoreBBs - The output blocks for each different
2175 /// set of stores needed for the different functions.
2176 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2177 /// once outlining is complete.
2178 /// \param [in] OutputMappings - Extracted functions to erase from module
2179 /// once outlining is complete.
2180 static void fillOverallFunction(
2181     Module &M, OutlinableGroup &CurrentGroup,
2182     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2183     std::vector<Function *> &FuncsToRemove,
2184     const DenseMap<Value *, Value *> &OutputMappings) {
2185   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2186 
2187   // Move first extracted function's instructions into new function.
2188   LLVM_DEBUG(dbgs() << "Move instructions from "
2189                     << *CurrentOS->ExtractedFunction << " to instruction "
2190                     << *CurrentGroup.OutlinedFunction << "\n");
2191   moveFunctionData(*CurrentOS->ExtractedFunction,
2192                    *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2193 
2194   // Transfer the attributes from the function to the new function.
2195   for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2196     CurrentGroup.OutlinedFunction->addFnAttr(A);
2197 
2198   // Create a new set of output blocks for the first extracted function.
2199   DenseMap<Value *, BasicBlock *> NewBBs;
2200   createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2201                              CurrentGroup.OutlinedFunction, "output_block_0");
2202   CurrentOS->OutputBlockNum = 0;
2203 
2204   replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2205   replaceConstants(*CurrentOS);
2206 
2207   // We first identify if any output blocks are empty, if they are we remove
2208   // them. We then create a branch instruction to the basic block to the return
2209   // block for the function for each non empty output block.
2210   if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2211     OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2212     for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2213       DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2214           CurrentGroup.EndBBs.find(VToBB.first);
2215       BasicBlock *EndBB = VBBIt->second;
2216       BranchInst::Create(EndBB, VToBB.second);
2217       OutputStoreBBs.back().insert(VToBB);
2218     }
2219   }
2220 
2221   // Replace the call to the extracted function with the outlined function.
2222   CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2223 
2224   // We only delete the extracted functions at the end since we may need to
2225   // reference instructions contained in them for mapping purposes.
2226   FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2227 }
2228 
2229 void IROutliner::deduplicateExtractedSections(
2230     Module &M, OutlinableGroup &CurrentGroup,
2231     std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2232   createFunction(M, CurrentGroup, OutlinedFunctionNum);
2233 
2234   std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2235 
2236   OutlinableRegion *CurrentOS;
2237 
2238   fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2239                       OutputMappings);
2240 
2241   std::vector<Value *> SortedKeys;
2242   for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2243     CurrentOS = CurrentGroup.Regions[Idx];
2244     AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
2245                                                *CurrentOS->ExtractedFunction);
2246 
2247     // Create a set of BasicBlocks, one for each return block, to hold the
2248     // needed store instructions.
2249     DenseMap<Value *, BasicBlock *> NewBBs;
2250     createAndInsertBasicBlocks(
2251         CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
2252         "output_block_" + Twine(static_cast<unsigned>(Idx)));
2253     replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2254     alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2255                                 CurrentGroup.EndBBs, OutputMappings,
2256                                 OutputStoreBBs);
2257 
2258     CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2259     FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2260   }
2261 
2262   // Create a switch statement to handle the different output schemes.
2263   createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2264 
2265   OutlinedFunctionNum++;
2266 }
2267 
2268 /// Checks that the next instruction in the InstructionDataList matches the
2269 /// next instruction in the module.  If they do not, there could be the
2270 /// possibility that extra code has been inserted, and we must ignore it.
2271 ///
2272 /// \param ID - The IRInstructionData to check the next instruction of.
2273 /// \returns true if the InstructionDataList and actual instruction match.
2274 static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
2275   // We check if there is a discrepancy between the InstructionDataList
2276   // and the actual next instruction in the module.  If there is, it means
2277   // that an extra instruction was added, likely by the CodeExtractor.
2278 
2279   // Since we do not have any similarity data about this particular
2280   // instruction, we cannot confidently outline it, and must discard this
2281   // candidate.
2282   IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2283   Instruction *NextIDLInst = NextIDIt->Inst;
2284   Instruction *NextModuleInst = nullptr;
2285   if (!ID.Inst->isTerminator())
2286     NextModuleInst = ID.Inst->getNextNonDebugInstruction();
2287   else if (NextIDLInst != nullptr)
2288     NextModuleInst =
2289         &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2290 
2291   if (NextIDLInst && NextIDLInst != NextModuleInst)
2292     return false;
2293 
2294   return true;
2295 }
2296 
2297 bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2298     const OutlinableRegion &Region) {
2299   IRSimilarityCandidate *IRSC = Region.Candidate;
2300   unsigned StartIdx = IRSC->getStartIdx();
2301   unsigned EndIdx = IRSC->getEndIdx();
2302 
2303   // A check to make sure that we are not about to attempt to outline something
2304   // that has already been outlined.
2305   for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2306     if (Outlined.contains(Idx))
2307       return false;
2308 
2309   // We check if the recorded instruction matches the actual next instruction,
2310   // if it does not, we fix it in the InstructionDataList.
2311   if (!Region.Candidate->backInstruction()->isTerminator()) {
2312     Instruction *NewEndInst =
2313         Region.Candidate->backInstruction()->getNextNonDebugInstruction();
2314     assert(NewEndInst && "Next instruction is a nullptr?");
2315     if (Region.Candidate->end()->Inst != NewEndInst) {
2316       IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2317       IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2318           IRInstructionData(*NewEndInst,
2319                             InstructionClassifier.visit(*NewEndInst), *IDL);
2320 
2321       // Insert the first IRInstructionData of the new region after the
2322       // last IRInstructionData of the IRSimilarityCandidate.
2323       IDL->insert(Region.Candidate->end(), *NewEndIRID);
2324     }
2325   }
2326 
2327   return none_of(*IRSC, [this](IRInstructionData &ID) {
2328     if (!nextIRInstructionDataMatchesNextInst(ID))
2329       return true;
2330 
2331     return !this->InstructionClassifier.visit(ID.Inst);
2332   });
2333 }
2334 
2335 void IROutliner::pruneIncompatibleRegions(
2336     std::vector<IRSimilarityCandidate> &CandidateVec,
2337     OutlinableGroup &CurrentGroup) {
2338   bool PreviouslyOutlined;
2339 
2340   // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2341   stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2342                                const IRSimilarityCandidate &RHS) {
2343     return LHS.getStartIdx() < RHS.getStartIdx();
2344   });
2345 
2346   IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2347   // Since outlining a call and a branch instruction will be the same as only
2348   // outlinining a call instruction, we ignore it as a space saving.
2349   if (FirstCandidate.getLength() == 2) {
2350     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2351         isa<BranchInst>(FirstCandidate.back()->Inst))
2352       return;
2353   }
2354 
2355   unsigned CurrentEndIdx = 0;
2356   for (IRSimilarityCandidate &IRSC : CandidateVec) {
2357     PreviouslyOutlined = false;
2358     unsigned StartIdx = IRSC.getStartIdx();
2359     unsigned EndIdx = IRSC.getEndIdx();
2360 
2361     for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2362       if (Outlined.contains(Idx)) {
2363         PreviouslyOutlined = true;
2364         break;
2365       }
2366 
2367     if (PreviouslyOutlined)
2368       continue;
2369 
2370     // Check over the instructions, and if the basic block has its address
2371     // taken for use somewhere else, we do not outline that block.
2372     bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2373       return ID.Inst->getParent()->hasAddressTaken();
2374     });
2375 
2376     if (BBHasAddressTaken)
2377       continue;
2378 
2379     if (IRSC.getFunction()->hasOptNone())
2380       continue;
2381 
2382     if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2383         !OutlineFromLinkODRs)
2384       continue;
2385 
2386     // Greedily prune out any regions that will overlap with already chosen
2387     // regions.
2388     if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2389       continue;
2390 
2391     bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2392       if (!nextIRInstructionDataMatchesNextInst(ID))
2393         return true;
2394 
2395       return !this->InstructionClassifier.visit(ID.Inst);
2396     });
2397 
2398     if (BadInst)
2399       continue;
2400 
2401     OutlinableRegion *OS = new (RegionAllocator.Allocate())
2402         OutlinableRegion(IRSC, CurrentGroup);
2403     CurrentGroup.Regions.push_back(OS);
2404 
2405     CurrentEndIdx = EndIdx;
2406   }
2407 }
2408 
2409 InstructionCost
2410 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2411   InstructionCost RegionBenefit = 0;
2412   for (OutlinableRegion *Region : CurrentGroup.Regions) {
2413     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2414     // We add the number of instructions in the region to the benefit as an
2415     // estimate as to how much will be removed.
2416     RegionBenefit += Region->getBenefit(TTI);
2417     LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2418                       << " saved instructions to overfall benefit.\n");
2419   }
2420 
2421   return RegionBenefit;
2422 }
2423 
2424 /// For the \p OutputCanon number passed in find the value represented by this
2425 /// canonical number. If it is from a PHINode, we pick the first incoming
2426 /// value and return that Value instead.
2427 ///
2428 /// \param Region - The OutlinableRegion to get the Value from.
2429 /// \param OutputCanon - The canonical number to find the Value from.
2430 /// \returns The Value represented by a canonical number \p OutputCanon in \p
2431 /// Region.
2432 static Value *findOutputValueInRegion(OutlinableRegion &Region,
2433                                       unsigned OutputCanon) {
2434   OutlinableGroup &CurrentGroup = *Region.Parent;
2435   // If the value is greater than the value in the tracker, we have a
2436   // PHINode and will instead use one of the incoming values to find the
2437   // type.
2438   if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2439     auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2440     assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2441            "Could not find GVN set for PHINode number!");
2442     assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2443     OutputCanon = *It->second.second.begin();
2444   }
2445   Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon);
2446   assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?");
2447   Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2448   assert(OV.hasValue() && "Could not find value for GVN?");
2449   return *OV;
2450 }
2451 
2452 InstructionCost
2453 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2454   InstructionCost OverallCost = 0;
2455   for (OutlinableRegion *Region : CurrentGroup.Regions) {
2456     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2457 
2458     // Each output incurs a load after the call, so we add that to the cost.
2459     for (unsigned OutputCanon : Region->GVNStores) {
2460       Value *V = findOutputValueInRegion(*Region, OutputCanon);
2461       InstructionCost LoadCost =
2462           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2463                               TargetTransformInfo::TCK_CodeSize);
2464 
2465       LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2466                         << " instructions to cost for output of type "
2467                         << *V->getType() << "\n");
2468       OverallCost += LoadCost;
2469     }
2470   }
2471 
2472   return OverallCost;
2473 }
2474 
2475 /// Find the extra instructions needed to handle any output values for the
2476 /// region.
2477 ///
2478 /// \param [in] M - The Module to outline from.
2479 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2480 /// \param [in] TTI - The TargetTransformInfo used to collect information for
2481 /// new instruction costs.
2482 /// \returns the additional cost to handle the outputs.
2483 static InstructionCost findCostForOutputBlocks(Module &M,
2484                                                OutlinableGroup &CurrentGroup,
2485                                                TargetTransformInfo &TTI) {
2486   InstructionCost OutputCost = 0;
2487   unsigned NumOutputBranches = 0;
2488 
2489   OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2490   IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2491   DenseSet<BasicBlock *> CandidateBlocks;
2492   Candidate.getBasicBlocks(CandidateBlocks);
2493 
2494   // Count the number of different output branches that point to blocks outside
2495   // of the region.
2496   DenseSet<BasicBlock *> FoundBlocks;
2497   for (IRInstructionData &ID : Candidate) {
2498     if (!isa<BranchInst>(ID.Inst))
2499       continue;
2500 
2501     for (Value *V : ID.OperVals) {
2502       BasicBlock *BB = static_cast<BasicBlock *>(V);
2503       DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
2504       if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
2505         continue;
2506       FoundBlocks.insert(BB);
2507       NumOutputBranches++;
2508     }
2509   }
2510 
2511   CurrentGroup.BranchesToOutside = NumOutputBranches;
2512 
2513   for (const ArrayRef<unsigned> &OutputUse :
2514        CurrentGroup.OutputGVNCombinations) {
2515     for (unsigned OutputCanon : OutputUse) {
2516       Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2517       InstructionCost StoreCost =
2518           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2519                               TargetTransformInfo::TCK_CodeSize);
2520 
2521       // An instruction cost is added for each store set that needs to occur for
2522       // various output combinations inside the function, plus a branch to
2523       // return to the exit block.
2524       LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2525                         << " instructions to cost for output of type "
2526                         << *V->getType() << "\n");
2527       OutputCost += StoreCost * NumOutputBranches;
2528     }
2529 
2530     InstructionCost BranchCost =
2531         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
2532     LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2533                       << " a branch instruction\n");
2534     OutputCost += BranchCost * NumOutputBranches;
2535   }
2536 
2537   // If there is more than one output scheme, we must have a comparison and
2538   // branch for each different item in the switch statement.
2539   if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2540     InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2541         Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2542         Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
2543         TargetTransformInfo::TCK_CodeSize);
2544     InstructionCost BranchCost =
2545         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
2546 
2547     unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2548     InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2549 
2550     LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2551                       << " instructions for each switch case for each different"
2552                       << " output path in a function\n");
2553     OutputCost += TotalCost * NumOutputBranches;
2554   }
2555 
2556   return OutputCost;
2557 }
2558 
2559 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2560   InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2561   CurrentGroup.Benefit += RegionBenefit;
2562   LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2563 
2564   InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2565   CurrentGroup.Cost += OutputReloadCost;
2566   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2567 
2568   InstructionCost AverageRegionBenefit =
2569       RegionBenefit / CurrentGroup.Regions.size();
2570   unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2571   unsigned NumRegions = CurrentGroup.Regions.size();
2572   TargetTransformInfo &TTI =
2573       getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2574 
2575   // We add one region to the cost once, to account for the instructions added
2576   // inside of the newly created function.
2577   LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2578                     << " instructions to cost for body of new function.\n");
2579   CurrentGroup.Cost += AverageRegionBenefit;
2580   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2581 
2582   // For each argument, we must add an instruction for loading the argument
2583   // out of the register and into a value inside of the newly outlined function.
2584   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2585                     << " instructions to cost for each argument in the new"
2586                     << " function.\n");
2587   CurrentGroup.Cost +=
2588       OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2589   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2590 
2591   // Each argument needs to either be loaded into a register or onto the stack.
2592   // Some arguments will only be loaded into the stack once the argument
2593   // registers are filled.
2594   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2595                     << " instructions to cost for each argument in the new"
2596                     << " function " << NumRegions << " times for the "
2597                     << "needed argument handling at the call site.\n");
2598   CurrentGroup.Cost +=
2599       2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2600   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2601 
2602   CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2603   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2604 }
2605 
2606 void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2607                                      ArrayRef<Value *> Outputs,
2608                                      LoadInst *LI) {
2609   // For and load instructions following the call
2610   Value *Operand = LI->getPointerOperand();
2611   Optional<unsigned> OutputIdx = None;
2612   // Find if the operand it is an output register.
2613   for (unsigned ArgIdx = Region.NumExtractedInputs;
2614        ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2615     if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2616       OutputIdx = ArgIdx - Region.NumExtractedInputs;
2617       break;
2618     }
2619   }
2620 
2621   // If we found an output register, place a mapping of the new value
2622   // to the original in the mapping.
2623   if (!OutputIdx.hasValue())
2624     return;
2625 
2626   if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
2627       OutputMappings.end()) {
2628     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2629                       << *Outputs[OutputIdx.getValue()] << "\n");
2630     OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
2631   } else {
2632     Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
2633     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2634                       << *Outputs[OutputIdx.getValue()] << "\n");
2635     OutputMappings.insert(std::make_pair(LI, Orig));
2636   }
2637 }
2638 
2639 bool IROutliner::extractSection(OutlinableRegion &Region) {
2640   SetVector<Value *> ArgInputs, Outputs, SinkCands;
2641   assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2642   BasicBlock *InitialStart = Region.StartBB;
2643   Function *OrigF = Region.StartBB->getParent();
2644   CodeExtractorAnalysisCache CEAC(*OrigF);
2645   Region.ExtractedFunction =
2646       Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2647 
2648   // If the extraction was successful, find the BasicBlock, and reassign the
2649   // OutlinableRegion blocks
2650   if (!Region.ExtractedFunction) {
2651     LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2652                       << "\n");
2653     Region.reattachCandidate();
2654     return false;
2655   }
2656 
2657   // Get the block containing the called branch, and reassign the blocks as
2658   // necessary.  If the original block still exists, it is because we ended on
2659   // a branch instruction, and so we move the contents into the block before
2660   // and assign the previous block correctly.
2661   User *InstAsUser = Region.ExtractedFunction->user_back();
2662   BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2663   Region.PrevBB = RewrittenBB->getSinglePredecessor();
2664   assert(Region.PrevBB && "PrevBB is nullptr?");
2665   if (Region.PrevBB == InitialStart) {
2666     BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2667     Instruction *BI = NewPrev->getTerminator();
2668     BI->eraseFromParent();
2669     moveBBContents(*InitialStart, *NewPrev);
2670     Region.PrevBB = NewPrev;
2671     InitialStart->eraseFromParent();
2672   }
2673 
2674   Region.StartBB = RewrittenBB;
2675   Region.EndBB = RewrittenBB;
2676 
2677   // The sequences of outlinable regions has now changed.  We must fix the
2678   // IRInstructionDataList for consistency.  Although they may not be illegal
2679   // instructions, they should not be compared with anything else as they
2680   // should not be outlined in this round.  So marking these as illegal is
2681   // allowed.
2682   IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2683   Instruction *BeginRewritten = &*RewrittenBB->begin();
2684   Instruction *EndRewritten = &*RewrittenBB->begin();
2685   Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2686       *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2687   Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2688       *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2689 
2690   // Insert the first IRInstructionData of the new region in front of the
2691   // first IRInstructionData of the IRSimilarityCandidate.
2692   IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2693   // Insert the first IRInstructionData of the new region after the
2694   // last IRInstructionData of the IRSimilarityCandidate.
2695   IDL->insert(Region.Candidate->end(), *Region.NewBack);
2696   // Remove the IRInstructionData from the IRSimilarityCandidate.
2697   IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2698 
2699   assert(RewrittenBB != nullptr &&
2700          "Could not find a predecessor after extraction!");
2701 
2702   // Iterate over the new set of instructions to find the new call
2703   // instruction.
2704   for (Instruction &I : *RewrittenBB)
2705     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2706       if (Region.ExtractedFunction == CI->getCalledFunction())
2707         Region.Call = CI;
2708     } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2709       updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2710   Region.reattachCandidate();
2711   return true;
2712 }
2713 
2714 unsigned IROutliner::doOutline(Module &M) {
2715   // Find the possible similarity sections.
2716   InstructionClassifier.EnableBranches = !DisableBranches;
2717   InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2718   InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2719 
2720   IRSimilarityIdentifier &Identifier = getIRSI(M);
2721   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2722 
2723   // Sort them by size of extracted sections
2724   unsigned OutlinedFunctionNum = 0;
2725   // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2726   // to sort them by the potential number of instructions to be outlined
2727   if (SimilarityCandidates.size() > 1)
2728     llvm::stable_sort(SimilarityCandidates,
2729                       [](const std::vector<IRSimilarityCandidate> &LHS,
2730                          const std::vector<IRSimilarityCandidate> &RHS) {
2731                         return LHS[0].getLength() * LHS.size() >
2732                                RHS[0].getLength() * RHS.size();
2733                       });
2734   // Creating OutlinableGroups for each SimilarityCandidate to be used in
2735   // each of the following for loops to avoid making an allocator.
2736   std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2737 
2738   DenseSet<unsigned> NotSame;
2739   std::vector<OutlinableGroup *> NegativeCostGroups;
2740   std::vector<OutlinableRegion *> OutlinedRegions;
2741   // Iterate over the possible sets of similarity.
2742   unsigned PotentialGroupIdx = 0;
2743   for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2744     OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2745 
2746     // Remove entries that were previously outlined
2747     pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2748 
2749     // We pruned the number of regions to 0 to 1, meaning that it's not worth
2750     // trying to outlined since there is no compatible similar instance of this
2751     // code.
2752     if (CurrentGroup.Regions.size() < 2)
2753       continue;
2754 
2755     // Determine if there are any values that are the same constant throughout
2756     // each section in the set.
2757     NotSame.clear();
2758     CurrentGroup.findSameConstants(NotSame);
2759 
2760     if (CurrentGroup.IgnoreGroup)
2761       continue;
2762 
2763     // Create a CodeExtractor for each outlinable region. Identify inputs and
2764     // outputs for each section using the code extractor and create the argument
2765     // types for the Aggregate Outlining Function.
2766     OutlinedRegions.clear();
2767     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2768       // Break the outlinable region out of its parent BasicBlock into its own
2769       // BasicBlocks (see function implementation).
2770       OS->splitCandidate();
2771 
2772       // There's a chance that when the region is split, extra instructions are
2773       // added to the region. This makes the region no longer viable
2774       // to be split, so we ignore it for outlining.
2775       if (!OS->CandidateSplit)
2776         continue;
2777 
2778       SmallVector<BasicBlock *> BE;
2779       DenseSet<BasicBlock *> BlocksInRegion;
2780       OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2781       OS->CE = new (ExtractorAllocator.Allocate())
2782           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2783                         false, nullptr, "outlined");
2784       findAddInputsOutputs(M, *OS, NotSame);
2785       if (!OS->IgnoreRegion)
2786         OutlinedRegions.push_back(OS);
2787 
2788       // We recombine the blocks together now that we have gathered all the
2789       // needed information.
2790       OS->reattachCandidate();
2791     }
2792 
2793     CurrentGroup.Regions = std::move(OutlinedRegions);
2794 
2795     if (CurrentGroup.Regions.empty())
2796       continue;
2797 
2798     CurrentGroup.collectGVNStoreSets(M);
2799 
2800     if (CostModel)
2801       findCostBenefit(M, CurrentGroup);
2802 
2803     // If we are adhering to the cost model, skip those groups where the cost
2804     // outweighs the benefits.
2805     if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2806       OptimizationRemarkEmitter &ORE =
2807           getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2808       ORE.emit([&]() {
2809         IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2810         OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2811                                    C->frontInstruction());
2812         R << "did not outline "
2813           << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2814           << " regions due to estimated increase of "
2815           << ore::NV("InstructionIncrease",
2816                      CurrentGroup.Cost - CurrentGroup.Benefit)
2817           << " instructions at locations ";
2818         interleave(
2819             CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2820             [&R](OutlinableRegion *Region) {
2821               R << ore::NV(
2822                   "DebugLoc",
2823                   Region->Candidate->frontInstruction()->getDebugLoc());
2824             },
2825             [&R]() { R << " "; });
2826         return R;
2827       });
2828       continue;
2829     }
2830 
2831     NegativeCostGroups.push_back(&CurrentGroup);
2832   }
2833 
2834   ExtractorAllocator.DestroyAll();
2835 
2836   if (NegativeCostGroups.size() > 1)
2837     stable_sort(NegativeCostGroups,
2838                 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2839                   return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2840                 });
2841 
2842   std::vector<Function *> FuncsToRemove;
2843   for (OutlinableGroup *CG : NegativeCostGroups) {
2844     OutlinableGroup &CurrentGroup = *CG;
2845 
2846     OutlinedRegions.clear();
2847     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2848       // We check whether our region is compatible with what has already been
2849       // outlined, and whether we need to ignore this item.
2850       if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2851         continue;
2852       OutlinedRegions.push_back(Region);
2853     }
2854 
2855     if (OutlinedRegions.size() < 2)
2856       continue;
2857 
2858     // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2859     // we are still outlining enough regions to make up for the added cost.
2860     CurrentGroup.Regions = std::move(OutlinedRegions);
2861     if (CostModel) {
2862       CurrentGroup.Benefit = 0;
2863       CurrentGroup.Cost = 0;
2864       findCostBenefit(M, CurrentGroup);
2865       if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2866         continue;
2867     }
2868     OutlinedRegions.clear();
2869     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2870       Region->splitCandidate();
2871       if (!Region->CandidateSplit)
2872         continue;
2873       OutlinedRegions.push_back(Region);
2874     }
2875 
2876     CurrentGroup.Regions = std::move(OutlinedRegions);
2877     if (CurrentGroup.Regions.size() < 2) {
2878       for (OutlinableRegion *R : CurrentGroup.Regions)
2879         R->reattachCandidate();
2880       continue;
2881     }
2882 
2883     LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2884                       << " and benefit " << CurrentGroup.Benefit << "\n");
2885 
2886     // Create functions out of all the sections, and mark them as outlined.
2887     OutlinedRegions.clear();
2888     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2889       SmallVector<BasicBlock *> BE;
2890       DenseSet<BasicBlock *> BlocksInRegion;
2891       OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2892       OS->CE = new (ExtractorAllocator.Allocate())
2893           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2894                         false, nullptr, "outlined");
2895       bool FunctionOutlined = extractSection(*OS);
2896       if (FunctionOutlined) {
2897         unsigned StartIdx = OS->Candidate->getStartIdx();
2898         unsigned EndIdx = OS->Candidate->getEndIdx();
2899         for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2900           Outlined.insert(Idx);
2901 
2902         OutlinedRegions.push_back(OS);
2903       }
2904     }
2905 
2906     LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2907                       << " with benefit " << CurrentGroup.Benefit
2908                       << " and cost " << CurrentGroup.Cost << "\n");
2909 
2910     CurrentGroup.Regions = std::move(OutlinedRegions);
2911 
2912     if (CurrentGroup.Regions.empty())
2913       continue;
2914 
2915     OptimizationRemarkEmitter &ORE =
2916         getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2917     ORE.emit([&]() {
2918       IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2919       OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2920       R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2921         << " regions with decrease of "
2922         << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2923         << " instructions at locations ";
2924       interleave(
2925           CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2926           [&R](OutlinableRegion *Region) {
2927             R << ore::NV("DebugLoc",
2928                          Region->Candidate->frontInstruction()->getDebugLoc());
2929           },
2930           [&R]() { R << " "; });
2931       return R;
2932     });
2933 
2934     deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2935                                  OutlinedFunctionNum);
2936   }
2937 
2938   for (Function *F : FuncsToRemove)
2939     F->eraseFromParent();
2940 
2941   return OutlinedFunctionNum;
2942 }
2943 
2944 bool IROutliner::run(Module &M) {
2945   CostModel = !NoCostModel;
2946   OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2947 
2948   return doOutline(M) > 0;
2949 }
2950 
2951 // Pass Manager Boilerplate
2952 namespace {
2953 class IROutlinerLegacyPass : public ModulePass {
2954 public:
2955   static char ID;
2956   IROutlinerLegacyPass() : ModulePass(ID) {
2957     initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
2958   }
2959 
2960   void getAnalysisUsage(AnalysisUsage &AU) const override {
2961     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2962     AU.addRequired<TargetTransformInfoWrapperPass>();
2963     AU.addRequired<IRSimilarityIdentifierWrapperPass>();
2964   }
2965 
2966   bool runOnModule(Module &M) override;
2967 };
2968 } // namespace
2969 
2970 bool IROutlinerLegacyPass::runOnModule(Module &M) {
2971   if (skipModule(M))
2972     return false;
2973 
2974   std::unique_ptr<OptimizationRemarkEmitter> ORE;
2975   auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2976     ORE.reset(new OptimizationRemarkEmitter(&F));
2977     return *ORE;
2978   };
2979 
2980   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
2981     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2982   };
2983 
2984   auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
2985     return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
2986   };
2987 
2988   return IROutliner(GTTI, GIRSI, GORE).run(M);
2989 }
2990 
2991 PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
2992   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2993 
2994   std::function<TargetTransformInfo &(Function &)> GTTI =
2995       [&FAM](Function &F) -> TargetTransformInfo & {
2996     return FAM.getResult<TargetIRAnalysis>(F);
2997   };
2998 
2999   std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
3000       [&AM](Module &M) -> IRSimilarityIdentifier & {
3001     return AM.getResult<IRSimilarityAnalysis>(M);
3002   };
3003 
3004   std::unique_ptr<OptimizationRemarkEmitter> ORE;
3005   std::function<OptimizationRemarkEmitter &(Function &)> GORE =
3006       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3007     ORE.reset(new OptimizationRemarkEmitter(&F));
3008     return *ORE;
3009   };
3010 
3011   if (IROutliner(GTTI, GIRSI, GORE).run(M))
3012     return PreservedAnalyses::none();
3013   return PreservedAnalyses::all();
3014 }
3015 
3016 char IROutlinerLegacyPass::ID = 0;
3017 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
3018                       false)
3019 INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
3020 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
3021 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
3022 INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
3023                     false)
3024 
3025 ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
3026