1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 // Implementation for the IROutliner which is used by the IROutliner Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/IROutliner.h"
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
17 #include "llvm/Analysis/TargetTransformInfo.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/Dominators.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/PassManager.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Pass.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Transforms/IPO.h"
28 #include <map>
29 #include <set>
30 #include <vector>
31 
32 #define DEBUG_TYPE "iroutliner"
33 
34 using namespace llvm;
35 using namespace IRSimilarity;
36 
37 // A command flag to be used for debugging to exclude branches from similarity
38 // matching and outlining.
39 namespace llvm {
40 extern cl::opt<bool> DisableBranches;
41 
42 // A command flag to be used for debugging to indirect calls from similarity
43 // matching and outlining.
44 extern cl::opt<bool> DisableIndirectCalls;
45 
46 // A command flag to be used for debugging to exclude intrinsics from similarity
47 // matching and outlining.
48 extern cl::opt<bool> DisableIntrinsics;
49 
50 } // namespace llvm
51 
52 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
53 // functions. This is false by default because the linker can dedupe linkonceodr
54 // functions. Since the outliner is confined to a single module (modulo LTO),
55 // this is off by default. It should, however, be the default behavior in
56 // LTO.
57 static cl::opt<bool> EnableLinkOnceODRIROutlining(
58     "enable-linkonceodr-ir-outlining", cl::Hidden,
59     cl::desc("Enable the IR outliner on linkonceodr functions"),
60     cl::init(false));
61 
62 // This is a debug option to test small pieces of code to ensure that outlining
63 // works correctly.
64 static cl::opt<bool> NoCostModel(
65     "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
66     cl::desc("Debug option to outline greedily, without restriction that "
67              "calculated benefit outweighs cost"));
68 
69 /// The OutlinableGroup holds all the overarching information for outlining
70 /// a set of regions that are structurally similar to one another, such as the
71 /// types of the overall function, the output blocks, the sets of stores needed
72 /// and a list of the different regions. This information is used in the
73 /// deduplication of extracted regions with the same structure.
74 struct OutlinableGroup {
75   /// The sections that could be outlined
76   std::vector<OutlinableRegion *> Regions;
77 
78   /// The argument types for the function created as the overall function to
79   /// replace the extracted function for each region.
80   std::vector<Type *> ArgumentTypes;
81   /// The FunctionType for the overall function.
82   FunctionType *OutlinedFunctionType = nullptr;
83   /// The Function for the collective overall function.
84   Function *OutlinedFunction = nullptr;
85 
86   /// Flag for whether we should not consider this group of OutlinableRegions
87   /// for extraction.
88   bool IgnoreGroup = false;
89 
90   /// The return blocks for the overall function.
91   DenseMap<Value *, BasicBlock *> EndBBs;
92 
93   /// The PHIBlocks with their corresponding return block based on the return
94   /// value as the key.
95   DenseMap<Value *, BasicBlock *> PHIBlocks;
96 
97   /// A set containing the different GVN store sets needed. Each array contains
98   /// a sorted list of the different values that need to be stored into output
99   /// registers.
100   DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
101 
102   /// Flag for whether the \ref ArgumentTypes have been defined after the
103   /// extraction of the first region.
104   bool InputTypesSet = false;
105 
106   /// The number of input values in \ref ArgumentTypes.  Anything after this
107   /// index in ArgumentTypes is an output argument.
108   unsigned NumAggregateInputs = 0;
109 
110   /// The mapping of the canonical numbering of the values in outlined sections
111   /// to specific arguments.
112   DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
113 
114   /// The number of branches in the region target a basic block that is outside
115   /// of the region.
116   unsigned BranchesToOutside = 0;
117 
118   /// Tracker counting backwards from the highest unsigned value possible to
119   /// avoid conflicting with the GVNs of assigned values.  We start at -3 since
120   /// -2 and -1 are assigned by the DenseMap.
121   unsigned PHINodeGVNTracker = -3;
122 
123   DenseMap<unsigned,
124            std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
125       PHINodeGVNToGVNs;
126   DenseMap<hash_code, unsigned> GVNsToPHINodeGVN;
127 
128   /// The number of instructions that will be outlined by extracting \ref
129   /// Regions.
130   InstructionCost Benefit = 0;
131   /// The number of added instructions needed for the outlining of the \ref
132   /// Regions.
133   InstructionCost Cost = 0;
134 
135   /// The argument that needs to be marked with the swifterr attribute.  If not
136   /// needed, there is no value.
137   Optional<unsigned> SwiftErrorArgument;
138 
139   /// For the \ref Regions, we look at every Value.  If it is a constant,
140   /// we check whether it is the same in Region.
141   ///
142   /// \param [in,out] NotSame contains the global value numbers where the
143   /// constant is not always the same, and must be passed in as an argument.
144   void findSameConstants(DenseSet<unsigned> &NotSame);
145 
146   /// For the regions, look at each set of GVN stores needed and account for
147   /// each combination.  Add an argument to the argument types if there is
148   /// more than one combination.
149   ///
150   /// \param [in] M - The module we are outlining from.
151   void collectGVNStoreSets(Module &M);
152 };
153 
154 /// Move the contents of \p SourceBB to before the last instruction of \p
155 /// TargetBB.
156 /// \param SourceBB - the BasicBlock to pull Instructions from.
157 /// \param TargetBB - the BasicBlock to put Instruction into.
158 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
159   for (Instruction &I : llvm::make_early_inc_range(SourceBB))
160     I.moveBefore(TargetBB, TargetBB.end());
161 }
162 
163 /// A function to sort the keys of \p Map, which must be a mapping of constant
164 /// values to basic blocks and return it in \p SortedKeys
165 ///
166 /// \param SortedKeys - The vector the keys will be return in and sorted.
167 /// \param Map - The DenseMap containing keys to sort.
168 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
169                                   DenseMap<Value *, BasicBlock *> &Map) {
170   for (auto &VtoBB : Map)
171     SortedKeys.push_back(VtoBB.first);
172 
173   stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
174     const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
175     const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
176     assert(RHSC && "Not a constant integer in return value?");
177     assert(LHSC && "Not a constant integer in return value?");
178 
179     return LHSC->getLimitedValue() < RHSC->getLimitedValue();
180   });
181 }
182 
183 Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
184                                                   Value *V) {
185   Optional<unsigned> GVN = Candidate->getGVN(V);
186   assert(GVN.hasValue() && "No GVN for incoming value");
187   Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
188   Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
189   Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
190   return FoundValueOpt.getValueOr(nullptr);
191 }
192 
193 BasicBlock *
194 OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
195                                            BasicBlock *BB) {
196   Instruction *FirstNonPHI = BB->getFirstNonPHI();
197   assert(FirstNonPHI && "block is empty?");
198   Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
199   if (!CorrespondingVal)
200     return nullptr;
201   BasicBlock *CorrespondingBlock =
202       cast<Instruction>(CorrespondingVal)->getParent();
203   return CorrespondingBlock;
204 }
205 
206 /// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
207 /// in \p Included to branch to BasicBlock \p Replace if they currently branch
208 /// to the BasicBlock \p Find.  This is used to fix up the incoming basic blocks
209 /// when PHINodes are included in outlined regions.
210 ///
211 /// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
212 /// checked.
213 /// \param Find - The successor block to be replaced.
214 /// \param Replace - The new succesor block to branch to.
215 /// \param Included - The set of blocks about to be outlined.
216 static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
217                                       BasicBlock *Replace,
218                                       DenseSet<BasicBlock *> &Included) {
219   for (PHINode &PN : PHIBlock->phis()) {
220     for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
221          ++Idx) {
222       // Check if the incoming block is included in the set of blocks being
223       // outlined.
224       BasicBlock *Incoming = PN.getIncomingBlock(Idx);
225       if (!Included.contains(Incoming))
226         continue;
227 
228       BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
229       assert(BI && "Not a branch instruction?");
230       // Look over the branching instructions into this block to see if we
231       // used to branch to Find in this outlined block.
232       for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
233            Succ++) {
234         // If we have found the block to replace, we do so here.
235         if (BI->getSuccessor(Succ) != Find)
236           continue;
237         BI->setSuccessor(Succ, Replace);
238       }
239     }
240   }
241 }
242 
243 
244 void OutlinableRegion::splitCandidate() {
245   assert(!CandidateSplit && "Candidate already split!");
246 
247   Instruction *BackInst = Candidate->backInstruction();
248 
249   Instruction *EndInst = nullptr;
250   // Check whether the last instruction is a terminator, if it is, we do
251   // not split on the following instruction. We leave the block as it is.  We
252   // also check that this is not the last instruction in the Module, otherwise
253   // the check for whether the current following instruction matches the
254   // previously recorded instruction will be incorrect.
255   if (!BackInst->isTerminator() ||
256       BackInst->getParent() != &BackInst->getFunction()->back()) {
257     EndInst = Candidate->end()->Inst;
258     assert(EndInst && "Expected an end instruction?");
259   }
260 
261   // We check if the current instruction following the last instruction in the
262   // region is the same as the recorded instruction following the last
263   // instruction. If they do not match, there could be problems in rewriting
264   // the program after outlining, so we ignore it.
265   if (!BackInst->isTerminator() &&
266       EndInst != BackInst->getNextNonDebugInstruction())
267     return;
268 
269   Instruction *StartInst = (*Candidate->begin()).Inst;
270   assert(StartInst && "Expected a start instruction?");
271   StartBB = StartInst->getParent();
272   PrevBB = StartBB;
273 
274   DenseSet<BasicBlock *> BBSet;
275   Candidate->getBasicBlocks(BBSet);
276 
277   // We iterate over the instructions in the region, if we find a PHINode, we
278   // check if there are predecessors outside of the region, if there are,
279   // we ignore this region since we are unable to handle the severing of the
280   // phi node right now.
281   BasicBlock::iterator It = StartInst->getIterator();
282   while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
283     unsigned NumPredsOutsideRegion = 0;
284     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
285       if (!BBSet.contains(PN->getIncomingBlock(i)))
286         ++NumPredsOutsideRegion;
287 
288     if (NumPredsOutsideRegion > 1)
289       return;
290 
291     It++;
292   }
293 
294   // If the region starts with a PHINode, but is not the initial instruction of
295   // the BasicBlock, we ignore this region for now.
296   if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
297     return;
298 
299   // If the region ends with a PHINode, but does not contain all of the phi node
300   // instructions of the region, we ignore it for now.
301   if (isa<PHINode>(BackInst)) {
302     EndBB = BackInst->getParent();
303     if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
304       return;
305   }
306 
307   // The basic block gets split like so:
308   // block:                 block:
309   //   inst1                  inst1
310   //   inst2                  inst2
311   //   region1               br block_to_outline
312   //   region2              block_to_outline:
313   //   region3          ->    region1
314   //   region4                region2
315   //   inst3                  region3
316   //   inst4                  region4
317   //                          br block_after_outline
318   //                        block_after_outline:
319   //                          inst3
320   //                          inst4
321 
322   std::string OriginalName = PrevBB->getName().str();
323 
324   StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
325   PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
326 
327   CandidateSplit = true;
328   if (!BackInst->isTerminator()) {
329     EndBB = EndInst->getParent();
330     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
331     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
332     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
333   } else {
334     EndBB = BackInst->getParent();
335     EndsInBranch = true;
336     FollowBB = nullptr;
337   }
338 
339   // Refind the basic block set.
340   BBSet.clear();
341   Candidate->getBasicBlocks(BBSet);
342   // For the phi nodes in the new starting basic block of the region, we
343   // reassign the targets of the basic blocks branching instructions.
344   replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
345   if (FollowBB)
346     replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
347 }
348 
349 void OutlinableRegion::reattachCandidate() {
350   assert(CandidateSplit && "Candidate is not split!");
351 
352   // The basic block gets reattached like so:
353   // block:                        block:
354   //   inst1                         inst1
355   //   inst2                         inst2
356   //   br block_to_outline           region1
357   // block_to_outline:        ->     region2
358   //   region1                       region3
359   //   region2                       region4
360   //   region3                       inst3
361   //   region4                       inst4
362   //   br block_after_outline
363   // block_after_outline:
364   //   inst3
365   //   inst4
366   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
367 
368   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
369   PrevBB->getTerminator()->eraseFromParent();
370 
371   // If we reattaching after outlining, we iterate over the phi nodes to
372   // the initial block, and reassign the branch instructions of the incoming
373   // blocks to the block we are remerging into.
374   if (!ExtractedFunction) {
375     DenseSet<BasicBlock *> BBSet;
376     Candidate->getBasicBlocks(BBSet);
377 
378     replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
379     if (!EndsInBranch)
380       replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
381   }
382 
383   moveBBContents(*StartBB, *PrevBB);
384 
385   BasicBlock *PlacementBB = PrevBB;
386   if (StartBB != EndBB)
387     PlacementBB = EndBB;
388   if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
389     assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
390     assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
391     PlacementBB->getTerminator()->eraseFromParent();
392     moveBBContents(*FollowBB, *PlacementBB);
393     PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
394     FollowBB->eraseFromParent();
395   }
396 
397   PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
398   StartBB->eraseFromParent();
399 
400   // Make sure to save changes back to the StartBB.
401   StartBB = PrevBB;
402   EndBB = nullptr;
403   PrevBB = nullptr;
404   FollowBB = nullptr;
405 
406   CandidateSplit = false;
407 }
408 
409 /// Find whether \p V matches the Constants previously found for the \p GVN.
410 ///
411 /// \param V - The value to check for consistency.
412 /// \param GVN - The global value number assigned to \p V.
413 /// \param GVNToConstant - The mapping of global value number to Constants.
414 /// \returns true if the Value matches the Constant mapped to by V and false if
415 /// it \p V is a Constant but does not match.
416 /// \returns None if \p V is not a Constant.
417 static Optional<bool>
418 constantMatches(Value *V, unsigned GVN,
419                 DenseMap<unsigned, Constant *> &GVNToConstant) {
420   // See if we have a constants
421   Constant *CST = dyn_cast<Constant>(V);
422   if (!CST)
423     return None;
424 
425   // Holds a mapping from a global value number to a Constant.
426   DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
427   bool Inserted;
428 
429 
430   // If we have a constant, try to make a new entry in the GVNToConstant.
431   std::tie(GVNToConstantIt, Inserted) =
432       GVNToConstant.insert(std::make_pair(GVN, CST));
433   // If it was found and is not equal, it is not the same. We do not
434   // handle this case yet, and exit early.
435   if (Inserted || (GVNToConstantIt->second == CST))
436     return true;
437 
438   return false;
439 }
440 
441 InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
442   InstructionCost Benefit = 0;
443 
444   // Estimate the benefit of outlining a specific sections of the program.  We
445   // delegate mostly this task to the TargetTransformInfo so that if the target
446   // has specific changes, we can have a more accurate estimate.
447 
448   // However, getInstructionCost delegates the code size calculation for
449   // arithmetic instructions to getArithmeticInstrCost in
450   // include/Analysis/TargetTransformImpl.h, where it always estimates that the
451   // code size for a division and remainder instruction to be equal to 4, and
452   // everything else to 1.  This is not an accurate representation of the
453   // division instruction for targets that have a native division instruction.
454   // To be overly conservative, we only add 1 to the number of instructions for
455   // each division instruction.
456   for (IRInstructionData &ID : *Candidate) {
457     Instruction *I = ID.Inst;
458     switch (I->getOpcode()) {
459     case Instruction::FDiv:
460     case Instruction::FRem:
461     case Instruction::SDiv:
462     case Instruction::SRem:
463     case Instruction::UDiv:
464     case Instruction::URem:
465       Benefit += 1;
466       break;
467     default:
468       Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
469       break;
470     }
471   }
472 
473   return Benefit;
474 }
475 
476 /// Check the \p OutputMappings structure for value \p Input, if it exists
477 /// it has been used as an output for outlining, and has been renamed, and we
478 /// return the new value, otherwise, we return the same value.
479 ///
480 /// \param OutputMappings [in] - The mapping of values to their renamed value
481 /// after being used as an output for an outlined region.
482 /// \param Input [in] - The value to find the remapped value of, if it exists.
483 /// \return The remapped value if it has been renamed, and the same value if has
484 /// not.
485 static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
486                                 Value *Input) {
487   DenseMap<Value *, Value *>::const_iterator OutputMapping =
488       OutputMappings.find(Input);
489   if (OutputMapping != OutputMappings.end())
490     return OutputMapping->second;
491   return Input;
492 }
493 
494 /// Find whether \p Region matches the global value numbering to Constant
495 /// mapping found so far.
496 ///
497 /// \param Region - The OutlinableRegion we are checking for constants
498 /// \param GVNToConstant - The mapping of global value number to Constants.
499 /// \param NotSame - The set of global value numbers that do not have the same
500 /// constant in each region.
501 /// \returns true if all Constants are the same in every use of a Constant in \p
502 /// Region and false if not
503 static bool
504 collectRegionsConstants(OutlinableRegion &Region,
505                         DenseMap<unsigned, Constant *> &GVNToConstant,
506                         DenseSet<unsigned> &NotSame) {
507   bool ConstantsTheSame = true;
508 
509   IRSimilarityCandidate &C = *Region.Candidate;
510   for (IRInstructionData &ID : C) {
511 
512     // Iterate over the operands in an instruction. If the global value number,
513     // assigned by the IRSimilarityCandidate, has been seen before, we check if
514     // the the number has been found to be not the same value in each instance.
515     for (Value *V : ID.OperVals) {
516       Optional<unsigned> GVNOpt = C.getGVN(V);
517       assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
518       unsigned GVN = GVNOpt.getValue();
519 
520       // Check if this global value has been found to not be the same already.
521       if (NotSame.contains(GVN)) {
522         if (isa<Constant>(V))
523           ConstantsTheSame = false;
524         continue;
525       }
526 
527       // If it has been the same so far, we check the value for if the
528       // associated Constant value match the previous instances of the same
529       // global value number.  If the global value does not map to a Constant,
530       // it is considered to not be the same value.
531       Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
532       if (ConstantMatches.hasValue()) {
533         if (ConstantMatches.getValue())
534           continue;
535         else
536           ConstantsTheSame = false;
537       }
538 
539       // While this value is a register, it might not have been previously,
540       // make sure we don't already have a constant mapped to this global value
541       // number.
542       if (GVNToConstant.find(GVN) != GVNToConstant.end())
543         ConstantsTheSame = false;
544 
545       NotSame.insert(GVN);
546     }
547   }
548 
549   return ConstantsTheSame;
550 }
551 
552 void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
553   DenseMap<unsigned, Constant *> GVNToConstant;
554 
555   for (OutlinableRegion *Region : Regions)
556     collectRegionsConstants(*Region, GVNToConstant, NotSame);
557 }
558 
559 void OutlinableGroup::collectGVNStoreSets(Module &M) {
560   for (OutlinableRegion *OS : Regions)
561     OutputGVNCombinations.insert(OS->GVNStores);
562 
563   // We are adding an extracted argument to decide between which output path
564   // to use in the basic block.  It is used in a switch statement and only
565   // needs to be an integer.
566   if (OutputGVNCombinations.size() > 1)
567     ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
568 }
569 
570 /// Get the subprogram if it exists for one of the outlined regions.
571 ///
572 /// \param [in] Group - The set of regions to find a subprogram for.
573 /// \returns the subprogram if it exists, or nullptr.
574 static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
575   for (OutlinableRegion *OS : Group.Regions)
576     if (Function *F = OS->Call->getFunction())
577       if (DISubprogram *SP = F->getSubprogram())
578         return SP;
579 
580   return nullptr;
581 }
582 
583 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
584                                      unsigned FunctionNameSuffix) {
585   assert(!Group.OutlinedFunction && "Function is already defined!");
586 
587   Type *RetTy = Type::getVoidTy(M.getContext());
588   // All extracted functions _should_ have the same return type at this point
589   // since the similarity identifier ensures that all branches outside of the
590   // region occur in the same place.
591 
592   // NOTE: Should we ever move to the model that uses a switch at every point
593   // needed, meaning that we could branch within the region or out, it is
594   // possible that we will need to switch to using the most general case all of
595   // the time.
596   for (OutlinableRegion *R : Group.Regions) {
597     Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
598     if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
599         (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
600       RetTy = ExtractedFuncType;
601   }
602 
603   Group.OutlinedFunctionType = FunctionType::get(
604       RetTy, Group.ArgumentTypes, false);
605 
606   // These functions will only be called from within the same module, so
607   // we can set an internal linkage.
608   Group.OutlinedFunction = Function::Create(
609       Group.OutlinedFunctionType, GlobalValue::InternalLinkage,
610       "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
611 
612   // Transfer the swifterr attribute to the correct function parameter.
613   if (Group.SwiftErrorArgument.hasValue())
614     Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
615                                          Attribute::SwiftError);
616 
617   Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
618   Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
619 
620   // If there's a DISubprogram associated with this outlined function, then
621   // emit debug info for the outlined function.
622   if (DISubprogram *SP = getSubprogramOrNull(Group)) {
623     Function *F = Group.OutlinedFunction;
624     // We have a DISubprogram. Get its DICompileUnit.
625     DICompileUnit *CU = SP->getUnit();
626     DIBuilder DB(M, true, CU);
627     DIFile *Unit = SP->getFile();
628     Mangler Mg;
629     // Get the mangled name of the function for the linkage name.
630     std::string Dummy;
631     llvm::raw_string_ostream MangledNameStream(Dummy);
632     Mg.getNameWithPrefix(MangledNameStream, F, false);
633 
634     DISubprogram *OutlinedSP = DB.createFunction(
635         Unit /* Context */, F->getName(), MangledNameStream.str(),
636         Unit /* File */,
637         0 /* Line 0 is reserved for compiler-generated code. */,
638         DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
639         0, /* Line 0 is reserved for compiler-generated code. */
640         DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
641         /* Outlined code is optimized code by definition. */
642         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
643 
644     // Don't add any new variables to the subprogram.
645     DB.finalizeSubprogram(OutlinedSP);
646 
647     // Attach subprogram to the function.
648     F->setSubprogram(OutlinedSP);
649     // We're done with the DIBuilder.
650     DB.finalize();
651   }
652 
653   return Group.OutlinedFunction;
654 }
655 
656 /// Move each BasicBlock in \p Old to \p New.
657 ///
658 /// \param [in] Old - The function to move the basic blocks from.
659 /// \param [in] New - The function to move the basic blocks to.
660 /// \param [out] NewEnds - The return blocks of the new overall function.
661 static void moveFunctionData(Function &Old, Function &New,
662                              DenseMap<Value *, BasicBlock *> &NewEnds) {
663   for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
664     CurrBB.removeFromParent();
665     CurrBB.insertInto(&New);
666     Instruction *I = CurrBB.getTerminator();
667 
668     // For each block we find a return instruction is, it is a potential exit
669     // path for the function.  We keep track of each block based on the return
670     // value here.
671     if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
672       NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
673 
674     std::vector<Instruction *> DebugInsts;
675 
676     for (Instruction &Val : CurrBB) {
677       // We must handle the scoping of called functions differently than
678       // other outlined instructions.
679       if (!isa<CallInst>(&Val)) {
680         // Remove the debug information for outlined functions.
681         Val.setDebugLoc(DebugLoc());
682         continue;
683       }
684 
685       // From this point we are only handling call instructions.
686       CallInst *CI = cast<CallInst>(&Val);
687 
688       // We add any debug statements here, to be removed after.  Since the
689       // instructions originate from many different locations in the program,
690       // it will cause incorrect reporting from a debugger if we keep the
691       // same debug instructions.
692       if (isa<DbgInfoIntrinsic>(CI)) {
693         DebugInsts.push_back(&Val);
694         continue;
695       }
696 
697       // Edit the scope of called functions inside of outlined functions.
698       if (DISubprogram *SP = New.getSubprogram()) {
699         DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
700         Val.setDebugLoc(DI);
701       }
702     }
703 
704     for (Instruction *I : DebugInsts)
705       I->eraseFromParent();
706   }
707 }
708 
709 /// Find the the constants that will need to be lifted into arguments
710 /// as they are not the same in each instance of the region.
711 ///
712 /// \param [in] C - The IRSimilarityCandidate containing the region we are
713 /// analyzing.
714 /// \param [in] NotSame - The set of global value numbers that do not have a
715 /// single Constant across all OutlinableRegions similar to \p C.
716 /// \param [out] Inputs - The list containing the global value numbers of the
717 /// arguments needed for the region of code.
718 static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
719                           std::vector<unsigned> &Inputs) {
720   DenseSet<unsigned> Seen;
721   // Iterate over the instructions, and find what constants will need to be
722   // extracted into arguments.
723   for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
724        IDIt != EndIDIt; IDIt++) {
725     for (Value *V : (*IDIt).OperVals) {
726       // Since these are stored before any outlining, they will be in the
727       // global value numbering.
728       unsigned GVN = C.getGVN(V).getValue();
729       if (isa<Constant>(V))
730         if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
731           Inputs.push_back(GVN);
732           Seen.insert(GVN);
733         }
734     }
735   }
736 }
737 
738 /// Find the GVN for the inputs that have been found by the CodeExtractor.
739 ///
740 /// \param [in] C - The IRSimilarityCandidate containing the region we are
741 /// analyzing.
742 /// \param [in] CurrentInputs - The set of inputs found by the
743 /// CodeExtractor.
744 /// \param [in] OutputMappings - The mapping of values that have been replaced
745 /// by a new output value.
746 /// \param [out] EndInputNumbers - The global value numbers for the extracted
747 /// arguments.
748 static void mapInputsToGVNs(IRSimilarityCandidate &C,
749                             SetVector<Value *> &CurrentInputs,
750                             const DenseMap<Value *, Value *> &OutputMappings,
751                             std::vector<unsigned> &EndInputNumbers) {
752   // Get the Global Value Number for each input.  We check if the Value has been
753   // replaced by a different value at output, and use the original value before
754   // replacement.
755   for (Value *Input : CurrentInputs) {
756     assert(Input && "Have a nullptr as an input");
757     if (OutputMappings.find(Input) != OutputMappings.end())
758       Input = OutputMappings.find(Input)->second;
759     assert(C.getGVN(Input).hasValue() &&
760            "Could not find a numbering for the given input");
761     EndInputNumbers.push_back(C.getGVN(Input).getValue());
762   }
763 }
764 
765 /// Find the original value for the \p ArgInput values if any one of them was
766 /// replaced during a previous extraction.
767 ///
768 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
769 /// \param [in] OutputMappings - The mapping of values that have been replaced
770 /// by a new output value.
771 /// \param [out] RemappedArgInputs - The remapped values according to
772 /// \p OutputMappings that will be extracted.
773 static void
774 remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
775                      const DenseMap<Value *, Value *> &OutputMappings,
776                      SetVector<Value *> &RemappedArgInputs) {
777   // Get the global value number for each input that will be extracted as an
778   // argument by the code extractor, remapping if needed for reloaded values.
779   for (Value *Input : ArgInputs) {
780     if (OutputMappings.find(Input) != OutputMappings.end())
781       Input = OutputMappings.find(Input)->second;
782     RemappedArgInputs.insert(Input);
783   }
784 }
785 
786 /// Find the input GVNs and the output values for a region of Instructions.
787 /// Using the code extractor, we collect the inputs to the extracted function.
788 ///
789 /// The \p Region can be identified as needing to be ignored in this function.
790 /// It should be checked whether it should be ignored after a call to this
791 /// function.
792 ///
793 /// \param [in,out] Region - The region of code to be analyzed.
794 /// \param [out] InputGVNs - The global value numbers for the extracted
795 /// arguments.
796 /// \param [in] NotSame - The global value numbers in the region that do not
797 /// have the same constant value in the regions structurally similar to
798 /// \p Region.
799 /// \param [in] OutputMappings - The mapping of values that have been replaced
800 /// by a new output value after extraction.
801 /// \param [out] ArgInputs - The values of the inputs to the extracted function.
802 /// \param [out] Outputs - The set of values extracted by the CodeExtractor
803 /// as outputs.
804 static void getCodeExtractorArguments(
805     OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
806     DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
807     SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
808   IRSimilarityCandidate &C = *Region.Candidate;
809 
810   // OverallInputs are the inputs to the region found by the CodeExtractor,
811   // SinkCands and HoistCands are used by the CodeExtractor to find sunken
812   // allocas of values whose lifetimes are contained completely within the
813   // outlined region. PremappedInputs are the arguments found by the
814   // CodeExtractor, removing conditions such as sunken allocas, but that
815   // may need to be remapped due to the extracted output values replacing
816   // the original values. We use DummyOutputs for this first run of finding
817   // inputs and outputs since the outputs could change during findAllocas,
818   // the correct set of extracted outputs will be in the final Outputs ValueSet.
819   SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
820       DummyOutputs;
821 
822   // Use the code extractor to get the inputs and outputs, without sunken
823   // allocas or removing llvm.assumes.
824   CodeExtractor *CE = Region.CE;
825   CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
826   assert(Region.StartBB && "Region must have a start BasicBlock!");
827   Function *OrigF = Region.StartBB->getParent();
828   CodeExtractorAnalysisCache CEAC(*OrigF);
829   BasicBlock *Dummy = nullptr;
830 
831   // The region may be ineligible due to VarArgs in the parent function. In this
832   // case we ignore the region.
833   if (!CE->isEligible()) {
834     Region.IgnoreRegion = true;
835     return;
836   }
837 
838   // Find if any values are going to be sunk into the function when extracted
839   CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
840   CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
841 
842   // TODO: Support regions with sunken allocas: values whose lifetimes are
843   // contained completely within the outlined region.  These are not guaranteed
844   // to be the same in every region, so we must elevate them all to arguments
845   // when they appear.  If these values are not equal, it means there is some
846   // Input in OverallInputs that was removed for ArgInputs.
847   if (OverallInputs.size() != PremappedInputs.size()) {
848     Region.IgnoreRegion = true;
849     return;
850   }
851 
852   findConstants(C, NotSame, InputGVNs);
853 
854   mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
855 
856   remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
857                        ArgInputs);
858 
859   // Sort the GVNs, since we now have constants included in the \ref InputGVNs
860   // we need to make sure they are in a deterministic order.
861   stable_sort(InputGVNs);
862 }
863 
864 /// Look over the inputs and map each input argument to an argument in the
865 /// overall function for the OutlinableRegions.  This creates a way to replace
866 /// the arguments of the extracted function with the arguments of the new
867 /// overall function.
868 ///
869 /// \param [in,out] Region - The region of code to be analyzed.
870 /// \param [in] InputGVNs - The global value numbering of the input values
871 /// collected.
872 /// \param [in] ArgInputs - The values of the arguments to the extracted
873 /// function.
874 static void
875 findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
876                                         std::vector<unsigned> &InputGVNs,
877                                         SetVector<Value *> &ArgInputs) {
878 
879   IRSimilarityCandidate &C = *Region.Candidate;
880   OutlinableGroup &Group = *Region.Parent;
881 
882   // This counts the argument number in the overall function.
883   unsigned TypeIndex = 0;
884 
885   // This counts the argument number in the extracted function.
886   unsigned OriginalIndex = 0;
887 
888   // Find the mapping of the extracted arguments to the arguments for the
889   // overall function. Since there may be extra arguments in the overall
890   // function to account for the extracted constants, we have two different
891   // counters as we find extracted arguments, and as we come across overall
892   // arguments.
893 
894   // Additionally, in our first pass, for the first extracted function,
895   // we find argument locations for the canonical value numbering.  This
896   // numbering overrides any discovered location for the extracted code.
897   for (unsigned InputVal : InputGVNs) {
898     Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
899     assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
900     unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
901 
902     Optional<Value *> InputOpt = C.fromGVN(InputVal);
903     assert(InputOpt.hasValue() && "Global value number not found?");
904     Value *Input = InputOpt.getValue();
905 
906     DenseMap<unsigned, unsigned>::iterator AggArgIt =
907         Group.CanonicalNumberToAggArg.find(CanonicalNumber);
908 
909     if (!Group.InputTypesSet) {
910       Group.ArgumentTypes.push_back(Input->getType());
911       // If the input value has a swifterr attribute, make sure to mark the
912       // argument in the overall function.
913       if (Input->isSwiftError()) {
914         assert(
915             !Group.SwiftErrorArgument.hasValue() &&
916             "Argument already marked with swifterr for this OutlinableGroup!");
917         Group.SwiftErrorArgument = TypeIndex;
918       }
919     }
920 
921     // Check if we have a constant. If we do add it to the overall argument
922     // number to Constant map for the region, and continue to the next input.
923     if (Constant *CST = dyn_cast<Constant>(Input)) {
924       if (AggArgIt != Group.CanonicalNumberToAggArg.end())
925         Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
926       else {
927         Group.CanonicalNumberToAggArg.insert(
928             std::make_pair(CanonicalNumber, TypeIndex));
929         Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
930       }
931       TypeIndex++;
932       continue;
933     }
934 
935     // It is not a constant, we create the mapping from extracted argument list
936     // to the overall argument list, using the canonical location, if it exists.
937     assert(ArgInputs.count(Input) && "Input cannot be found!");
938 
939     if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
940       if (OriginalIndex != AggArgIt->second)
941         Region.ChangedArgOrder = true;
942       Region.ExtractedArgToAgg.insert(
943           std::make_pair(OriginalIndex, AggArgIt->second));
944       Region.AggArgToExtracted.insert(
945           std::make_pair(AggArgIt->second, OriginalIndex));
946     } else {
947       Group.CanonicalNumberToAggArg.insert(
948           std::make_pair(CanonicalNumber, TypeIndex));
949       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
950       Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
951     }
952     OriginalIndex++;
953     TypeIndex++;
954   }
955 
956   // If the function type definitions for the OutlinableGroup holding the region
957   // have not been set, set the length of the inputs here.  We should have the
958   // same inputs for all of the different regions contained in the
959   // OutlinableGroup since they are all structurally similar to one another.
960   if (!Group.InputTypesSet) {
961     Group.NumAggregateInputs = TypeIndex;
962     Group.InputTypesSet = true;
963   }
964 
965   Region.NumExtractedInputs = OriginalIndex;
966 }
967 
968 /// Check if the \p V has any uses outside of the region other than \p PN.
969 ///
970 /// \param V [in] - The value to check.
971 /// \param PHILoc [in] - The location in the PHINode of \p V.
972 /// \param PN [in] - The PHINode using \p V.
973 /// \param Exits [in] - The potential blocks we exit to from the outlined
974 /// region.
975 /// \param BlocksInRegion [in] - The basic blocks contained in the region.
976 /// \returns true if \p V has any use soutside its region other than \p PN.
977 static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
978                             SmallPtrSet<BasicBlock *, 1> &Exits,
979                             DenseSet<BasicBlock *> &BlocksInRegion) {
980   // We check to see if the value is used by the PHINode from some other
981   // predecessor not included in the region.  If it is, we make sure
982   // to keep it as an output.
983   if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
984              [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
985                return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
986                        !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
987              }))
988     return true;
989 
990   // Check if the value is used by any other instructions outside the region.
991   return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
992     Instruction *I = dyn_cast<Instruction>(U);
993     if (!I)
994       return false;
995 
996     // If the use of the item is inside the region, we skip it.  Uses
997     // inside the region give us useful information about how the item could be
998     // used as an output.
999     BasicBlock *Parent = I->getParent();
1000     if (BlocksInRegion.contains(Parent))
1001       return false;
1002 
1003     // If it's not a PHINode then we definitely know the use matters.  This
1004     // output value will not completely combined with another item in a PHINode
1005     // as it is directly reference by another non-phi instruction
1006     if (!isa<PHINode>(I))
1007       return true;
1008 
1009     // If we have a PHINode outside one of the exit locations, then it
1010     // can be considered an outside use as well.  If there is a PHINode
1011     // contained in the Exit where this values use matters, it will be
1012     // caught when we analyze that PHINode.
1013     if (!Exits.contains(Parent))
1014       return true;
1015 
1016     return false;
1017   });
1018 }
1019 
1020 /// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1021 /// considered outputs. A PHINodes is an output when more than one incoming
1022 /// value has been marked by the CodeExtractor as an output.
1023 ///
1024 /// \param CurrentExitFromRegion [in] - The block to analyze.
1025 /// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1026 /// region.
1027 /// \param RegionBlocks [in] - The basic blocks in the region.
1028 /// \param Outputs [in, out] - The existing outputs for the region, we may add
1029 /// PHINodes to this as we find that they replace output values.
1030 /// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1031 /// totally replaced  by a PHINode.
1032 /// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1033 /// in PHINodes, but have other uses, and should still be considered outputs.
1034 static void analyzeExitPHIsForOutputUses(
1035     BasicBlock *CurrentExitFromRegion,
1036     SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1037     DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1038     DenseSet<Value *> &OutputsReplacedByPHINode,
1039     DenseSet<Value *> &OutputsWithNonPhiUses) {
1040   for (PHINode &PN : CurrentExitFromRegion->phis()) {
1041     // Find all incoming values from the outlining region.
1042     SmallVector<unsigned, 2> IncomingVals;
1043     for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1044       if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1045         IncomingVals.push_back(I);
1046 
1047     // Do not process PHI if there are no predecessors from region.
1048     unsigned NumIncomingVals = IncomingVals.size();
1049     if (NumIncomingVals == 0)
1050       continue;
1051 
1052     // If there is one predecessor, we mark it as a value that needs to be kept
1053     // as an output.
1054     if (NumIncomingVals == 1) {
1055       Value *V = PN.getIncomingValue(*IncomingVals.begin());
1056       OutputsWithNonPhiUses.insert(V);
1057       OutputsReplacedByPHINode.erase(V);
1058       continue;
1059     }
1060 
1061     // This PHINode will be used as an output value, so we add it to our list.
1062     Outputs.insert(&PN);
1063 
1064     // Not all of the incoming values should be ignored as other inputs and
1065     // outputs may have uses in outlined region.  If they have other uses
1066     // outside of the single PHINode we should not skip over it.
1067     for (unsigned Idx : IncomingVals) {
1068       Value *V = PN.getIncomingValue(Idx);
1069       if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1070         OutputsWithNonPhiUses.insert(V);
1071         OutputsReplacedByPHINode.erase(V);
1072         continue;
1073       }
1074       if (!OutputsWithNonPhiUses.contains(V))
1075         OutputsReplacedByPHINode.insert(V);
1076     }
1077   }
1078 }
1079 
1080 // Represents the type for the unsigned number denoting the output number for
1081 // phi node, along with the canonical number for the exit block.
1082 using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1083 // The list of canonical numbers for the incoming values to a PHINode.
1084 using CanonList = SmallVector<unsigned, 2>;
1085 // The pair type representing the set of canonical values being combined in the
1086 // PHINode, along with the location data for the PHINode.
1087 using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1088 
1089 /// Encode \p PND as an integer for easy lookup based on the argument location,
1090 /// the parent BasicBlock canonical numbering, and the canonical numbering of
1091 /// the values stored in the PHINode.
1092 ///
1093 /// \param PND - The data to hash.
1094 /// \returns The hash code of \p PND.
1095 static hash_code encodePHINodeData(PHINodeData &PND) {
1096   return llvm::hash_combine(
1097       llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
1098       llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
1099 }
1100 
1101 /// Create a special GVN for PHINodes that will be used outside of
1102 /// the region.  We create a hash code based on the Canonical number of the
1103 /// parent BasicBlock, the canonical numbering of the values stored in the
1104 /// PHINode and the aggregate argument location.  This is used to find whether
1105 /// this PHINode type has been given a canonical numbering already.  If not, we
1106 /// assign it a value and store it for later use.  The value is returned to
1107 /// identify different output schemes for the set of regions.
1108 ///
1109 /// \param Region - The region that \p PN is an output for.
1110 /// \param PN - The PHINode we are analyzing.
1111 /// \param Blocks - The blocks for the region we are analyzing.
1112 /// \param AggArgIdx - The argument \p PN will be stored into.
1113 /// \returns An optional holding the assigned canonical number, or None if
1114 /// there is some attribute of the PHINode blocking it from being used.
1115 static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1116                                            PHINode *PN,
1117                                            DenseSet<BasicBlock *> &Blocks,
1118                                            unsigned AggArgIdx) {
1119   OutlinableGroup &Group = *Region.Parent;
1120   IRSimilarityCandidate &Cand = *Region.Candidate;
1121   BasicBlock *PHIBB = PN->getParent();
1122   CanonList PHIGVNs;
1123   Value *Incoming;
1124   BasicBlock *IncomingBlock;
1125   for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1126     Incoming = PN->getIncomingValue(Idx);
1127     IncomingBlock = PN->getIncomingBlock(Idx);
1128     // If we cannot find a GVN, and the incoming block is included in the region
1129     // this means that the input to the PHINode is not included in the region we
1130     // are trying to analyze, meaning, that if it was outlined, we would be
1131     // adding an extra input.  We ignore this case for now, and so ignore the
1132     // region.
1133     Optional<unsigned> OGVN = Cand.getGVN(Incoming);
1134     if (!OGVN.hasValue() && (Blocks.find(IncomingBlock) != Blocks.end())) {
1135       Region.IgnoreRegion = true;
1136       return None;
1137     }
1138 
1139     // If the incoming block isn't in the region, we don't have to worry about
1140     // this incoming value.
1141     if (Blocks.find(IncomingBlock) == Blocks.end())
1142       continue;
1143 
1144     // Collect the canonical numbers of the values in the PHINode.
1145     unsigned GVN = OGVN.getValue();
1146     OGVN = Cand.getCanonicalNum(GVN);
1147     assert(OGVN.hasValue() && "No GVN found for incoming value?");
1148     PHIGVNs.push_back(*OGVN);
1149   }
1150 
1151   // Now that we have the GVNs for the incoming values, we are going to combine
1152   // them with the GVN of the incoming bock, and the output location of the
1153   // PHINode to generate a hash value representing this instance of the PHINode.
1154   DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
1155   DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
1156   Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1157   assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
1158 
1159   BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
1160   assert(BBGVN.hasValue() &&
1161          "Could not find canonical number for the incoming block!");
1162   // Create a pair of the exit block canonical value, and the aggregate
1163   // argument location, connected to the canonical numbers stored in the
1164   // PHINode.
1165   PHINodeData TemporaryPair =
1166       std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs);
1167   hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1168 
1169   // Look for and create a new entry in our connection between canonical
1170   // numbers for PHINodes, and the set of objects we just created.
1171   GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1172   if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1173     bool Inserted = false;
1174     std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1175         std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1176     std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1177         std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1178   }
1179 
1180   return GVNToPHIIt->second;
1181 }
1182 
1183 /// Create a mapping of the output arguments for the \p Region to the output
1184 /// arguments of the overall outlined function.
1185 ///
1186 /// \param [in,out] Region - The region of code to be analyzed.
1187 /// \param [in] Outputs - The values found by the code extractor.
1188 static void
1189 findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
1190                                           SetVector<Value *> &Outputs) {
1191   OutlinableGroup &Group = *Region.Parent;
1192   IRSimilarityCandidate &C = *Region.Candidate;
1193 
1194   SmallVector<BasicBlock *> BE;
1195   DenseSet<BasicBlock *> BlocksInRegion;
1196   C.getBasicBlocks(BlocksInRegion, BE);
1197 
1198   // Find the exits to the region.
1199   SmallPtrSet<BasicBlock *, 1> Exits;
1200   for (BasicBlock *Block : BE)
1201     for (BasicBlock *Succ : successors(Block))
1202       if (!BlocksInRegion.contains(Succ))
1203         Exits.insert(Succ);
1204 
1205   // After determining which blocks exit to PHINodes, we add these PHINodes to
1206   // the set of outputs to be processed.  We also check the incoming values of
1207   // the PHINodes for whether they should no longer be considered outputs.
1208   DenseSet<Value *> OutputsReplacedByPHINode;
1209   DenseSet<Value *> OutputsWithNonPhiUses;
1210   for (BasicBlock *ExitBB : Exits)
1211     analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1212                                  OutputsReplacedByPHINode,
1213                                  OutputsWithNonPhiUses);
1214 
1215   // This counts the argument number in the extracted function.
1216   unsigned OriginalIndex = Region.NumExtractedInputs;
1217 
1218   // This counts the argument number in the overall function.
1219   unsigned TypeIndex = Group.NumAggregateInputs;
1220   bool TypeFound;
1221   DenseSet<unsigned> AggArgsUsed;
1222 
1223   // Iterate over the output types and identify if there is an aggregate pointer
1224   // type whose base type matches the current output type. If there is, we mark
1225   // that we will use this output register for this value. If not we add another
1226   // type to the overall argument type list. We also store the GVNs used for
1227   // stores to identify which values will need to be moved into an special
1228   // block that holds the stores to the output registers.
1229   for (Value *Output : Outputs) {
1230     TypeFound = false;
1231     // We can do this since it is a result value, and will have a number
1232     // that is necessarily the same. BUT if in the future, the instructions
1233     // do not have to be in same order, but are functionally the same, we will
1234     // have to use a different scheme, as one-to-one correspondence is not
1235     // guaranteed.
1236     unsigned ArgumentSize = Group.ArgumentTypes.size();
1237 
1238     // If the output is combined in a PHINode, we make sure to skip over it.
1239     if (OutputsReplacedByPHINode.contains(Output))
1240       continue;
1241 
1242     unsigned AggArgIdx = 0;
1243     for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1244       if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
1245         continue;
1246 
1247       if (AggArgsUsed.contains(Jdx))
1248         continue;
1249 
1250       TypeFound = true;
1251       AggArgsUsed.insert(Jdx);
1252       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1253       Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1254       AggArgIdx = Jdx;
1255       break;
1256     }
1257 
1258     // We were unable to find an unused type in the output type set that matches
1259     // the output, so we add a pointer type to the argument types of the overall
1260     // function to handle this output and create a mapping to it.
1261     if (!TypeFound) {
1262       Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
1263       // Mark the new pointer type as the last value in the aggregate argument
1264       // list.
1265       unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1266       AggArgsUsed.insert(ArgTypeIdx);
1267       Region.ExtractedArgToAgg.insert(
1268           std::make_pair(OriginalIndex, ArgTypeIdx));
1269       Region.AggArgToExtracted.insert(
1270           std::make_pair(ArgTypeIdx, OriginalIndex));
1271       AggArgIdx = ArgTypeIdx;
1272     }
1273 
1274     // TODO: Adapt to the extra input from the PHINode.
1275     PHINode *PN = dyn_cast<PHINode>(Output);
1276 
1277     Optional<unsigned> GVN;
1278     if (PN && !BlocksInRegion.contains(PN->getParent())) {
1279       // Values outside the region can be combined into PHINode when we
1280       // have multiple exits. We collect both of these into a list to identify
1281       // which values are being used in the PHINode. Each list identifies a
1282       // different PHINode, and a different output. We store the PHINode as it's
1283       // own canonical value.  These canonical values are also dependent on the
1284       // output argument it is saved to.
1285 
1286       // If two PHINodes have the same canonical values, but different aggregate
1287       // argument locations, then they will have distinct Canonical Values.
1288       GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1289       if (!GVN.hasValue())
1290         return;
1291     } else {
1292       // If we do not have a PHINode we use the global value numbering for the
1293       // output value, to find the canonical number to add to the set of stored
1294       // values.
1295       GVN = C.getGVN(Output);
1296       GVN = C.getCanonicalNum(*GVN);
1297     }
1298 
1299     // Each region has a potentially unique set of outputs.  We save which
1300     // values are output in a list of canonical values so we can differentiate
1301     // among the different store schemes.
1302     Region.GVNStores.push_back(*GVN);
1303 
1304     OriginalIndex++;
1305     TypeIndex++;
1306   }
1307 
1308   // We sort the stored values to make sure that we are not affected by analysis
1309   // order when determining what combination of items were stored.
1310   stable_sort(Region.GVNStores);
1311 }
1312 
1313 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1314                                       DenseSet<unsigned> &NotSame) {
1315   std::vector<unsigned> Inputs;
1316   SetVector<Value *> ArgInputs, Outputs;
1317 
1318   getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1319                             Outputs);
1320 
1321   if (Region.IgnoreRegion)
1322     return;
1323 
1324   // Map the inputs found by the CodeExtractor to the arguments found for
1325   // the overall function.
1326   findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
1327 
1328   // Map the outputs found by the CodeExtractor to the arguments found for
1329   // the overall function.
1330   findExtractedOutputToOverallOutputMapping(Region, Outputs);
1331 }
1332 
1333 /// Replace the extracted function in the Region with a call to the overall
1334 /// function constructed from the deduplicated similar regions, replacing and
1335 /// remapping the values passed to the extracted function as arguments to the
1336 /// new arguments of the overall function.
1337 ///
1338 /// \param [in] M - The module to outline from.
1339 /// \param [in] Region - The regions of extracted code to be replaced with a new
1340 /// function.
1341 /// \returns a call instruction with the replaced function.
1342 CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
1343   std::vector<Value *> NewCallArgs;
1344   DenseMap<unsigned, unsigned>::iterator ArgPair;
1345 
1346   OutlinableGroup &Group = *Region.Parent;
1347   CallInst *Call = Region.Call;
1348   assert(Call && "Call to replace is nullptr?");
1349   Function *AggFunc = Group.OutlinedFunction;
1350   assert(AggFunc && "Function to replace with is nullptr?");
1351 
1352   // If the arguments are the same size, there are not values that need to be
1353   // made into an argument, the argument ordering has not been change, or
1354   // different output registers to handle.  We can simply replace the called
1355   // function in this case.
1356   if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1357     LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1358                       << *AggFunc << " with same number of arguments\n");
1359     Call->setCalledFunction(AggFunc);
1360     return Call;
1361   }
1362 
1363   // We have a different number of arguments than the new function, so
1364   // we need to use our previously mappings off extracted argument to overall
1365   // function argument, and constants to overall function argument to create the
1366   // new argument list.
1367   for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1368 
1369     if (AggArgIdx == AggFunc->arg_size() - 1 &&
1370         Group.OutputGVNCombinations.size() > 1) {
1371       // If we are on the last argument, and we need to differentiate between
1372       // output blocks, add an integer to the argument list to determine
1373       // what block to take
1374       LLVM_DEBUG(dbgs() << "Set switch block argument to "
1375                         << Region.OutputBlockNum << "\n");
1376       NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1377                                              Region.OutputBlockNum));
1378       continue;
1379     }
1380 
1381     ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1382     if (ArgPair != Region.AggArgToExtracted.end()) {
1383       Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1384       // If we found the mapping from the extracted function to the overall
1385       // function, we simply add it to the argument list.  We use the same
1386       // value, it just needs to honor the new order of arguments.
1387       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1388                         << *ArgumentValue << "\n");
1389       NewCallArgs.push_back(ArgumentValue);
1390       continue;
1391     }
1392 
1393     // If it is a constant, we simply add it to the argument list as a value.
1394     if (Region.AggArgToConstant.find(AggArgIdx) !=
1395         Region.AggArgToConstant.end()) {
1396       Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1397       LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1398                         << *CST << "\n");
1399       NewCallArgs.push_back(CST);
1400       continue;
1401     }
1402 
1403     // Add a nullptr value if the argument is not found in the extracted
1404     // function.  If we cannot find a value, it means it is not in use
1405     // for the region, so we should not pass anything to it.
1406     LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1407     NewCallArgs.push_back(ConstantPointerNull::get(
1408         static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1409   }
1410 
1411   LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1412                     << *AggFunc << " with new set of arguments\n");
1413   // Create the new call instruction and erase the old one.
1414   Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1415                           Call);
1416 
1417   // It is possible that the call to the outlined function is either the first
1418   // instruction is in the new block, the last instruction, or both.  If either
1419   // of these is the case, we need to make sure that we replace the instruction
1420   // in the IRInstructionData struct with the new call.
1421   CallInst *OldCall = Region.Call;
1422   if (Region.NewFront->Inst == OldCall)
1423     Region.NewFront->Inst = Call;
1424   if (Region.NewBack->Inst == OldCall)
1425     Region.NewBack->Inst = Call;
1426 
1427   // Transfer any debug information.
1428   Call->setDebugLoc(Region.Call->getDebugLoc());
1429   // Since our output may determine which branch we go to, we make sure to
1430   // propogate this new call value through the module.
1431   OldCall->replaceAllUsesWith(Call);
1432 
1433   // Remove the old instruction.
1434   OldCall->eraseFromParent();
1435   Region.Call = Call;
1436 
1437   // Make sure that the argument in the new function has the SwiftError
1438   // argument.
1439   if (Group.SwiftErrorArgument.hasValue())
1440     Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
1441                        Attribute::SwiftError);
1442 
1443   return Call;
1444 }
1445 
1446 /// Find or create a BasicBlock in the outlined function containing PhiBlocks
1447 /// for \p RetVal.
1448 ///
1449 /// \param Group - The OutlinableGroup containing the information about the
1450 /// overall outlined function.
1451 /// \param RetVal - The return value or exit option that we are currently
1452 /// evaluating.
1453 /// \returns The found or newly created BasicBlock to contain the needed
1454 /// PHINodes to be used as outputs.
1455 static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
1456   DenseMap<Value *, BasicBlock *>::iterator PhiBlockForRetVal,
1457       ReturnBlockForRetVal;
1458   PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1459   ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1460   assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1461          "Could not find output value!");
1462   BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1463 
1464   // Find if a PHIBlock exists for this return value already.  If it is
1465   // the first time we are analyzing this, we will not, so we record it.
1466   PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1467   if (PhiBlockForRetVal != Group.PHIBlocks.end())
1468     return PhiBlockForRetVal->second;
1469 
1470   // If we did not find a block, we create one, and insert it into the
1471   // overall function and record it.
1472   bool Inserted = false;
1473   BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1474                                             ReturnBB->getParent());
1475   std::tie(PhiBlockForRetVal, Inserted) =
1476       Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1477 
1478   // We find the predecessors of the return block in the newly created outlined
1479   // function in order to point them to the new PHIBlock rather than the already
1480   // existing return block.
1481   SmallVector<BranchInst *, 2> BranchesToChange;
1482   for (BasicBlock *Pred : predecessors(ReturnBB))
1483     BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
1484 
1485   // Now we mark the branch instructions found, and change the references of the
1486   // return block to the newly created PHIBlock.
1487   for (BranchInst *BI : BranchesToChange)
1488     for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1489       if (BI->getSuccessor(Succ) != ReturnBB)
1490         continue;
1491       BI->setSuccessor(Succ, PHIBlock);
1492     }
1493 
1494   BranchInst::Create(ReturnBB, PHIBlock);
1495 
1496   return PhiBlockForRetVal->second;
1497 }
1498 
1499 /// For the function call now representing the \p Region, find the passed value
1500 /// to that call that represents Argument \p A at the call location if the
1501 /// call has already been replaced with a call to the  overall, aggregate
1502 /// function.
1503 ///
1504 /// \param A - The Argument to get the passed value for.
1505 /// \param Region - The extracted Region corresponding to the outlined function.
1506 /// \returns The Value representing \p A at the call site.
1507 static Value *
1508 getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
1509                                            const OutlinableRegion &Region) {
1510   // If we don't need to adjust the argument number at all (since the call
1511   // has already been replaced by a call to the overall outlined function)
1512   // we can just get the specified argument.
1513   return Region.Call->getArgOperand(A->getArgNo());
1514 }
1515 
1516 /// For the function call now representing the \p Region, find the passed value
1517 /// to that call that represents Argument \p A at the call location if the
1518 /// call has only been replaced by the call to the aggregate function.
1519 ///
1520 /// \param A - The Argument to get the passed value for.
1521 /// \param Region - The extracted Region corresponding to the outlined function.
1522 /// \returns The Value representing \p A at the call site.
1523 static Value *
1524 getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
1525                                            const OutlinableRegion &Region) {
1526   unsigned ArgNum = A->getArgNo();
1527 
1528   // If it is a constant, we can look at our mapping from when we created
1529   // the outputs to figure out what the constant value is.
1530   if (Region.AggArgToConstant.count(ArgNum))
1531     return Region.AggArgToConstant.find(ArgNum)->second;
1532 
1533   // If it is not a constant, and we are not looking at the overall function, we
1534   // need to adjust which argument we are looking at.
1535   ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1536   return Region.Call->getArgOperand(ArgNum);
1537 }
1538 
1539 /// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1540 ///
1541 /// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1542 /// \param Region [in] - The OutlinableRegion containing \p PN.
1543 /// \param OutputMappings [in] - The mapping of output values from outlined
1544 /// region to their original values.
1545 /// \param CanonNums [out] - The canonical numbering for the incoming values to
1546 /// \p PN paired with their incoming block.
1547 /// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1548 /// of \p Region rather than the overall function's call.
1549 static void findCanonNumsForPHI(
1550     PHINode *PN, OutlinableRegion &Region,
1551     const DenseMap<Value *, Value *> &OutputMappings,
1552     SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1553     bool ReplacedWithOutlinedCall = true) {
1554   // Iterate over the incoming values.
1555   for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1556     Value *IVal = PN->getIncomingValue(Idx);
1557     BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1558     // If we have an argument as incoming value, we need to grab the passed
1559     // value from the call itself.
1560     if (Argument *A = dyn_cast<Argument>(IVal)) {
1561       if (ReplacedWithOutlinedCall)
1562         IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
1563       else
1564         IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
1565     }
1566 
1567     // Get the original value if it has been replaced by an output value.
1568     IVal = findOutputMapping(OutputMappings, IVal);
1569 
1570     // Find and add the canonical number for the incoming value.
1571     Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1572     assert(GVN.hasValue() && "No GVN for incoming value");
1573     Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1574     assert(CanonNum.hasValue() && "No Canonical Number for GVN");
1575     CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1576   }
1577 }
1578 
1579 /// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1580 /// in order to condense the number of instructions added to the outlined
1581 /// function.
1582 ///
1583 /// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1584 /// \param Region [in] - The OutlinableRegion containing \p PN.
1585 /// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1586 /// \p PN in.
1587 /// \param OutputMappings [in] - The mapping of output values from outlined
1588 /// region to their original values.
1589 /// \param UsedPhis [in, out] - The PHINodes in the block that have already been
1590 /// matched.
1591 /// \return the newly found or created PHINode in \p OverallPhiBlock.
1592 static PHINode*
1593 findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
1594                        BasicBlock *OverallPhiBlock,
1595                        const DenseMap<Value *, Value *> &OutputMappings,
1596                        DenseSet<PHINode *> &UsedPHIs) {
1597   OutlinableGroup &Group = *Region.Parent;
1598 
1599 
1600   // A list of the canonical numbering assigned to each incoming value, paired
1601   // with the incoming block for the PHINode passed into this function.
1602   SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums;
1603 
1604   // We have to use the extracted function since we have merged this region into
1605   // the overall function yet.  We make sure to reassign the argument numbering
1606   // since it is possible that the argument ordering is different between the
1607   // functions.
1608   findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1609                       /* ReplacedWithOutlinedCall = */ false);
1610 
1611   OutlinableRegion *FirstRegion = Group.Regions[0];
1612 
1613   // A list of the canonical numbering assigned to each incoming value, paired
1614   // with the incoming block for the PHINode that we are currently comparing
1615   // the passed PHINode to.
1616   SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums;
1617 
1618   // Find the Canonical Numbering for each PHINode, if it matches, we replace
1619   // the uses of the PHINode we are searching for, with the found PHINode.
1620   for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1621     // If this PHINode has already been matched to another PHINode to be merged,
1622     // we skip it.
1623     if (UsedPHIs.find(&CurrPN) != UsedPHIs.end())
1624       continue;
1625 
1626     CurrentCanonNums.clear();
1627     findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1628                         /* ReplacedWithOutlinedCall = */ true);
1629 
1630     // If the list of incoming values is not the same length, then they cannot
1631     // match since there is not an analogue for each incoming value.
1632     if (PNCanonNums.size() != CurrentCanonNums.size())
1633       continue;
1634 
1635     bool FoundMatch = true;
1636 
1637     // We compare the canonical value for each incoming value in the passed
1638     // in PHINode to one already present in the outlined region.  If the
1639     // incoming values do not match, then the PHINodes do not match.
1640 
1641     // We also check to make sure that the incoming block matches as well by
1642     // finding the corresponding incoming block in the combined outlined region
1643     // for the current outlined region.
1644     for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1645       std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1646       std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1647       if (ToCompareTo.first != ToAdd.first) {
1648         FoundMatch = false;
1649         break;
1650       }
1651 
1652       BasicBlock *CorrespondingBlock =
1653           Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1654       assert(CorrespondingBlock && "Found block is nullptr");
1655       if (CorrespondingBlock != ToCompareTo.second) {
1656         FoundMatch = false;
1657         break;
1658       }
1659     }
1660 
1661     // If all incoming values and branches matched, then we can merge
1662     // into the found PHINode.
1663     if (FoundMatch) {
1664       UsedPHIs.insert(&CurrPN);
1665       return &CurrPN;
1666     }
1667   }
1668 
1669   // If we've made it here, it means we weren't able to replace the PHINode, so
1670   // we must insert it ourselves.
1671   PHINode *NewPN = cast<PHINode>(PN.clone());
1672   NewPN->insertBefore(&*OverallPhiBlock->begin());
1673   for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1674        Idx++) {
1675     Value *IncomingVal = NewPN->getIncomingValue(Idx);
1676     BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1677 
1678     // Find corresponding basic block in the overall function for the incoming
1679     // block.
1680     BasicBlock *BlockToUse =
1681         Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1682     NewPN->setIncomingBlock(Idx, BlockToUse);
1683 
1684     // If we have an argument we make sure we replace using the argument from
1685     // the correct function.
1686     if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1687       Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1688       NewPN->setIncomingValue(Idx, Val);
1689       continue;
1690     }
1691 
1692     // Find the corresponding value in the overall function.
1693     IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1694     Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1695     assert(Val && "Value is nullptr?");
1696     NewPN->setIncomingValue(Idx, Val);
1697   }
1698   return NewPN;
1699 }
1700 
1701 // Within an extracted function, replace the argument uses of the extracted
1702 // region with the arguments of the function for an OutlinableGroup.
1703 //
1704 /// \param [in] Region - The region of extracted code to be changed.
1705 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1706 /// region.
1707 /// \param [in] FirstFunction - A flag to indicate whether we are using this
1708 /// function to define the overall outlined function for all the regions, or
1709 /// if we are operating on one of the following regions.
1710 static void
1711 replaceArgumentUses(OutlinableRegion &Region,
1712                     DenseMap<Value *, BasicBlock *> &OutputBBs,
1713                     const DenseMap<Value *, Value *> &OutputMappings,
1714                     bool FirstFunction = false) {
1715   OutlinableGroup &Group = *Region.Parent;
1716   assert(Region.ExtractedFunction && "Region has no extracted function?");
1717 
1718   Function *DominatingFunction = Region.ExtractedFunction;
1719   if (FirstFunction)
1720     DominatingFunction = Group.OutlinedFunction;
1721   DominatorTree DT(*DominatingFunction);
1722   DenseSet<PHINode *> UsedPHIs;
1723 
1724   for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1725        ArgIdx++) {
1726     assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
1727                Region.ExtractedArgToAgg.end() &&
1728            "No mapping from extracted to outlined?");
1729     unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1730     Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1731     Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1732     // The argument is an input, so we can simply replace it with the overall
1733     // argument value
1734     if (ArgIdx < Region.NumExtractedInputs) {
1735       LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1736                         << *Region.ExtractedFunction << " with " << *AggArg
1737                         << " in function " << *Group.OutlinedFunction << "\n");
1738       Arg->replaceAllUsesWith(AggArg);
1739       continue;
1740     }
1741 
1742     // If we are replacing an output, we place the store value in its own
1743     // block inside the overall function before replacing the use of the output
1744     // in the function.
1745     assert(Arg->hasOneUse() && "Output argument can only have one use");
1746     User *InstAsUser = Arg->user_back();
1747     assert(InstAsUser && "User is nullptr!");
1748 
1749     Instruction *I = cast<Instruction>(InstAsUser);
1750     BasicBlock *BB = I->getParent();
1751     SmallVector<BasicBlock *, 4> Descendants;
1752     DT.getDescendants(BB, Descendants);
1753     bool EdgeAdded = false;
1754     if (Descendants.size() == 0) {
1755       EdgeAdded = true;
1756       DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1757       DT.getDescendants(BB, Descendants);
1758     }
1759 
1760     // Iterate over the following blocks, looking for return instructions,
1761     // if we find one, find the corresponding output block for the return value
1762     // and move our store instruction there.
1763     for (BasicBlock *DescendBB : Descendants) {
1764       ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1765       if (!RI)
1766         continue;
1767       Value *RetVal = RI->getReturnValue();
1768       auto VBBIt = OutputBBs.find(RetVal);
1769       assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1770 
1771       // If this is storing a PHINode, we must make sure it is included in the
1772       // overall function.
1773       StoreInst *SI = cast<StoreInst>(I);
1774 
1775       Value *ValueOperand = SI->getValueOperand();
1776 
1777       StoreInst *NewI = cast<StoreInst>(I->clone());
1778       NewI->setDebugLoc(DebugLoc());
1779       BasicBlock *OutputBB = VBBIt->second;
1780       OutputBB->getInstList().push_back(NewI);
1781       LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1782                         << *OutputBB << "\n");
1783 
1784       // If this is storing a PHINode, we must make sure it is included in the
1785       // overall function.
1786       if (!isa<PHINode>(ValueOperand) ||
1787           Region.Candidate->getGVN(ValueOperand).hasValue()) {
1788         if (FirstFunction)
1789           continue;
1790         Value *CorrVal =
1791             Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1792         assert(CorrVal && "Value is nullptr?");
1793         NewI->setOperand(0, CorrVal);
1794         continue;
1795       }
1796       PHINode *PN = cast<PHINode>(SI->getValueOperand());
1797       // If it has a value, it was not split by the code extractor, which
1798       // is what we are looking for.
1799       if (Region.Candidate->getGVN(PN).hasValue())
1800         continue;
1801 
1802       // We record the parent block for the PHINode in the Region so that
1803       // we can exclude it from checks later on.
1804       Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1805 
1806       // If this is the first function, we do not need to worry about mergiing
1807       // this with any other block in the overall outlined function, so we can
1808       // just continue.
1809       if (FirstFunction) {
1810         BasicBlock *PHIBlock = PN->getParent();
1811         Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1812         continue;
1813       }
1814 
1815       // We look for the aggregate block that contains the PHINodes leading into
1816       // this exit path. If we can't find one, we create one.
1817       BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1818 
1819       // For our PHINode, we find the combined canonical numbering, and
1820       // attempt to find a matching PHINode in the overall PHIBlock.  If we
1821       // cannot, we copy the PHINode and move it into this new block.
1822       PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1823                                               OutputMappings, UsedPHIs);
1824       NewI->setOperand(0, NewPN);
1825     }
1826 
1827     // If we added an edge for basic blocks without a predecessor, we remove it
1828     // here.
1829     if (EdgeAdded)
1830       DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1831     I->eraseFromParent();
1832 
1833     LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1834                       << *Region.ExtractedFunction << " with " << *AggArg
1835                       << " in function " << *Group.OutlinedFunction << "\n");
1836     Arg->replaceAllUsesWith(AggArg);
1837   }
1838 }
1839 
1840 /// Within an extracted function, replace the constants that need to be lifted
1841 /// into arguments with the actual argument.
1842 ///
1843 /// \param Region [in] - The region of extracted code to be changed.
1844 void replaceConstants(OutlinableRegion &Region) {
1845   OutlinableGroup &Group = *Region.Parent;
1846   // Iterate over the constants that need to be elevated into arguments
1847   for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1848     unsigned AggArgIdx = Const.first;
1849     Function *OutlinedFunction = Group.OutlinedFunction;
1850     assert(OutlinedFunction && "Overall Function is not defined?");
1851     Constant *CST = Const.second;
1852     Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1853     // Identify the argument it will be elevated to, and replace instances of
1854     // that constant in the function.
1855 
1856     // TODO: If in the future constants do not have one global value number,
1857     // i.e. a constant 1 could be mapped to several values, this check will
1858     // have to be more strict.  It cannot be using only replaceUsesWithIf.
1859 
1860     LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1861                       << " in function " << *OutlinedFunction << " with "
1862                       << *Arg << "\n");
1863     CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1864       if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1865         return I->getFunction() == OutlinedFunction;
1866       return false;
1867     });
1868   }
1869 }
1870 
1871 /// It is possible that there is a basic block that already performs the same
1872 /// stores. This returns a duplicate block, if it exists
1873 ///
1874 /// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1875 /// \param OutputStoreBBs [in] The existing output blocks.
1876 /// \returns an optional value with the number output block if there is a match.
1877 Optional<unsigned> findDuplicateOutputBlock(
1878     DenseMap<Value *, BasicBlock *> &OutputBBs,
1879     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1880 
1881   bool Mismatch = false;
1882   unsigned MatchingNum = 0;
1883   // We compare the new set output blocks to the other sets of output blocks.
1884   // If they are the same number, and have identical instructions, they are
1885   // considered to be the same.
1886   for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1887     Mismatch = false;
1888     for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1889       DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
1890           OutputBBs.find(VToB.first);
1891       if (OutputBBIt == OutputBBs.end()) {
1892         Mismatch = true;
1893         break;
1894       }
1895 
1896       BasicBlock *CompBB = VToB.second;
1897       BasicBlock *OutputBB = OutputBBIt->second;
1898       if (CompBB->size() - 1 != OutputBB->size()) {
1899         Mismatch = true;
1900         break;
1901       }
1902 
1903       BasicBlock::iterator NIt = OutputBB->begin();
1904       for (Instruction &I : *CompBB) {
1905         if (isa<BranchInst>(&I))
1906           continue;
1907 
1908         if (!I.isIdenticalTo(&(*NIt))) {
1909           Mismatch = true;
1910           break;
1911         }
1912 
1913         NIt++;
1914       }
1915     }
1916 
1917     if (!Mismatch)
1918       return MatchingNum;
1919 
1920     MatchingNum++;
1921   }
1922 
1923   return None;
1924 }
1925 
1926 /// Remove empty output blocks from the outlined region.
1927 ///
1928 /// \param BlocksToPrune - Mapping of return values output blocks for the \p
1929 /// Region.
1930 /// \param Region - The OutlinableRegion we are analyzing.
1931 static bool
1932 analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
1933                             OutlinableRegion &Region) {
1934   bool AllRemoved = true;
1935   Value *RetValueForBB;
1936   BasicBlock *NewBB;
1937   SmallVector<Value *, 4> ToRemove;
1938   // Iterate over the output blocks created in the outlined section.
1939   for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1940     RetValueForBB = VtoBB.first;
1941     NewBB = VtoBB.second;
1942 
1943     // If there are no instructions, we remove it from the module, and also
1944     // mark the value for removal from the return value to output block mapping.
1945     if (NewBB->size() == 0) {
1946       NewBB->eraseFromParent();
1947       ToRemove.push_back(RetValueForBB);
1948       continue;
1949     }
1950 
1951     // Mark that we could not remove all the blocks since they were not all
1952     // empty.
1953     AllRemoved = false;
1954   }
1955 
1956   // Remove the return value from the mapping.
1957   for (Value *V : ToRemove)
1958     BlocksToPrune.erase(V);
1959 
1960   // Mark the region as having the no output scheme.
1961   if (AllRemoved)
1962     Region.OutputBlockNum = -1;
1963 
1964   return AllRemoved;
1965 }
1966 
1967 /// For the outlined section, move needed the StoreInsts for the output
1968 /// registers into their own block. Then, determine if there is a duplicate
1969 /// output block already created.
1970 ///
1971 /// \param [in] OG - The OutlinableGroup of regions to be outlined.
1972 /// \param [in] Region - The OutlinableRegion that is being analyzed.
1973 /// \param [in,out] OutputBBs - the blocks that stores for this region will be
1974 /// placed in.
1975 /// \param [in] EndBBs - the final blocks of the extracted function.
1976 /// \param [in] OutputMappings - OutputMappings the mapping of values that have
1977 /// been replaced by a new output value.
1978 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1979 static void alignOutputBlockWithAggFunc(
1980     OutlinableGroup &OG, OutlinableRegion &Region,
1981     DenseMap<Value *, BasicBlock *> &OutputBBs,
1982     DenseMap<Value *, BasicBlock *> &EndBBs,
1983     const DenseMap<Value *, Value *> &OutputMappings,
1984     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1985   // If none of the output blocks have any instructions, this means that we do
1986   // not have to determine if it matches any of the other output schemes, and we
1987   // don't have to do anything else.
1988   if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
1989     return;
1990 
1991   // Determine is there is a duplicate set of blocks.
1992   Optional<unsigned> MatchingBB =
1993       findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
1994 
1995   // If there is, we remove the new output blocks.  If it does not,
1996   // we add it to our list of sets of output blocks.
1997   if (MatchingBB.hasValue()) {
1998     LLVM_DEBUG(dbgs() << "Set output block for region in function"
1999                       << Region.ExtractedFunction << " to "
2000                       << MatchingBB.getValue());
2001 
2002     Region.OutputBlockNum = MatchingBB.getValue();
2003     for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2004       VtoBB.second->eraseFromParent();
2005     return;
2006   }
2007 
2008   Region.OutputBlockNum = OutputStoreBBs.size();
2009 
2010   Value *RetValueForBB;
2011   BasicBlock *NewBB;
2012   OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2013   for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2014     RetValueForBB = VtoBB.first;
2015     NewBB = VtoBB.second;
2016     DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2017         EndBBs.find(RetValueForBB);
2018     LLVM_DEBUG(dbgs() << "Create output block for region in"
2019                       << Region.ExtractedFunction << " to "
2020                       << *NewBB);
2021     BranchInst::Create(VBBIt->second, NewBB);
2022     OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2023   }
2024 }
2025 
2026 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2027 /// before creating a basic block for each \p NewMap, and inserting into the new
2028 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2029 ///
2030 /// \param OldMap [in] - The mapping to base the new mapping off of.
2031 /// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2032 /// \param ParentFunc [in] - The function to put the new basic block in.
2033 /// \param BaseName [in] - The start of the BasicBlock names to be appended to
2034 /// by an index value.
2035 static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
2036                                        DenseMap<Value *, BasicBlock *> &NewMap,
2037                                        Function *ParentFunc, Twine BaseName) {
2038   unsigned Idx = 0;
2039   std::vector<Value *> SortedKeys;
2040 
2041   getSortedConstantKeys(SortedKeys, OldMap);
2042 
2043   for (Value *RetVal : SortedKeys) {
2044     BasicBlock *NewBB = BasicBlock::Create(
2045         ParentFunc->getContext(),
2046         Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
2047         ParentFunc);
2048     NewMap.insert(std::make_pair(RetVal, NewBB));
2049   }
2050 }
2051 
2052 /// Create the switch statement for outlined function to differentiate between
2053 /// all the output blocks.
2054 ///
2055 /// For the outlined section, determine if an outlined block already exists that
2056 /// matches the needed stores for the extracted section.
2057 /// \param [in] M - The module we are outlining from.
2058 /// \param [in] OG - The group of regions to be outlined.
2059 /// \param [in] EndBBs - The final blocks of the extracted function.
2060 /// \param [in,out] OutputStoreBBs - The existing output blocks.
2061 void createSwitchStatement(
2062     Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
2063     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2064   // We only need the switch statement if there is more than one store
2065   // combination, or there is more than one set of output blocks.  The first
2066   // will occur when we store different sets of values for two different
2067   // regions.  The second will occur when we have two outputs that are combined
2068   // in a PHINode outside of the region in one outlined instance, and are used
2069   // seaparately in another. This will create the same set of OutputGVNs, but
2070   // will generate two different output schemes.
2071   if (OG.OutputGVNCombinations.size() > 1) {
2072     Function *AggFunc = OG.OutlinedFunction;
2073     // Create a final block for each different return block.
2074     DenseMap<Value *, BasicBlock *> ReturnBBs;
2075     createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2076 
2077     for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2078       std::pair<Value *, BasicBlock *> &OutputBlock =
2079           *OG.EndBBs.find(RetBlockPair.first);
2080       BasicBlock *ReturnBlock = RetBlockPair.second;
2081       BasicBlock *EndBB = OutputBlock.second;
2082       Instruction *Term = EndBB->getTerminator();
2083       // Move the return value to the final block instead of the original exit
2084       // stub.
2085       Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2086       // Put the switch statement in the old end basic block for the function
2087       // with a fall through to the new return block.
2088       LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2089                         << OutputStoreBBs.size() << "\n");
2090       SwitchInst *SwitchI =
2091           SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2092                              ReturnBlock, OutputStoreBBs.size(), EndBB);
2093 
2094       unsigned Idx = 0;
2095       for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2096         DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
2097             OutputStoreBB.find(OutputBlock.first);
2098 
2099         if (OSBBIt == OutputStoreBB.end())
2100           continue;
2101 
2102         BasicBlock *BB = OSBBIt->second;
2103         SwitchI->addCase(
2104             ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2105         Term = BB->getTerminator();
2106         Term->setSuccessor(0, ReturnBlock);
2107         Idx++;
2108       }
2109     }
2110     return;
2111   }
2112 
2113   assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2114 
2115   // If there needs to be stores, move them from the output blocks to their
2116   // corresponding ending block.  We do not check that the OutputGVNCombinations
2117   // is equal to 1 here since that could just been the case where there are 0
2118   // outputs. Instead, we check whether there is more than one set of output
2119   // blocks since this is the only case where we would have to move the
2120   // stores, and erase the extraneous blocks.
2121   if (OutputStoreBBs.size() == 1) {
2122     LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2123                       << *OG.OutlinedFunction << "\n");
2124     DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2125     for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2126       DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
2127           EndBBs.find(VBPair.first);
2128       assert(EndBBIt != EndBBs.end() && "Could not find end block");
2129       BasicBlock *EndBB = EndBBIt->second;
2130       BasicBlock *OutputBB = VBPair.second;
2131       Instruction *Term = OutputBB->getTerminator();
2132       Term->eraseFromParent();
2133       Term = EndBB->getTerminator();
2134       moveBBContents(*OutputBB, *EndBB);
2135       Term->moveBefore(*EndBB, EndBB->end());
2136       OutputBB->eraseFromParent();
2137     }
2138   }
2139 }
2140 
2141 /// Fill the new function that will serve as the replacement function for all of
2142 /// the extracted regions of a certain structure from the first region in the
2143 /// list of regions.  Replace this first region's extracted function with the
2144 /// new overall function.
2145 ///
2146 /// \param [in] M - The module we are outlining from.
2147 /// \param [in] CurrentGroup - The group of regions to be outlined.
2148 /// \param [in,out] OutputStoreBBs - The output blocks for each different
2149 /// set of stores needed for the different functions.
2150 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2151 /// once outlining is complete.
2152 /// \param [in] OutputMappings - Extracted functions to erase from module
2153 /// once outlining is complete.
2154 static void fillOverallFunction(
2155     Module &M, OutlinableGroup &CurrentGroup,
2156     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2157     std::vector<Function *> &FuncsToRemove,
2158     const DenseMap<Value *, Value *> &OutputMappings) {
2159   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2160 
2161   // Move first extracted function's instructions into new function.
2162   LLVM_DEBUG(dbgs() << "Move instructions from "
2163                     << *CurrentOS->ExtractedFunction << " to instruction "
2164                     << *CurrentGroup.OutlinedFunction << "\n");
2165   moveFunctionData(*CurrentOS->ExtractedFunction,
2166                    *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2167 
2168   // Transfer the attributes from the function to the new function.
2169   for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2170     CurrentGroup.OutlinedFunction->addFnAttr(A);
2171 
2172   // Create a new set of output blocks for the first extracted function.
2173   DenseMap<Value *, BasicBlock *> NewBBs;
2174   createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2175                              CurrentGroup.OutlinedFunction, "output_block_0");
2176   CurrentOS->OutputBlockNum = 0;
2177 
2178   replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2179   replaceConstants(*CurrentOS);
2180 
2181   // We first identify if any output blocks are empty, if they are we remove
2182   // them. We then create a branch instruction to the basic block to the return
2183   // block for the function for each non empty output block.
2184   if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2185     OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2186     for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2187       DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2188           CurrentGroup.EndBBs.find(VToBB.first);
2189       BasicBlock *EndBB = VBBIt->second;
2190       BranchInst::Create(EndBB, VToBB.second);
2191       OutputStoreBBs.back().insert(VToBB);
2192     }
2193   }
2194 
2195   // Replace the call to the extracted function with the outlined function.
2196   CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2197 
2198   // We only delete the extracted functions at the end since we may need to
2199   // reference instructions contained in them for mapping purposes.
2200   FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2201 }
2202 
2203 void IROutliner::deduplicateExtractedSections(
2204     Module &M, OutlinableGroup &CurrentGroup,
2205     std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2206   createFunction(M, CurrentGroup, OutlinedFunctionNum);
2207 
2208   std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2209 
2210   OutlinableRegion *CurrentOS;
2211 
2212   fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2213                       OutputMappings);
2214 
2215   std::vector<Value *> SortedKeys;
2216   for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2217     CurrentOS = CurrentGroup.Regions[Idx];
2218     AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
2219                                                *CurrentOS->ExtractedFunction);
2220 
2221     // Create a set of BasicBlocks, one for each return block, to hold the
2222     // needed store instructions.
2223     DenseMap<Value *, BasicBlock *> NewBBs;
2224     createAndInsertBasicBlocks(
2225         CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
2226         "output_block_" + Twine(static_cast<unsigned>(Idx)));
2227     replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2228     alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2229                                 CurrentGroup.EndBBs, OutputMappings,
2230                                 OutputStoreBBs);
2231 
2232     CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2233     FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2234   }
2235 
2236   // Create a switch statement to handle the different output schemes.
2237   createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2238 
2239   OutlinedFunctionNum++;
2240 }
2241 
2242 /// Checks that the next instruction in the InstructionDataList matches the
2243 /// next instruction in the module.  If they do not, there could be the
2244 /// possibility that extra code has been inserted, and we must ignore it.
2245 ///
2246 /// \param ID - The IRInstructionData to check the next instruction of.
2247 /// \returns true if the InstructionDataList and actual instruction match.
2248 static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
2249   // We check if there is a discrepancy between the InstructionDataList
2250   // and the actual next instruction in the module.  If there is, it means
2251   // that an extra instruction was added, likely by the CodeExtractor.
2252 
2253   // Since we do not have any similarity data about this particular
2254   // instruction, we cannot confidently outline it, and must discard this
2255   // candidate.
2256   IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2257   Instruction *NextIDLInst = NextIDIt->Inst;
2258   Instruction *NextModuleInst = nullptr;
2259   if (!ID.Inst->isTerminator())
2260     NextModuleInst = ID.Inst->getNextNonDebugInstruction();
2261   else if (NextIDLInst != nullptr)
2262     NextModuleInst =
2263         &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2264 
2265   if (NextIDLInst && NextIDLInst != NextModuleInst)
2266     return false;
2267 
2268   return true;
2269 }
2270 
2271 bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2272     const OutlinableRegion &Region) {
2273   IRSimilarityCandidate *IRSC = Region.Candidate;
2274   unsigned StartIdx = IRSC->getStartIdx();
2275   unsigned EndIdx = IRSC->getEndIdx();
2276 
2277   // A check to make sure that we are not about to attempt to outline something
2278   // that has already been outlined.
2279   for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2280     if (Outlined.contains(Idx))
2281       return false;
2282 
2283   // We check if the recorded instruction matches the actual next instruction,
2284   // if it does not, we fix it in the InstructionDataList.
2285   if (!Region.Candidate->backInstruction()->isTerminator()) {
2286     Instruction *NewEndInst =
2287         Region.Candidate->backInstruction()->getNextNonDebugInstruction();
2288     assert(NewEndInst && "Next instruction is a nullptr?");
2289     if (Region.Candidate->end()->Inst != NewEndInst) {
2290       IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2291       IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2292           IRInstructionData(*NewEndInst,
2293                             InstructionClassifier.visit(*NewEndInst), *IDL);
2294 
2295       // Insert the first IRInstructionData of the new region after the
2296       // last IRInstructionData of the IRSimilarityCandidate.
2297       IDL->insert(Region.Candidate->end(), *NewEndIRID);
2298     }
2299   }
2300 
2301   return none_of(*IRSC, [this](IRInstructionData &ID) {
2302     if (!nextIRInstructionDataMatchesNextInst(ID))
2303       return true;
2304 
2305     return !this->InstructionClassifier.visit(ID.Inst);
2306   });
2307 }
2308 
2309 void IROutliner::pruneIncompatibleRegions(
2310     std::vector<IRSimilarityCandidate> &CandidateVec,
2311     OutlinableGroup &CurrentGroup) {
2312   bool PreviouslyOutlined;
2313 
2314   // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2315   stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2316                                const IRSimilarityCandidate &RHS) {
2317     return LHS.getStartIdx() < RHS.getStartIdx();
2318   });
2319 
2320   IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2321   // Since outlining a call and a branch instruction will be the same as only
2322   // outlinining a call instruction, we ignore it as a space saving.
2323   if (FirstCandidate.getLength() == 2) {
2324     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2325         isa<BranchInst>(FirstCandidate.back()->Inst))
2326       return;
2327   }
2328 
2329   unsigned CurrentEndIdx = 0;
2330   for (IRSimilarityCandidate &IRSC : CandidateVec) {
2331     PreviouslyOutlined = false;
2332     unsigned StartIdx = IRSC.getStartIdx();
2333     unsigned EndIdx = IRSC.getEndIdx();
2334 
2335     for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2336       if (Outlined.contains(Idx)) {
2337         PreviouslyOutlined = true;
2338         break;
2339       }
2340 
2341     if (PreviouslyOutlined)
2342       continue;
2343 
2344     // Check over the instructions, and if the basic block has its address
2345     // taken for use somewhere else, we do not outline that block.
2346     bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2347       return ID.Inst->getParent()->hasAddressTaken();
2348     });
2349 
2350     if (BBHasAddressTaken)
2351       continue;
2352 
2353     if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2354         !OutlineFromLinkODRs)
2355       continue;
2356 
2357     // Greedily prune out any regions that will overlap with already chosen
2358     // regions.
2359     if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2360       continue;
2361 
2362     bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2363       if (!nextIRInstructionDataMatchesNextInst(ID))
2364         return true;
2365 
2366       return !this->InstructionClassifier.visit(ID.Inst);
2367     });
2368 
2369     if (BadInst)
2370       continue;
2371 
2372     OutlinableRegion *OS = new (RegionAllocator.Allocate())
2373         OutlinableRegion(IRSC, CurrentGroup);
2374     CurrentGroup.Regions.push_back(OS);
2375 
2376     CurrentEndIdx = EndIdx;
2377   }
2378 }
2379 
2380 InstructionCost
2381 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2382   InstructionCost RegionBenefit = 0;
2383   for (OutlinableRegion *Region : CurrentGroup.Regions) {
2384     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2385     // We add the number of instructions in the region to the benefit as an
2386     // estimate as to how much will be removed.
2387     RegionBenefit += Region->getBenefit(TTI);
2388     LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2389                       << " saved instructions to overfall benefit.\n");
2390   }
2391 
2392   return RegionBenefit;
2393 }
2394 
2395 /// For the \p OutputCanon number passed in find the value represented by this
2396 /// canonical number. If it is from a PHINode, we pick the first incoming
2397 /// value and return that Value instead.
2398 ///
2399 /// \param Region - The OutlinableRegion to get the Value from.
2400 /// \param OutputCanon - The canonical number to find the Value from.
2401 /// \returns The Value represented by a canonical number \p OutputCanon in \p
2402 /// Region.
2403 static Value *findOutputValueInRegion(OutlinableRegion &Region,
2404                                       unsigned OutputCanon) {
2405   OutlinableGroup &CurrentGroup = *Region.Parent;
2406   // If the value is greater than the value in the tracker, we have a
2407   // PHINode and will instead use one of the incoming values to find the
2408   // type.
2409   if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2410     auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2411     assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2412            "Could not find GVN set for PHINode number!");
2413     assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2414     OutputCanon = *It->second.second.begin();
2415   }
2416   Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon);
2417   assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?");
2418   Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2419   assert(OV.hasValue() && "Could not find value for GVN?");
2420   return *OV;
2421 }
2422 
2423 InstructionCost
2424 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2425   InstructionCost OverallCost = 0;
2426   for (OutlinableRegion *Region : CurrentGroup.Regions) {
2427     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2428 
2429     // Each output incurs a load after the call, so we add that to the cost.
2430     for (unsigned OutputCanon : Region->GVNStores) {
2431       Value *V = findOutputValueInRegion(*Region, OutputCanon);
2432       InstructionCost LoadCost =
2433           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2434                               TargetTransformInfo::TCK_CodeSize);
2435 
2436       LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2437                         << " instructions to cost for output of type "
2438                         << *V->getType() << "\n");
2439       OverallCost += LoadCost;
2440     }
2441   }
2442 
2443   return OverallCost;
2444 }
2445 
2446 /// Find the extra instructions needed to handle any output values for the
2447 /// region.
2448 ///
2449 /// \param [in] M - The Module to outline from.
2450 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2451 /// \param [in] TTI - The TargetTransformInfo used to collect information for
2452 /// new instruction costs.
2453 /// \returns the additional cost to handle the outputs.
2454 static InstructionCost findCostForOutputBlocks(Module &M,
2455                                                OutlinableGroup &CurrentGroup,
2456                                                TargetTransformInfo &TTI) {
2457   InstructionCost OutputCost = 0;
2458   unsigned NumOutputBranches = 0;
2459 
2460   OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2461   IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2462   DenseSet<BasicBlock *> CandidateBlocks;
2463   Candidate.getBasicBlocks(CandidateBlocks);
2464 
2465   // Count the number of different output branches that point to blocks outside
2466   // of the region.
2467   DenseSet<BasicBlock *> FoundBlocks;
2468   for (IRInstructionData &ID : Candidate) {
2469     if (!isa<BranchInst>(ID.Inst))
2470       continue;
2471 
2472     for (Value *V : ID.OperVals) {
2473       BasicBlock *BB = static_cast<BasicBlock *>(V);
2474       DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
2475       if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
2476         continue;
2477       FoundBlocks.insert(BB);
2478       NumOutputBranches++;
2479     }
2480   }
2481 
2482   CurrentGroup.BranchesToOutside = NumOutputBranches;
2483 
2484   for (const ArrayRef<unsigned> &OutputUse :
2485        CurrentGroup.OutputGVNCombinations) {
2486     for (unsigned OutputCanon : OutputUse) {
2487       Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2488       InstructionCost StoreCost =
2489           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2490                               TargetTransformInfo::TCK_CodeSize);
2491 
2492       // An instruction cost is added for each store set that needs to occur for
2493       // various output combinations inside the function, plus a branch to
2494       // return to the exit block.
2495       LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2496                         << " instructions to cost for output of type "
2497                         << *V->getType() << "\n");
2498       OutputCost += StoreCost * NumOutputBranches;
2499     }
2500 
2501     InstructionCost BranchCost =
2502         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
2503     LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2504                       << " a branch instruction\n");
2505     OutputCost += BranchCost * NumOutputBranches;
2506   }
2507 
2508   // If there is more than one output scheme, we must have a comparison and
2509   // branch for each different item in the switch statement.
2510   if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2511     InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2512         Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2513         Type::getInt32Ty(M.getContext()), CmpInst::BAD_ICMP_PREDICATE,
2514         TargetTransformInfo::TCK_CodeSize);
2515     InstructionCost BranchCost =
2516         TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
2517 
2518     unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2519     InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2520 
2521     LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2522                       << " instructions for each switch case for each different"
2523                       << " output path in a function\n");
2524     OutputCost += TotalCost * NumOutputBranches;
2525   }
2526 
2527   return OutputCost;
2528 }
2529 
2530 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2531   InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2532   CurrentGroup.Benefit += RegionBenefit;
2533   LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2534 
2535   InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2536   CurrentGroup.Cost += OutputReloadCost;
2537   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2538 
2539   InstructionCost AverageRegionBenefit =
2540       RegionBenefit / CurrentGroup.Regions.size();
2541   unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2542   unsigned NumRegions = CurrentGroup.Regions.size();
2543   TargetTransformInfo &TTI =
2544       getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2545 
2546   // We add one region to the cost once, to account for the instructions added
2547   // inside of the newly created function.
2548   LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2549                     << " instructions to cost for body of new function.\n");
2550   CurrentGroup.Cost += AverageRegionBenefit;
2551   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2552 
2553   // For each argument, we must add an instruction for loading the argument
2554   // out of the register and into a value inside of the newly outlined function.
2555   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2556                     << " instructions to cost for each argument in the new"
2557                     << " function.\n");
2558   CurrentGroup.Cost +=
2559       OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2560   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2561 
2562   // Each argument needs to either be loaded into a register or onto the stack.
2563   // Some arguments will only be loaded into the stack once the argument
2564   // registers are filled.
2565   LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2566                     << " instructions to cost for each argument in the new"
2567                     << " function " << NumRegions << " times for the "
2568                     << "needed argument handling at the call site.\n");
2569   CurrentGroup.Cost +=
2570       2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2571   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2572 
2573   CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2574   LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2575 }
2576 
2577 void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2578                                      ArrayRef<Value *> Outputs,
2579                                      LoadInst *LI) {
2580   // For and load instructions following the call
2581   Value *Operand = LI->getPointerOperand();
2582   Optional<unsigned> OutputIdx = None;
2583   // Find if the operand it is an output register.
2584   for (unsigned ArgIdx = Region.NumExtractedInputs;
2585        ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2586     if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2587       OutputIdx = ArgIdx - Region.NumExtractedInputs;
2588       break;
2589     }
2590   }
2591 
2592   // If we found an output register, place a mapping of the new value
2593   // to the original in the mapping.
2594   if (!OutputIdx.hasValue())
2595     return;
2596 
2597   if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
2598       OutputMappings.end()) {
2599     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2600                       << *Outputs[OutputIdx.getValue()] << "\n");
2601     OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
2602   } else {
2603     Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
2604     LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2605                       << *Outputs[OutputIdx.getValue()] << "\n");
2606     OutputMappings.insert(std::make_pair(LI, Orig));
2607   }
2608 }
2609 
2610 bool IROutliner::extractSection(OutlinableRegion &Region) {
2611   SetVector<Value *> ArgInputs, Outputs, SinkCands;
2612   assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2613   BasicBlock *InitialStart = Region.StartBB;
2614   Function *OrigF = Region.StartBB->getParent();
2615   CodeExtractorAnalysisCache CEAC(*OrigF);
2616   Region.ExtractedFunction =
2617       Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2618 
2619   // If the extraction was successful, find the BasicBlock, and reassign the
2620   // OutlinableRegion blocks
2621   if (!Region.ExtractedFunction) {
2622     LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2623                       << "\n");
2624     Region.reattachCandidate();
2625     return false;
2626   }
2627 
2628   // Get the block containing the called branch, and reassign the blocks as
2629   // necessary.  If the original block still exists, it is because we ended on
2630   // a branch instruction, and so we move the contents into the block before
2631   // and assign the previous block correctly.
2632   User *InstAsUser = Region.ExtractedFunction->user_back();
2633   BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2634   Region.PrevBB = RewrittenBB->getSinglePredecessor();
2635   assert(Region.PrevBB && "PrevBB is nullptr?");
2636   if (Region.PrevBB == InitialStart) {
2637     BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2638     Instruction *BI = NewPrev->getTerminator();
2639     BI->eraseFromParent();
2640     moveBBContents(*InitialStart, *NewPrev);
2641     Region.PrevBB = NewPrev;
2642     InitialStart->eraseFromParent();
2643   }
2644 
2645   Region.StartBB = RewrittenBB;
2646   Region.EndBB = RewrittenBB;
2647 
2648   // The sequences of outlinable regions has now changed.  We must fix the
2649   // IRInstructionDataList for consistency.  Although they may not be illegal
2650   // instructions, they should not be compared with anything else as they
2651   // should not be outlined in this round.  So marking these as illegal is
2652   // allowed.
2653   IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2654   Instruction *BeginRewritten = &*RewrittenBB->begin();
2655   Instruction *EndRewritten = &*RewrittenBB->begin();
2656   Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2657       *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2658   Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2659       *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2660 
2661   // Insert the first IRInstructionData of the new region in front of the
2662   // first IRInstructionData of the IRSimilarityCandidate.
2663   IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2664   // Insert the first IRInstructionData of the new region after the
2665   // last IRInstructionData of the IRSimilarityCandidate.
2666   IDL->insert(Region.Candidate->end(), *Region.NewBack);
2667   // Remove the IRInstructionData from the IRSimilarityCandidate.
2668   IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2669 
2670   assert(RewrittenBB != nullptr &&
2671          "Could not find a predecessor after extraction!");
2672 
2673   // Iterate over the new set of instructions to find the new call
2674   // instruction.
2675   for (Instruction &I : *RewrittenBB)
2676     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2677       if (Region.ExtractedFunction == CI->getCalledFunction())
2678         Region.Call = CI;
2679     } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2680       updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2681   Region.reattachCandidate();
2682   return true;
2683 }
2684 
2685 unsigned IROutliner::doOutline(Module &M) {
2686   // Find the possible similarity sections.
2687   InstructionClassifier.EnableBranches = !DisableBranches;
2688   InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2689   InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2690 
2691   IRSimilarityIdentifier &Identifier = getIRSI(M);
2692   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2693 
2694   // Sort them by size of extracted sections
2695   unsigned OutlinedFunctionNum = 0;
2696   // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2697   // to sort them by the potential number of instructions to be outlined
2698   if (SimilarityCandidates.size() > 1)
2699     llvm::stable_sort(SimilarityCandidates,
2700                       [](const std::vector<IRSimilarityCandidate> &LHS,
2701                          const std::vector<IRSimilarityCandidate> &RHS) {
2702                         return LHS[0].getLength() * LHS.size() >
2703                                RHS[0].getLength() * RHS.size();
2704                       });
2705   // Creating OutlinableGroups for each SimilarityCandidate to be used in
2706   // each of the following for loops to avoid making an allocator.
2707   std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2708 
2709   DenseSet<unsigned> NotSame;
2710   std::vector<OutlinableGroup *> NegativeCostGroups;
2711   std::vector<OutlinableRegion *> OutlinedRegions;
2712   // Iterate over the possible sets of similarity.
2713   unsigned PotentialGroupIdx = 0;
2714   for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2715     OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2716 
2717     // Remove entries that were previously outlined
2718     pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2719 
2720     // We pruned the number of regions to 0 to 1, meaning that it's not worth
2721     // trying to outlined since there is no compatible similar instance of this
2722     // code.
2723     if (CurrentGroup.Regions.size() < 2)
2724       continue;
2725 
2726     // Determine if there are any values that are the same constant throughout
2727     // each section in the set.
2728     NotSame.clear();
2729     CurrentGroup.findSameConstants(NotSame);
2730 
2731     if (CurrentGroup.IgnoreGroup)
2732       continue;
2733 
2734     // Create a CodeExtractor for each outlinable region. Identify inputs and
2735     // outputs for each section using the code extractor and create the argument
2736     // types for the Aggregate Outlining Function.
2737     OutlinedRegions.clear();
2738     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2739       // Break the outlinable region out of its parent BasicBlock into its own
2740       // BasicBlocks (see function implementation).
2741       OS->splitCandidate();
2742 
2743       // There's a chance that when the region is split, extra instructions are
2744       // added to the region. This makes the region no longer viable
2745       // to be split, so we ignore it for outlining.
2746       if (!OS->CandidateSplit)
2747         continue;
2748 
2749       SmallVector<BasicBlock *> BE;
2750       DenseSet<BasicBlock *> BlocksInRegion;
2751       OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2752       OS->CE = new (ExtractorAllocator.Allocate())
2753           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2754                         false, nullptr, "outlined");
2755       findAddInputsOutputs(M, *OS, NotSame);
2756       if (!OS->IgnoreRegion)
2757         OutlinedRegions.push_back(OS);
2758 
2759       // We recombine the blocks together now that we have gathered all the
2760       // needed information.
2761       OS->reattachCandidate();
2762     }
2763 
2764     CurrentGroup.Regions = std::move(OutlinedRegions);
2765 
2766     if (CurrentGroup.Regions.empty())
2767       continue;
2768 
2769     CurrentGroup.collectGVNStoreSets(M);
2770 
2771     if (CostModel)
2772       findCostBenefit(M, CurrentGroup);
2773 
2774     // If we are adhering to the cost model, skip those groups where the cost
2775     // outweighs the benefits.
2776     if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2777       OptimizationRemarkEmitter &ORE =
2778           getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2779       ORE.emit([&]() {
2780         IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2781         OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2782                                    C->frontInstruction());
2783         R << "did not outline "
2784           << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2785           << " regions due to estimated increase of "
2786           << ore::NV("InstructionIncrease",
2787                      CurrentGroup.Cost - CurrentGroup.Benefit)
2788           << " instructions at locations ";
2789         interleave(
2790             CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2791             [&R](OutlinableRegion *Region) {
2792               R << ore::NV(
2793                   "DebugLoc",
2794                   Region->Candidate->frontInstruction()->getDebugLoc());
2795             },
2796             [&R]() { R << " "; });
2797         return R;
2798       });
2799       continue;
2800     }
2801 
2802     NegativeCostGroups.push_back(&CurrentGroup);
2803   }
2804 
2805   ExtractorAllocator.DestroyAll();
2806 
2807   if (NegativeCostGroups.size() > 1)
2808     stable_sort(NegativeCostGroups,
2809                 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2810                   return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2811                 });
2812 
2813   std::vector<Function *> FuncsToRemove;
2814   for (OutlinableGroup *CG : NegativeCostGroups) {
2815     OutlinableGroup &CurrentGroup = *CG;
2816 
2817     OutlinedRegions.clear();
2818     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2819       // We check whether our region is compatible with what has already been
2820       // outlined, and whether we need to ignore this item.
2821       if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2822         continue;
2823       OutlinedRegions.push_back(Region);
2824     }
2825 
2826     if (OutlinedRegions.size() < 2)
2827       continue;
2828 
2829     // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2830     // we are still outlining enough regions to make up for the added cost.
2831     CurrentGroup.Regions = std::move(OutlinedRegions);
2832     if (CostModel) {
2833       CurrentGroup.Benefit = 0;
2834       CurrentGroup.Cost = 0;
2835       findCostBenefit(M, CurrentGroup);
2836       if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2837         continue;
2838     }
2839     OutlinedRegions.clear();
2840     for (OutlinableRegion *Region : CurrentGroup.Regions) {
2841       Region->splitCandidate();
2842       if (!Region->CandidateSplit)
2843         continue;
2844       OutlinedRegions.push_back(Region);
2845     }
2846 
2847     CurrentGroup.Regions = std::move(OutlinedRegions);
2848     if (CurrentGroup.Regions.size() < 2) {
2849       for (OutlinableRegion *R : CurrentGroup.Regions)
2850         R->reattachCandidate();
2851       continue;
2852     }
2853 
2854     LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2855                       << " and benefit " << CurrentGroup.Benefit << "\n");
2856 
2857     // Create functions out of all the sections, and mark them as outlined.
2858     OutlinedRegions.clear();
2859     for (OutlinableRegion *OS : CurrentGroup.Regions) {
2860       SmallVector<BasicBlock *> BE;
2861       DenseSet<BasicBlock *> BlocksInRegion;
2862       OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2863       OS->CE = new (ExtractorAllocator.Allocate())
2864           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2865                         false, nullptr, "outlined");
2866       bool FunctionOutlined = extractSection(*OS);
2867       if (FunctionOutlined) {
2868         unsigned StartIdx = OS->Candidate->getStartIdx();
2869         unsigned EndIdx = OS->Candidate->getEndIdx();
2870         for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2871           Outlined.insert(Idx);
2872 
2873         OutlinedRegions.push_back(OS);
2874       }
2875     }
2876 
2877     LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2878                       << " with benefit " << CurrentGroup.Benefit
2879                       << " and cost " << CurrentGroup.Cost << "\n");
2880 
2881     CurrentGroup.Regions = std::move(OutlinedRegions);
2882 
2883     if (CurrentGroup.Regions.empty())
2884       continue;
2885 
2886     OptimizationRemarkEmitter &ORE =
2887         getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2888     ORE.emit([&]() {
2889       IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2890       OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2891       R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2892         << " regions with decrease of "
2893         << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2894         << " instructions at locations ";
2895       interleave(
2896           CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2897           [&R](OutlinableRegion *Region) {
2898             R << ore::NV("DebugLoc",
2899                          Region->Candidate->frontInstruction()->getDebugLoc());
2900           },
2901           [&R]() { R << " "; });
2902       return R;
2903     });
2904 
2905     deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2906                                  OutlinedFunctionNum);
2907   }
2908 
2909   for (Function *F : FuncsToRemove)
2910     F->eraseFromParent();
2911 
2912   return OutlinedFunctionNum;
2913 }
2914 
2915 bool IROutliner::run(Module &M) {
2916   CostModel = !NoCostModel;
2917   OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2918 
2919   return doOutline(M) > 0;
2920 }
2921 
2922 // Pass Manager Boilerplate
2923 namespace {
2924 class IROutlinerLegacyPass : public ModulePass {
2925 public:
2926   static char ID;
2927   IROutlinerLegacyPass() : ModulePass(ID) {
2928     initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
2929   }
2930 
2931   void getAnalysisUsage(AnalysisUsage &AU) const override {
2932     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2933     AU.addRequired<TargetTransformInfoWrapperPass>();
2934     AU.addRequired<IRSimilarityIdentifierWrapperPass>();
2935   }
2936 
2937   bool runOnModule(Module &M) override;
2938 };
2939 } // namespace
2940 
2941 bool IROutlinerLegacyPass::runOnModule(Module &M) {
2942   if (skipModule(M))
2943     return false;
2944 
2945   std::unique_ptr<OptimizationRemarkEmitter> ORE;
2946   auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2947     ORE.reset(new OptimizationRemarkEmitter(&F));
2948     return *ORE.get();
2949   };
2950 
2951   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
2952     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2953   };
2954 
2955   auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
2956     return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
2957   };
2958 
2959   return IROutliner(GTTI, GIRSI, GORE).run(M);
2960 }
2961 
2962 PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
2963   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2964 
2965   std::function<TargetTransformInfo &(Function &)> GTTI =
2966       [&FAM](Function &F) -> TargetTransformInfo & {
2967     return FAM.getResult<TargetIRAnalysis>(F);
2968   };
2969 
2970   std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
2971       [&AM](Module &M) -> IRSimilarityIdentifier & {
2972     return AM.getResult<IRSimilarityAnalysis>(M);
2973   };
2974 
2975   std::unique_ptr<OptimizationRemarkEmitter> ORE;
2976   std::function<OptimizationRemarkEmitter &(Function &)> GORE =
2977       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2978     ORE.reset(new OptimizationRemarkEmitter(&F));
2979     return *ORE.get();
2980   };
2981 
2982   if (IROutliner(GTTI, GIRSI, GORE).run(M))
2983     return PreservedAnalyses::none();
2984   return PreservedAnalyses::all();
2985 }
2986 
2987 char IROutlinerLegacyPass::ID = 0;
2988 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2989                       false)
2990 INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
2991 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
2992 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2993 INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2994                     false)
2995 
2996 ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
2997