1f22ef01cSRoman Divacky //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky //                     The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
10f22ef01cSRoman Divacky // This pass performs partial inlining, typically by inlining an if statement
11f22ef01cSRoman Divacky // that surrounds the body of the function.
12f22ef01cSRoman Divacky //
13f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
14f22ef01cSRoman Divacky 
153ca95b02SDimitry Andric #include "llvm/Transforms/IPO/PartialInlining.h"
162cab237bSDimitry Andric #include "llvm/ADT/DenseMap.h"
172cab237bSDimitry Andric #include "llvm/ADT/DenseSet.h"
182cab237bSDimitry Andric #include "llvm/ADT/None.h"
192cab237bSDimitry Andric #include "llvm/ADT/Optional.h"
202cab237bSDimitry Andric #include "llvm/ADT/STLExtras.h"
212cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h"
22139f7f9bSDimitry Andric #include "llvm/ADT/Statistic.h"
23d88c1a5aSDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
24d88c1a5aSDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
25f37b6182SDimitry Andric #include "llvm/Analysis/InlineCost.h"
26d88c1a5aSDimitry Andric #include "llvm/Analysis/LoopInfo.h"
272cab237bSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
28f37b6182SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
29f37b6182SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
30f37b6182SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
312cab237bSDimitry Andric #include "llvm/IR/Attributes.h"
322cab237bSDimitry Andric #include "llvm/IR/BasicBlock.h"
3391bc56edSDimitry Andric #include "llvm/IR/CFG.h"
342cab237bSDimitry Andric #include "llvm/IR/CallSite.h"
352cab237bSDimitry Andric #include "llvm/IR/DebugLoc.h"
3651690af2SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
3791bc56edSDimitry Andric #include "llvm/IR/Dominators.h"
382cab237bSDimitry Andric #include "llvm/IR/Function.h"
392cab237bSDimitry Andric #include "llvm/IR/InstrTypes.h"
402cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
41139f7f9bSDimitry Andric #include "llvm/IR/Instructions.h"
422cab237bSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
432cab237bSDimitry Andric #include "llvm/IR/Intrinsics.h"
44139f7f9bSDimitry Andric #include "llvm/IR/Module.h"
452cab237bSDimitry Andric #include "llvm/IR/User.h"
46139f7f9bSDimitry Andric #include "llvm/Pass.h"
472cab237bSDimitry Andric #include "llvm/Support/BlockFrequency.h"
482cab237bSDimitry Andric #include "llvm/Support/BranchProbability.h"
492cab237bSDimitry Andric #include "llvm/Support/Casting.h"
502cab237bSDimitry Andric #include "llvm/Support/CommandLine.h"
512cab237bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
523ca95b02SDimitry Andric #include "llvm/Transforms/IPO.h"
53f22ef01cSRoman Divacky #include "llvm/Transforms/Utils/Cloning.h"
547ae0e2c9SDimitry Andric #include "llvm/Transforms/Utils/CodeExtractor.h"
552cab237bSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
562cab237bSDimitry Andric #include <algorithm>
572cab237bSDimitry Andric #include <cassert>
582cab237bSDimitry Andric #include <cstdint>
592cab237bSDimitry Andric #include <functional>
602cab237bSDimitry Andric #include <iterator>
612cab237bSDimitry Andric #include <memory>
622cab237bSDimitry Andric #include <tuple>
632cab237bSDimitry Andric #include <vector>
642cab237bSDimitry Andric 
65f22ef01cSRoman Divacky using namespace llvm;
66f22ef01cSRoman Divacky 
6751690af2SDimitry Andric #define DEBUG_TYPE "partial-inlining"
6891bc56edSDimitry Andric 
69f37b6182SDimitry Andric STATISTIC(NumPartialInlined,
70f37b6182SDimitry Andric           "Number of callsites functions partially inlined into.");
712cab237bSDimitry Andric STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
722cab237bSDimitry Andric                                         "cold outlined regions were partially "
732cab237bSDimitry Andric                                         "inlined into its caller(s).");
742cab237bSDimitry Andric STATISTIC(NumColdRegionsFound,
752cab237bSDimitry Andric            "Number of cold single entry/exit regions found.");
762cab237bSDimitry Andric STATISTIC(NumColdRegionsOutlined,
772cab237bSDimitry Andric            "Number of cold single entry/exit regions outlined.");
78f22ef01cSRoman Divacky 
7951690af2SDimitry Andric // Command line option to disable partial-inlining. The default is false:
8051690af2SDimitry Andric static cl::opt<bool>
8151690af2SDimitry Andric     DisablePartialInlining("disable-partial-inlining", cl::init(false),
822cab237bSDimitry Andric                            cl::Hidden, cl::desc("Disable partial inlining"));
832cab237bSDimitry Andric // Command line option to disable multi-region partial-inlining. The default is
842cab237bSDimitry Andric // false:
852cab237bSDimitry Andric static cl::opt<bool> DisableMultiRegionPartialInline(
862cab237bSDimitry Andric     "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
872cab237bSDimitry Andric     cl::desc("Disable multi-region partial inlining"));
882cab237bSDimitry Andric 
892cab237bSDimitry Andric // Command line option to force outlining in regions with live exit variables.
902cab237bSDimitry Andric // The default is false:
912cab237bSDimitry Andric static cl::opt<bool>
922cab237bSDimitry Andric     ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
932cab237bSDimitry Andric                cl::desc("Force outline regions with live exits"));
942cab237bSDimitry Andric 
952cab237bSDimitry Andric // Command line option to enable marking outline functions with Cold Calling
962cab237bSDimitry Andric // Convention. The default is false:
972cab237bSDimitry Andric static cl::opt<bool>
982cab237bSDimitry Andric     MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
992cab237bSDimitry Andric                        cl::desc("Mark outline function calls with ColdCC"));
1002cab237bSDimitry Andric 
1012cab237bSDimitry Andric #ifndef NDEBUG
1022cab237bSDimitry Andric // Command line option to debug partial-inlining. The default is none:
1032cab237bSDimitry Andric static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
1042cab237bSDimitry Andric                                           cl::init(false), cl::Hidden,
1052cab237bSDimitry Andric                                           cl::desc("Trace partial inlining."));
1062cab237bSDimitry Andric #endif
1072cab237bSDimitry Andric 
1085517e702SDimitry Andric // This is an option used by testing:
1095517e702SDimitry Andric static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
1105517e702SDimitry Andric                                       cl::init(false), cl::ZeroOrMore,
1115517e702SDimitry Andric                                       cl::ReallyHidden,
1125517e702SDimitry Andric                                       cl::desc("Skip Cost Analysis"));
1132cab237bSDimitry Andric // Used to determine if a cold region is worth outlining based on
1142cab237bSDimitry Andric // its inlining cost compared to the original function.  Default is set at 10%.
1152cab237bSDimitry Andric // ie. if the cold region reduces the inlining cost of the original function by
1162cab237bSDimitry Andric // at least 10%.
1172cab237bSDimitry Andric static cl::opt<float> MinRegionSizeRatio(
1182cab237bSDimitry Andric     "min-region-size-ratio", cl::init(0.1), cl::Hidden,
1192cab237bSDimitry Andric     cl::desc("Minimum ratio comparing relative sizes of each "
1202cab237bSDimitry Andric              "outline candidate and original function"));
1212cab237bSDimitry Andric // Used to tune the minimum number of execution counts needed in the predecessor
1222cab237bSDimitry Andric // block to the cold edge. ie. confidence interval.
1232cab237bSDimitry Andric static cl::opt<unsigned>
1242cab237bSDimitry Andric     MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
1252cab237bSDimitry Andric                              cl::desc("Minimum block executions to consider "
1262cab237bSDimitry Andric                                       "its BranchProbabilityInfo valid"));
1272cab237bSDimitry Andric // Used to determine when an edge is considered cold. Default is set to 10%. ie.
1282cab237bSDimitry Andric // if the branch probability is 10% or less, then it is deemed as 'cold'.
1292cab237bSDimitry Andric static cl::opt<float> ColdBranchRatio(
1302cab237bSDimitry Andric     "cold-branch-ratio", cl::init(0.1), cl::Hidden,
1312cab237bSDimitry Andric     cl::desc("Minimum BranchProbability to consider a region cold."));
13251690af2SDimitry Andric 
133f37b6182SDimitry Andric static cl::opt<unsigned> MaxNumInlineBlocks(
134f37b6182SDimitry Andric     "max-num-inline-blocks", cl::init(5), cl::Hidden,
1352cab237bSDimitry Andric     cl::desc("Max number of blocks to be partially inlined"));
136f37b6182SDimitry Andric 
13751690af2SDimitry Andric // Command line option to set the maximum number of partial inlining allowed
13851690af2SDimitry Andric // for the module. The default value of -1 means no limit.
13951690af2SDimitry Andric static cl::opt<int> MaxNumPartialInlining(
14051690af2SDimitry Andric     "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
14151690af2SDimitry Andric     cl::desc("Max number of partial inlining. The default is unlimited"));
14251690af2SDimitry Andric 
1435517e702SDimitry Andric // Used only when PGO or user annotated branch data is absent. It is
1445517e702SDimitry Andric // the least value that is used to weigh the outline region. If BFI
1455517e702SDimitry Andric // produces larger value, the BFI value will be used.
1465517e702SDimitry Andric static cl::opt<int>
1475517e702SDimitry Andric     OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
1485517e702SDimitry Andric                              cl::Hidden, cl::ZeroOrMore,
1495517e702SDimitry Andric                              cl::desc("Relative frequency of outline region to "
1505517e702SDimitry Andric                                       "the entry block"));
1515517e702SDimitry Andric 
1526d97bb29SDimitry Andric static cl::opt<unsigned> ExtraOutliningPenalty(
1536d97bb29SDimitry Andric     "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
1546d97bb29SDimitry Andric     cl::desc("A debug option to add additional penalty to the computed one."));
1556d97bb29SDimitry Andric 
156f22ef01cSRoman Divacky namespace {
157f37b6182SDimitry Andric 
158f37b6182SDimitry Andric struct FunctionOutliningInfo {
1592cab237bSDimitry Andric   FunctionOutliningInfo() = default;
1602cab237bSDimitry Andric 
161f37b6182SDimitry Andric   // Returns the number of blocks to be inlined including all blocks
162f37b6182SDimitry Andric   // in Entries and one return block.
GetNumInlinedBlocks__anona2f48faf0111::FunctionOutliningInfo163f37b6182SDimitry Andric   unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
164f37b6182SDimitry Andric 
165f37b6182SDimitry Andric   // A set of blocks including the function entry that guard
166f37b6182SDimitry Andric   // the region to be outlined.
167f37b6182SDimitry Andric   SmallVector<BasicBlock *, 4> Entries;
1682cab237bSDimitry Andric 
169f37b6182SDimitry Andric   // The return block that is not included in the outlined region.
1702cab237bSDimitry Andric   BasicBlock *ReturnBlock = nullptr;
1712cab237bSDimitry Andric 
1726d97bb29SDimitry Andric   // The dominating block of the region to be outlined.
1732cab237bSDimitry Andric   BasicBlock *NonReturnBlock = nullptr;
1742cab237bSDimitry Andric 
175f37b6182SDimitry Andric   // The set of blocks in Entries that that are predecessors to ReturnBlock
176f37b6182SDimitry Andric   SmallVector<BasicBlock *, 4> ReturnBlockPreds;
177f37b6182SDimitry Andric };
178f37b6182SDimitry Andric 
1792cab237bSDimitry Andric struct FunctionOutliningMultiRegionInfo {
FunctionOutliningMultiRegionInfo__anona2f48faf0111::FunctionOutliningMultiRegionInfo1802cab237bSDimitry Andric   FunctionOutliningMultiRegionInfo()
1812cab237bSDimitry Andric       : ORI() {}
1822cab237bSDimitry Andric 
1832cab237bSDimitry Andric   // Container for outline regions
1842cab237bSDimitry Andric   struct OutlineRegionInfo {
OutlineRegionInfo__anona2f48faf0111::FunctionOutliningMultiRegionInfo::OutlineRegionInfo1852cab237bSDimitry Andric     OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
1862cab237bSDimitry Andric                       BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1872cab237bSDimitry Andric                       BasicBlock *ReturnBlock)
1882cab237bSDimitry Andric         : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
1892cab237bSDimitry Andric           ReturnBlock(ReturnBlock) {}
1902cab237bSDimitry Andric     SmallVector<BasicBlock *, 8> Region;
1912cab237bSDimitry Andric     BasicBlock *EntryBlock;
1922cab237bSDimitry Andric     BasicBlock *ExitBlock;
1932cab237bSDimitry Andric     BasicBlock *ReturnBlock;
1942cab237bSDimitry Andric   };
1952cab237bSDimitry Andric 
1962cab237bSDimitry Andric   SmallVector<OutlineRegionInfo, 4> ORI;
1972cab237bSDimitry Andric };
1982cab237bSDimitry Andric 
199d88c1a5aSDimitry Andric struct PartialInlinerImpl {
2002cab237bSDimitry Andric 
PartialInlinerImpl__anona2f48faf0111::PartialInlinerImpl201f37b6182SDimitry Andric   PartialInlinerImpl(
202f37b6182SDimitry Andric       std::function<AssumptionCache &(Function &)> *GetAC,
203f37b6182SDimitry Andric       std::function<TargetTransformInfo &(Function &)> *GTTI,
204f37b6182SDimitry Andric       Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
2054ba319b5SDimitry Andric       ProfileSummaryInfo *ProfSI)
2064ba319b5SDimitry Andric       : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
2072cab237bSDimitry Andric 
208d88c1a5aSDimitry Andric   bool run(Module &M);
2092cab237bSDimitry Andric   // Main part of the transformation that calls helper functions to find
2102cab237bSDimitry Andric   // outlining candidates, clone & outline the function, and attempt to
2112cab237bSDimitry Andric   // partially inline the resulting function. Returns true if
2122cab237bSDimitry Andric   // inlining was successful, false otherwise.  Also returns the outline
2132cab237bSDimitry Andric   // function (only if we partially inlined early returns) as there is a
2142cab237bSDimitry Andric   // possibility to further "peel" early return statements that were left in the
2152cab237bSDimitry Andric   // outline function due to code size.
2162cab237bSDimitry Andric   std::pair<bool, Function *> unswitchFunction(Function *F);
217d88c1a5aSDimitry Andric 
2184ba319b5SDimitry Andric   // This class speculatively clones the function to be partial inlined.
21924d58133SDimitry Andric   // At the end of partial inlining, the remaining callsites to the cloned
22024d58133SDimitry Andric   // function that are not partially inlined will be fixed up to reference
22124d58133SDimitry Andric   // the original function, and the cloned function will be erased.
22224d58133SDimitry Andric   struct FunctionCloner {
2232cab237bSDimitry Andric     // Two constructors, one for single region outlining, the other for
2242cab237bSDimitry Andric     // multi-region outlining.
2252cab237bSDimitry Andric     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
2262cab237bSDimitry Andric                    OptimizationRemarkEmitter &ORE);
2272cab237bSDimitry Andric     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
2282cab237bSDimitry Andric                    OptimizationRemarkEmitter &ORE);
22924d58133SDimitry Andric     ~FunctionCloner();
23024d58133SDimitry Andric 
23124d58133SDimitry Andric     // Prepare for function outlining: making sure there is only
23224d58133SDimitry Andric     // one incoming edge from the extracted/outlined region to
23324d58133SDimitry Andric     // the return block.
23424d58133SDimitry Andric     void NormalizeReturnBlock();
23524d58133SDimitry Andric 
2362cab237bSDimitry Andric     // Do function outlining for cold regions.
2372cab237bSDimitry Andric     bool doMultiRegionFunctionOutlining();
2382cab237bSDimitry Andric     // Do function outlining for region after early return block(s).
2392cab237bSDimitry Andric     // NOTE: For vararg functions that do the vararg handling in the outlined
2402cab237bSDimitry Andric     //       function, we temporarily generate IR that does not properly
2412cab237bSDimitry Andric     //       forward varargs to the outlined function. Calling InlineFunction
2422cab237bSDimitry Andric     //       will update calls to the outlined functions to properly forward
2432cab237bSDimitry Andric     //       the varargs.
2442cab237bSDimitry Andric     Function *doSingleRegionFunctionOutlining();
24524d58133SDimitry Andric 
24624d58133SDimitry Andric     Function *OrigFunc = nullptr;
24724d58133SDimitry Andric     Function *ClonedFunc = nullptr;
2482cab237bSDimitry Andric 
2492cab237bSDimitry Andric     typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
2502cab237bSDimitry Andric     // Keep track of Outlined Functions and the basic block they're called from.
2512cab237bSDimitry Andric     SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
2522cab237bSDimitry Andric 
25324d58133SDimitry Andric     // ClonedFunc is inlined in one of its callers after function
25424d58133SDimitry Andric     // outlining.
25524d58133SDimitry Andric     bool IsFunctionInlined = false;
25624d58133SDimitry Andric     // The cost of the region to be outlined.
25724d58133SDimitry Andric     int OutlinedRegionCost = 0;
2582cab237bSDimitry Andric     // ClonedOI is specific to outlining non-early return blocks.
25924d58133SDimitry Andric     std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
2602cab237bSDimitry Andric     // ClonedOMRI is specific to outlining cold regions.
2612cab237bSDimitry Andric     std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
26224d58133SDimitry Andric     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
2632cab237bSDimitry Andric     OptimizationRemarkEmitter &ORE;
26424d58133SDimitry Andric   };
26524d58133SDimitry Andric 
266f37b6182SDimitry Andric private:
267f37b6182SDimitry Andric   int NumPartialInlining = 0;
268f37b6182SDimitry Andric   std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
269f37b6182SDimitry Andric   std::function<TargetTransformInfo &(Function &)> *GetTTI;
270f37b6182SDimitry Andric   Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
271f37b6182SDimitry Andric   ProfileSummaryInfo *PSI;
272f37b6182SDimitry Andric 
2735517e702SDimitry Andric   // Return the frequency of the OutlininingBB relative to F's entry point.
2745517e702SDimitry Andric   // The result is no larger than 1 and is represented using BP.
2755517e702SDimitry Andric   // (Note that the outlined region's 'head' block can only have incoming
2765517e702SDimitry Andric   // edges from the guarding entry blocks).
27724d58133SDimitry Andric   BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
2785517e702SDimitry Andric 
2795517e702SDimitry Andric   // Return true if the callee of CS should be partially inlined with
2805517e702SDimitry Andric   // profit.
28124d58133SDimitry Andric   bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
2824ba319b5SDimitry Andric                            BlockFrequency WeightedOutliningRcost,
2834ba319b5SDimitry Andric                            OptimizationRemarkEmitter &ORE);
2845517e702SDimitry Andric 
2855517e702SDimitry Andric   // Try to inline DuplicateFunction (cloned from F with call to
2865517e702SDimitry Andric   // the OutlinedFunction into its callers. Return true
2875517e702SDimitry Andric   // if there is any successful inlining.
28824d58133SDimitry Andric   bool tryPartialInline(FunctionCloner &Cloner);
2895517e702SDimitry Andric 
2905517e702SDimitry Andric   // Compute the mapping from use site of DuplicationFunction to the enclosing
2915517e702SDimitry Andric   // BB's profile count.
2925517e702SDimitry Andric   void computeCallsiteToProfCountMap(Function *DuplicateFunction,
2935517e702SDimitry Andric                                      DenseMap<User *, uint64_t> &SiteCountMap);
2945517e702SDimitry Andric 
IsLimitReached__anona2f48faf0111::PartialInlinerImpl29551690af2SDimitry Andric   bool IsLimitReached() {
29651690af2SDimitry Andric     return (MaxNumPartialInlining != -1 &&
29751690af2SDimitry Andric             NumPartialInlining >= MaxNumPartialInlining);
29851690af2SDimitry Andric   }
2995517e702SDimitry Andric 
getCallSite__anona2f48faf0111::PartialInlinerImpl30024d58133SDimitry Andric   static CallSite getCallSite(User *U) {
3015517e702SDimitry Andric     CallSite CS;
3025517e702SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(U))
3035517e702SDimitry Andric       CS = CallSite(CI);
3045517e702SDimitry Andric     else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
3055517e702SDimitry Andric       CS = CallSite(II);
3065517e702SDimitry Andric     else
3075517e702SDimitry Andric       llvm_unreachable("All uses must be calls");
3085517e702SDimitry Andric     return CS;
3095517e702SDimitry Andric   }
3105517e702SDimitry Andric 
getOneCallSiteTo__anona2f48faf0111::PartialInlinerImpl31124d58133SDimitry Andric   static CallSite getOneCallSiteTo(Function *F) {
3125517e702SDimitry Andric     User *User = *F->user_begin();
3135517e702SDimitry Andric     return getCallSite(User);
3145517e702SDimitry Andric   }
3155517e702SDimitry Andric 
getOneDebugLoc__anona2f48faf0111::PartialInlinerImpl3165517e702SDimitry Andric   std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
3175517e702SDimitry Andric     CallSite CS = getOneCallSiteTo(F);
3185517e702SDimitry Andric     DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
3195517e702SDimitry Andric     BasicBlock *Block = CS.getParent();
3205517e702SDimitry Andric     return std::make_tuple(DLoc, Block);
3215517e702SDimitry Andric   }
3225517e702SDimitry Andric 
3235517e702SDimitry Andric   // Returns the costs associated with function outlining:
3245517e702SDimitry Andric   // - The first value is the non-weighted runtime cost for making the call
32524d58133SDimitry Andric   //   to the outlined function, including the addtional  setup cost in the
32624d58133SDimitry Andric   //    outlined function itself;
3275517e702SDimitry Andric   // - The second value is the estimated size of the new call sequence in
32824d58133SDimitry Andric   //   basic block Cloner.OutliningCallBB;
32924d58133SDimitry Andric   std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
3302cab237bSDimitry Andric 
3315517e702SDimitry Andric   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
3325517e702SDimitry Andric   // approximate both the size and runtime cost (Note that in the current
3335517e702SDimitry Andric   // inline cost analysis, there is no clear distinction there either).
33424d58133SDimitry Andric   static int computeBBInlineCost(BasicBlock *BB);
3355517e702SDimitry Andric 
3365517e702SDimitry Andric   std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
3372cab237bSDimitry Andric   std::unique_ptr<FunctionOutliningMultiRegionInfo>
3384ba319b5SDimitry Andric   computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
339d88c1a5aSDimitry Andric };
340f37b6182SDimitry Andric 
3413ca95b02SDimitry Andric struct PartialInlinerLegacyPass : public ModulePass {
342f22ef01cSRoman Divacky   static char ID; // Pass identification, replacement for typeid
3432cab237bSDimitry Andric 
PartialInlinerLegacyPass__anona2f48faf0111::PartialInlinerLegacyPass3443ca95b02SDimitry Andric   PartialInlinerLegacyPass() : ModulePass(ID) {
3453ca95b02SDimitry Andric     initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
3462754fe60SDimitry Andric   }
347f22ef01cSRoman Divacky 
getAnalysisUsage__anona2f48faf0111::PartialInlinerLegacyPass348d88c1a5aSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
349d88c1a5aSDimitry Andric     AU.addRequired<AssumptionCacheTracker>();
350f37b6182SDimitry Andric     AU.addRequired<ProfileSummaryInfoWrapperPass>();
351f37b6182SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
352d88c1a5aSDimitry Andric   }
3532cab237bSDimitry Andric 
runOnModule__anona2f48faf0111::PartialInlinerLegacyPass3543ca95b02SDimitry Andric   bool runOnModule(Module &M) override {
3553ca95b02SDimitry Andric     if (skipModule(M))
3563ca95b02SDimitry Andric       return false;
357f22ef01cSRoman Divacky 
358d88c1a5aSDimitry Andric     AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
359f37b6182SDimitry Andric     TargetTransformInfoWrapperPass *TTIWP =
360f37b6182SDimitry Andric         &getAnalysis<TargetTransformInfoWrapperPass>();
361f37b6182SDimitry Andric     ProfileSummaryInfo *PSI =
362*b5893f02SDimitry Andric         &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
363f37b6182SDimitry Andric 
364d88c1a5aSDimitry Andric     std::function<AssumptionCache &(Function &)> GetAssumptionCache =
365d88c1a5aSDimitry Andric         [&ACT](Function &F) -> AssumptionCache & {
366d88c1a5aSDimitry Andric       return ACT->getAssumptionCache(F);
367d88c1a5aSDimitry Andric     };
368f37b6182SDimitry Andric 
369f37b6182SDimitry Andric     std::function<TargetTransformInfo &(Function &)> GetTTI =
370f37b6182SDimitry Andric         [&TTIWP](Function &F) -> TargetTransformInfo & {
371f37b6182SDimitry Andric       return TTIWP->getTTI(F);
372f37b6182SDimitry Andric     };
373f37b6182SDimitry Andric 
3744ba319b5SDimitry Andric     return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
3752cab237bSDimitry Andric         .run(M);
376d88c1a5aSDimitry Andric   }
377f22ef01cSRoman Divacky };
3782cab237bSDimitry Andric 
3792cab237bSDimitry Andric } // end anonymous namespace
3802cab237bSDimitry Andric 
3812cab237bSDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function * F,OptimizationRemarkEmitter & ORE)3824ba319b5SDimitry Andric PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
3834ba319b5SDimitry Andric                                                     OptimizationRemarkEmitter &ORE) {
3842cab237bSDimitry Andric   BasicBlock *EntryBlock = &F->front();
3852cab237bSDimitry Andric 
3862cab237bSDimitry Andric   DominatorTree DT(*F);
3872cab237bSDimitry Andric   LoopInfo LI(DT);
3882cab237bSDimitry Andric   BranchProbabilityInfo BPI(*F, LI);
3892cab237bSDimitry Andric   std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
3902cab237bSDimitry Andric   BlockFrequencyInfo *BFI;
3912cab237bSDimitry Andric   if (!GetBFI) {
3922cab237bSDimitry Andric     ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
3932cab237bSDimitry Andric     BFI = ScopedBFI.get();
3942cab237bSDimitry Andric   } else
3952cab237bSDimitry Andric     BFI = &(*GetBFI)(*F);
3962cab237bSDimitry Andric 
3972cab237bSDimitry Andric   // Return if we don't have profiling information.
3982cab237bSDimitry Andric   if (!PSI->hasInstrumentationProfile())
3992cab237bSDimitry Andric     return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
4002cab237bSDimitry Andric 
4012cab237bSDimitry Andric   std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
4022cab237bSDimitry Andric       llvm::make_unique<FunctionOutliningMultiRegionInfo>();
4032cab237bSDimitry Andric 
4042cab237bSDimitry Andric   auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
4052cab237bSDimitry Andric     BasicBlock *Dom = BlockList.front();
406*b5893f02SDimitry Andric     return BlockList.size() > 1 && Dom->hasNPredecessors(1);
4072cab237bSDimitry Andric   };
4082cab237bSDimitry Andric 
4092cab237bSDimitry Andric   auto IsSingleExit =
4102cab237bSDimitry Andric       [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
4112cab237bSDimitry Andric     BasicBlock *ExitBlock = nullptr;
4122cab237bSDimitry Andric     for (auto *Block : BlockList) {
4132cab237bSDimitry Andric       for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
4142cab237bSDimitry Andric         if (!is_contained(BlockList, *SI)) {
4152cab237bSDimitry Andric           if (ExitBlock) {
4162cab237bSDimitry Andric             ORE.emit([&]() {
4172cab237bSDimitry Andric               return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
4182cab237bSDimitry Andric                                               &SI->front())
4192cab237bSDimitry Andric                      << "Region dominated by "
4202cab237bSDimitry Andric                      << ore::NV("Block", BlockList.front()->getName())
4212cab237bSDimitry Andric                      << " has more than one region exit edge.";
4222cab237bSDimitry Andric             });
4232cab237bSDimitry Andric             return nullptr;
4242cab237bSDimitry Andric           } else
4252cab237bSDimitry Andric             ExitBlock = Block;
4262cab237bSDimitry Andric         }
4272cab237bSDimitry Andric       }
4282cab237bSDimitry Andric     }
4292cab237bSDimitry Andric     return ExitBlock;
4302cab237bSDimitry Andric   };
4312cab237bSDimitry Andric 
4322cab237bSDimitry Andric   auto BBProfileCount = [BFI](BasicBlock *BB) {
4332cab237bSDimitry Andric     return BFI->getBlockProfileCount(BB)
4342cab237bSDimitry Andric                ? BFI->getBlockProfileCount(BB).getValue()
4352cab237bSDimitry Andric                : 0;
4362cab237bSDimitry Andric   };
4372cab237bSDimitry Andric 
4382cab237bSDimitry Andric   // Use the same computeBBInlineCost function to compute the cost savings of
4392cab237bSDimitry Andric   // the outlining the candidate region.
4402cab237bSDimitry Andric   int OverallFunctionCost = 0;
4412cab237bSDimitry Andric   for (auto &BB : *F)
4422cab237bSDimitry Andric     OverallFunctionCost += computeBBInlineCost(&BB);
4432cab237bSDimitry Andric 
4442cab237bSDimitry Andric #ifndef NDEBUG
4452cab237bSDimitry Andric   if (TracePartialInlining)
4462cab237bSDimitry Andric     dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
4472cab237bSDimitry Andric #endif
4482cab237bSDimitry Andric   int MinOutlineRegionCost =
4492cab237bSDimitry Andric       static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
4502cab237bSDimitry Andric   BranchProbability MinBranchProbability(
4512cab237bSDimitry Andric       static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
4522cab237bSDimitry Andric       MinBlockCounterExecution);
4532cab237bSDimitry Andric   bool ColdCandidateFound = false;
4542cab237bSDimitry Andric   BasicBlock *CurrEntry = EntryBlock;
4552cab237bSDimitry Andric   std::vector<BasicBlock *> DFS;
4562cab237bSDimitry Andric   DenseMap<BasicBlock *, bool> VisitedMap;
4572cab237bSDimitry Andric   DFS.push_back(CurrEntry);
4582cab237bSDimitry Andric   VisitedMap[CurrEntry] = true;
4592cab237bSDimitry Andric   // Use Depth First Search on the basic blocks to find CFG edges that are
4602cab237bSDimitry Andric   // considered cold.
4612cab237bSDimitry Andric   // Cold regions considered must also have its inline cost compared to the
4622cab237bSDimitry Andric   // overall inline cost of the original function.  The region is outlined only
4632cab237bSDimitry Andric   // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
4642cab237bSDimitry Andric   // more.
4652cab237bSDimitry Andric   while (!DFS.empty()) {
4662cab237bSDimitry Andric     auto *thisBB = DFS.back();
4672cab237bSDimitry Andric     DFS.pop_back();
4682cab237bSDimitry Andric     // Only consider regions with predecessor blocks that are considered
4692cab237bSDimitry Andric     // not-cold (default: part of the top 99.99% of all block counters)
4702cab237bSDimitry Andric     // AND greater than our minimum block execution count (default: 100).
471*b5893f02SDimitry Andric     if (PSI->isColdBlock(thisBB, BFI) ||
4722cab237bSDimitry Andric         BBProfileCount(thisBB) < MinBlockCounterExecution)
4732cab237bSDimitry Andric       continue;
4742cab237bSDimitry Andric     for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
4752cab237bSDimitry Andric       if (VisitedMap[*SI])
4762cab237bSDimitry Andric         continue;
4772cab237bSDimitry Andric       VisitedMap[*SI] = true;
4782cab237bSDimitry Andric       DFS.push_back(*SI);
4792cab237bSDimitry Andric       // If branch isn't cold, we skip to the next one.
4802cab237bSDimitry Andric       BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
4812cab237bSDimitry Andric       if (SuccProb > MinBranchProbability)
4822cab237bSDimitry Andric         continue;
4832cab237bSDimitry Andric #ifndef NDEBUG
4842cab237bSDimitry Andric       if (TracePartialInlining) {
4852cab237bSDimitry Andric         dbgs() << "Found cold edge: " << thisBB->getName() << "->"
4862cab237bSDimitry Andric                << (*SI)->getName() << "\nBranch Probability = " << SuccProb
4872cab237bSDimitry Andric                << "\n";
4882cab237bSDimitry Andric       }
4892cab237bSDimitry Andric #endif
4902cab237bSDimitry Andric       SmallVector<BasicBlock *, 8> DominateVector;
4912cab237bSDimitry Andric       DT.getDescendants(*SI, DominateVector);
4922cab237bSDimitry Andric       // We can only outline single entry regions (for now).
4932cab237bSDimitry Andric       if (!IsSingleEntry(DominateVector))
4942cab237bSDimitry Andric         continue;
4952cab237bSDimitry Andric       BasicBlock *ExitBlock = nullptr;
4962cab237bSDimitry Andric       // We can only outline single exit regions (for now).
4972cab237bSDimitry Andric       if (!(ExitBlock = IsSingleExit(DominateVector)))
4982cab237bSDimitry Andric         continue;
4992cab237bSDimitry Andric       int OutlineRegionCost = 0;
5002cab237bSDimitry Andric       for (auto *BB : DominateVector)
5012cab237bSDimitry Andric         OutlineRegionCost += computeBBInlineCost(BB);
5022cab237bSDimitry Andric 
5032cab237bSDimitry Andric #ifndef NDEBUG
5042cab237bSDimitry Andric       if (TracePartialInlining)
5052cab237bSDimitry Andric         dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
5062cab237bSDimitry Andric #endif
5072cab237bSDimitry Andric 
5082cab237bSDimitry Andric       if (OutlineRegionCost < MinOutlineRegionCost) {
5092cab237bSDimitry Andric         ORE.emit([&]() {
5102cab237bSDimitry Andric           return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
5112cab237bSDimitry Andric                                             &SI->front())
5122cab237bSDimitry Andric                  << ore::NV("Callee", F) << " inline cost-savings smaller than "
5132cab237bSDimitry Andric                  << ore::NV("Cost", MinOutlineRegionCost);
5142cab237bSDimitry Andric         });
5152cab237bSDimitry Andric         continue;
5162cab237bSDimitry Andric       }
5172cab237bSDimitry Andric       // For now, ignore blocks that belong to a SISE region that is a
5182cab237bSDimitry Andric       // candidate for outlining.  In the future, we may want to look
5192cab237bSDimitry Andric       // at inner regions because the outer region may have live-exit
5202cab237bSDimitry Andric       // variables.
5212cab237bSDimitry Andric       for (auto *BB : DominateVector)
5222cab237bSDimitry Andric         VisitedMap[BB] = true;
5232cab237bSDimitry Andric       // ReturnBlock here means the block after the outline call
5242cab237bSDimitry Andric       BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
5252cab237bSDimitry Andric       // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
5262cab237bSDimitry Andric       FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
5272cab237bSDimitry Andric           DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
5282cab237bSDimitry Andric       RegInfo.Region = DominateVector;
5292cab237bSDimitry Andric       OutliningInfo->ORI.push_back(RegInfo);
5302cab237bSDimitry Andric #ifndef NDEBUG
5312cab237bSDimitry Andric       if (TracePartialInlining) {
5322cab237bSDimitry Andric         dbgs() << "Found Cold Candidate starting at block: "
5332cab237bSDimitry Andric                << DominateVector.front()->getName() << "\n";
5342cab237bSDimitry Andric       }
5352cab237bSDimitry Andric #endif
5362cab237bSDimitry Andric       ColdCandidateFound = true;
5372cab237bSDimitry Andric       NumColdRegionsFound++;
5382cab237bSDimitry Andric     }
5392cab237bSDimitry Andric   }
5402cab237bSDimitry Andric   if (ColdCandidateFound)
5412cab237bSDimitry Andric     return OutliningInfo;
5422cab237bSDimitry Andric   else
5432cab237bSDimitry Andric     return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
5443dac3a9bSDimitry Andric }
545f22ef01cSRoman Divacky 
546f37b6182SDimitry Andric std::unique_ptr<FunctionOutliningInfo>
computeOutliningInfo(Function * F)547f37b6182SDimitry Andric PartialInlinerImpl::computeOutliningInfo(Function *F) {
548d88c1a5aSDimitry Andric   BasicBlock *EntryBlock = &F->front();
549d88c1a5aSDimitry Andric   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
550f22ef01cSRoman Divacky   if (!BR || BR->isUnconditional())
551f37b6182SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
552f22ef01cSRoman Divacky 
553f37b6182SDimitry Andric   // Returns true if Succ is BB's successor
554f37b6182SDimitry Andric   auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
555f37b6182SDimitry Andric     return is_contained(successors(BB), Succ);
556f37b6182SDimitry Andric   };
557f37b6182SDimitry Andric 
558f37b6182SDimitry Andric   auto IsReturnBlock = [](BasicBlock *BB) {
559*b5893f02SDimitry Andric     Instruction *TI = BB->getTerminator();
560f37b6182SDimitry Andric     return isa<ReturnInst>(TI);
561f37b6182SDimitry Andric   };
562f37b6182SDimitry Andric 
5635517e702SDimitry Andric   auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
564f37b6182SDimitry Andric     if (IsReturnBlock(Succ1))
565f37b6182SDimitry Andric       return std::make_tuple(Succ1, Succ2);
566f37b6182SDimitry Andric     if (IsReturnBlock(Succ2))
567f37b6182SDimitry Andric       return std::make_tuple(Succ2, Succ1);
568f37b6182SDimitry Andric 
569f37b6182SDimitry Andric     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
570f37b6182SDimitry Andric   };
571f37b6182SDimitry Andric 
572f37b6182SDimitry Andric   // Detect a triangular shape:
5735517e702SDimitry Andric   auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
574f37b6182SDimitry Andric     if (IsSuccessor(Succ1, Succ2))
575f37b6182SDimitry Andric       return std::make_tuple(Succ1, Succ2);
576f37b6182SDimitry Andric     if (IsSuccessor(Succ2, Succ1))
577f37b6182SDimitry Andric       return std::make_tuple(Succ2, Succ1);
578f37b6182SDimitry Andric 
579f37b6182SDimitry Andric     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
580f37b6182SDimitry Andric   };
581f37b6182SDimitry Andric 
582f37b6182SDimitry Andric   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
583f37b6182SDimitry Andric       llvm::make_unique<FunctionOutliningInfo>();
584f37b6182SDimitry Andric 
585f37b6182SDimitry Andric   BasicBlock *CurrEntry = EntryBlock;
586f37b6182SDimitry Andric   bool CandidateFound = false;
587f37b6182SDimitry Andric   do {
588f37b6182SDimitry Andric     // The number of blocks to be inlined has already reached
589f37b6182SDimitry Andric     // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
590f37b6182SDimitry Andric     // disables partial inlining for the function.
591f37b6182SDimitry Andric     if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
592f37b6182SDimitry Andric       break;
593f37b6182SDimitry Andric 
5944ba319b5SDimitry Andric     if (succ_size(CurrEntry) != 2)
595f37b6182SDimitry Andric       break;
596f37b6182SDimitry Andric 
597f37b6182SDimitry Andric     BasicBlock *Succ1 = *succ_begin(CurrEntry);
598f37b6182SDimitry Andric     BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
599f37b6182SDimitry Andric 
600f37b6182SDimitry Andric     BasicBlock *ReturnBlock, *NonReturnBlock;
601f37b6182SDimitry Andric     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
602f37b6182SDimitry Andric 
603f37b6182SDimitry Andric     if (ReturnBlock) {
604f37b6182SDimitry Andric       OutliningInfo->Entries.push_back(CurrEntry);
605f37b6182SDimitry Andric       OutliningInfo->ReturnBlock = ReturnBlock;
606f37b6182SDimitry Andric       OutliningInfo->NonReturnBlock = NonReturnBlock;
607f37b6182SDimitry Andric       CandidateFound = true;
608f37b6182SDimitry Andric       break;
609ff0cc061SDimitry Andric     }
610f22ef01cSRoman Divacky 
611f37b6182SDimitry Andric     BasicBlock *CommSucc;
612f37b6182SDimitry Andric     BasicBlock *OtherSucc;
613f37b6182SDimitry Andric     std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
614f37b6182SDimitry Andric 
615f37b6182SDimitry Andric     if (!CommSucc)
616f37b6182SDimitry Andric       break;
617f37b6182SDimitry Andric 
618f37b6182SDimitry Andric     OutliningInfo->Entries.push_back(CurrEntry);
619f37b6182SDimitry Andric     CurrEntry = OtherSucc;
620f37b6182SDimitry Andric   } while (true);
621f37b6182SDimitry Andric 
622f37b6182SDimitry Andric   if (!CandidateFound)
623f37b6182SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
624f37b6182SDimitry Andric 
625f37b6182SDimitry Andric   // Do sanity check of the entries: threre should not
626f37b6182SDimitry Andric   // be any successors (not in the entry set) other than
627f37b6182SDimitry Andric   // {ReturnBlock, NonReturnBlock}
6285517e702SDimitry Andric   assert(OutliningInfo->Entries[0] == &F->front() &&
6295517e702SDimitry Andric          "Function Entry must be the first in Entries vector");
630f37b6182SDimitry Andric   DenseSet<BasicBlock *> Entries;
631f37b6182SDimitry Andric   for (BasicBlock *E : OutliningInfo->Entries)
632f37b6182SDimitry Andric     Entries.insert(E);
633f37b6182SDimitry Andric 
634f37b6182SDimitry Andric   // Returns true of BB has Predecessor which is not
635f37b6182SDimitry Andric   // in Entries set.
636f37b6182SDimitry Andric   auto HasNonEntryPred = [Entries](BasicBlock *BB) {
637f37b6182SDimitry Andric     for (auto Pred : predecessors(BB)) {
638f37b6182SDimitry Andric       if (!Entries.count(Pred))
639f37b6182SDimitry Andric         return true;
640f37b6182SDimitry Andric     }
641f37b6182SDimitry Andric     return false;
642f37b6182SDimitry Andric   };
643f37b6182SDimitry Andric   auto CheckAndNormalizeCandidate =
644f37b6182SDimitry Andric       [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
645f37b6182SDimitry Andric         for (BasicBlock *E : OutliningInfo->Entries) {
646f37b6182SDimitry Andric           for (auto Succ : successors(E)) {
647f37b6182SDimitry Andric             if (Entries.count(Succ))
648f37b6182SDimitry Andric               continue;
649f37b6182SDimitry Andric             if (Succ == OutliningInfo->ReturnBlock)
650f37b6182SDimitry Andric               OutliningInfo->ReturnBlockPreds.push_back(E);
651f37b6182SDimitry Andric             else if (Succ != OutliningInfo->NonReturnBlock)
652f37b6182SDimitry Andric               return false;
653f37b6182SDimitry Andric           }
654f37b6182SDimitry Andric           // There should not be any outside incoming edges either:
655f37b6182SDimitry Andric           if (HasNonEntryPred(E))
656f37b6182SDimitry Andric             return false;
657f37b6182SDimitry Andric         }
658f37b6182SDimitry Andric         return true;
659f37b6182SDimitry Andric       };
660f37b6182SDimitry Andric 
661f37b6182SDimitry Andric   if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
662f37b6182SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
663f37b6182SDimitry Andric 
664f37b6182SDimitry Andric   // Now further growing the candidate's inlining region by
665f37b6182SDimitry Andric   // peeling off dominating blocks from the outlining region:
666f37b6182SDimitry Andric   while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
667f37b6182SDimitry Andric     BasicBlock *Cand = OutliningInfo->NonReturnBlock;
6684ba319b5SDimitry Andric     if (succ_size(Cand) != 2)
669f37b6182SDimitry Andric       break;
670f37b6182SDimitry Andric 
671f37b6182SDimitry Andric     if (HasNonEntryPred(Cand))
672f37b6182SDimitry Andric       break;
673f37b6182SDimitry Andric 
674f37b6182SDimitry Andric     BasicBlock *Succ1 = *succ_begin(Cand);
675f37b6182SDimitry Andric     BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
676f37b6182SDimitry Andric 
677f37b6182SDimitry Andric     BasicBlock *ReturnBlock, *NonReturnBlock;
678f37b6182SDimitry Andric     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
679f37b6182SDimitry Andric     if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
680f37b6182SDimitry Andric       break;
681f37b6182SDimitry Andric 
682f37b6182SDimitry Andric     if (NonReturnBlock->getSinglePredecessor() != Cand)
683f37b6182SDimitry Andric       break;
684f37b6182SDimitry Andric 
685f37b6182SDimitry Andric     // Now grow and update OutlininigInfo:
686f37b6182SDimitry Andric     OutliningInfo->Entries.push_back(Cand);
687f37b6182SDimitry Andric     OutliningInfo->NonReturnBlock = NonReturnBlock;
688f37b6182SDimitry Andric     OutliningInfo->ReturnBlockPreds.push_back(Cand);
689f37b6182SDimitry Andric     Entries.insert(Cand);
690f37b6182SDimitry Andric   }
691f37b6182SDimitry Andric 
692f37b6182SDimitry Andric   return OutliningInfo;
693f37b6182SDimitry Andric }
694f37b6182SDimitry Andric 
6955517e702SDimitry Andric // Check if there is PGO data or user annoated branch data:
hasProfileData(Function * F,FunctionOutliningInfo * OI)6965517e702SDimitry Andric static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
697da09e106SDimitry Andric   if (F->hasProfileData())
6985517e702SDimitry Andric     return true;
6995517e702SDimitry Andric   // Now check if any of the entry block has MD_prof data:
7005517e702SDimitry Andric   for (auto *E : OI->Entries) {
7015517e702SDimitry Andric     BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
7025517e702SDimitry Andric     if (!BR || BR->isUnconditional())
7035517e702SDimitry Andric       continue;
7045517e702SDimitry Andric     uint64_t T, F;
7055517e702SDimitry Andric     if (BR->extractProfMetadata(T, F))
7065517e702SDimitry Andric       return true;
7075517e702SDimitry Andric   }
7085517e702SDimitry Andric   return false;
7095517e702SDimitry Andric }
7105517e702SDimitry Andric 
71124d58133SDimitry Andric BranchProbability
getOutliningCallBBRelativeFreq(FunctionCloner & Cloner)71224d58133SDimitry Andric PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
7132cab237bSDimitry Andric   BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
7145517e702SDimitry Andric   auto EntryFreq =
71524d58133SDimitry Andric       Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
71624d58133SDimitry Andric   auto OutliningCallFreq =
7172cab237bSDimitry Andric       Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
7182cab237bSDimitry Andric   // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
7192cab237bSDimitry Andric   // we outlined any regions, so we may encounter situations where the
7202cab237bSDimitry Andric   // OutliningCallFreq is *slightly* bigger than the EntryFreq.
7212cab237bSDimitry Andric   if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
7222cab237bSDimitry Andric     OutliningCallFreq = EntryFreq;
7232cab237bSDimitry Andric   }
7242cab237bSDimitry Andric   auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
7252cab237bSDimitry Andric       OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
7265517e702SDimitry Andric 
72724d58133SDimitry Andric   if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
7285517e702SDimitry Andric     return OutlineRegionRelFreq;
7295517e702SDimitry Andric 
7306d97bb29SDimitry Andric   // When profile data is not available, we need to be conservative in
7316d97bb29SDimitry Andric   // estimating the overall savings. Static branch prediction can usually
7326d97bb29SDimitry Andric   // guess the branch direction right (taken/non-taken), but the guessed
7336d97bb29SDimitry Andric   // branch probability is usually not biased enough. In case when the
7346d97bb29SDimitry Andric   // outlined region is predicted to be likely, its probability needs
7356d97bb29SDimitry Andric   // to be made higher (more biased) to not under-estimate the cost of
7366d97bb29SDimitry Andric   // function outlining. On the other hand, if the outlined region
7376d97bb29SDimitry Andric   // is predicted to be less likely, the predicted probablity is usually
7386d97bb29SDimitry Andric   // higher than the actual. For instance, the actual probability of the
7396d97bb29SDimitry Andric   // less likely target is only 5%, but the guessed probablity can be
7406d97bb29SDimitry Andric   // 40%. In the latter case, there is no need for further adjustement.
7416d97bb29SDimitry Andric   // FIXME: add an option for this.
7426d97bb29SDimitry Andric   if (OutlineRegionRelFreq < BranchProbability(45, 100))
7436d97bb29SDimitry Andric     return OutlineRegionRelFreq;
7446d97bb29SDimitry Andric 
7456d97bb29SDimitry Andric   OutlineRegionRelFreq = std::max(
7466d97bb29SDimitry Andric       OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
7475517e702SDimitry Andric 
7485517e702SDimitry Andric   return OutlineRegionRelFreq;
7495517e702SDimitry Andric }
7505517e702SDimitry Andric 
shouldPartialInline(CallSite CS,FunctionCloner & Cloner,BlockFrequency WeightedOutliningRcost,OptimizationRemarkEmitter & ORE)7515517e702SDimitry Andric bool PartialInlinerImpl::shouldPartialInline(
7522cab237bSDimitry Andric     CallSite CS, FunctionCloner &Cloner,
7534ba319b5SDimitry Andric     BlockFrequency WeightedOutliningRcost,
7544ba319b5SDimitry Andric     OptimizationRemarkEmitter &ORE) {
755f37b6182SDimitry Andric   using namespace ore;
7562cab237bSDimitry Andric 
757f37b6182SDimitry Andric   Instruction *Call = CS.getInstruction();
758f37b6182SDimitry Andric   Function *Callee = CS.getCalledFunction();
75924d58133SDimitry Andric   assert(Callee == Cloner.ClonedFunc);
76024d58133SDimitry Andric 
7614ba319b5SDimitry Andric   if (SkipCostAnalysis)
7624ba319b5SDimitry Andric     return isInlineViable(*Callee);
7634ba319b5SDimitry Andric 
764f37b6182SDimitry Andric   Function *Caller = CS.getCaller();
765f37b6182SDimitry Andric   auto &CalleeTTI = (*GetTTI)(*Callee);
766f37b6182SDimitry Andric   InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
7672cab237bSDimitry Andric                                 *GetAssumptionCache, GetBFI, PSI, &ORE);
768f37b6182SDimitry Andric 
769f37b6182SDimitry Andric   if (IC.isAlways()) {
7702cab237bSDimitry Andric     ORE.emit([&]() {
7712cab237bSDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
77224d58133SDimitry Andric              << NV("Callee", Cloner.OrigFunc)
7732cab237bSDimitry Andric              << " should always be fully inlined, not partially";
7742cab237bSDimitry Andric     });
775f37b6182SDimitry Andric     return false;
776f37b6182SDimitry Andric   }
777f37b6182SDimitry Andric 
778f37b6182SDimitry Andric   if (IC.isNever()) {
7792cab237bSDimitry Andric     ORE.emit([&]() {
7802cab237bSDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
78124d58133SDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
782f37b6182SDimitry Andric              << NV("Caller", Caller)
7832cab237bSDimitry Andric              << " because it should never be inlined (cost=never)";
7842cab237bSDimitry Andric     });
785f37b6182SDimitry Andric     return false;
786f37b6182SDimitry Andric   }
787f37b6182SDimitry Andric 
788f37b6182SDimitry Andric   if (!IC) {
7892cab237bSDimitry Andric     ORE.emit([&]() {
7902cab237bSDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
79124d58133SDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
792f37b6182SDimitry Andric              << NV("Caller", Caller) << " because too costly to inline (cost="
793f37b6182SDimitry Andric              << NV("Cost", IC.getCost()) << ", threshold="
7942cab237bSDimitry Andric              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
7952cab237bSDimitry Andric     });
796f37b6182SDimitry Andric     return false;
797f37b6182SDimitry Andric   }
7985517e702SDimitry Andric   const DataLayout &DL = Caller->getParent()->getDataLayout();
79924d58133SDimitry Andric 
8005517e702SDimitry Andric   // The savings of eliminating the call:
8015517e702SDimitry Andric   int NonWeightedSavings = getCallsiteCost(CS, DL);
8025517e702SDimitry Andric   BlockFrequency NormWeightedSavings(NonWeightedSavings);
8035517e702SDimitry Andric 
8045517e702SDimitry Andric   // Weighted saving is smaller than weighted cost, return false
80524d58133SDimitry Andric   if (NormWeightedSavings < WeightedOutliningRcost) {
8062cab237bSDimitry Andric     ORE.emit([&]() {
8072cab237bSDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
8082cab237bSDimitry Andric                                         Call)
80924d58133SDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
8105517e702SDimitry Andric              << NV("Caller", Caller) << " runtime overhead (overhead="
81124d58133SDimitry Andric              << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
8125517e702SDimitry Andric              << ", savings="
8132cab237bSDimitry Andric              << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
8142cab237bSDimitry Andric              << ")"
8152cab237bSDimitry Andric              << " of making the outlined call is too high";
8162cab237bSDimitry Andric     });
8175517e702SDimitry Andric 
8185517e702SDimitry Andric     return false;
8195517e702SDimitry Andric   }
820f37b6182SDimitry Andric 
8212cab237bSDimitry Andric   ORE.emit([&]() {
8222cab237bSDimitry Andric     return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
82324d58133SDimitry Andric            << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
824f37b6182SDimitry Andric            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
825f37b6182SDimitry Andric            << " (threshold="
8262cab237bSDimitry Andric            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
8272cab237bSDimitry Andric   });
828f37b6182SDimitry Andric   return true;
829f37b6182SDimitry Andric }
830f37b6182SDimitry Andric 
8315517e702SDimitry Andric // TODO: Ideally  we should share Inliner's InlineCost Analysis code.
8325517e702SDimitry Andric // For now use a simplified version. The returned 'InlineCost' will be used
8335517e702SDimitry Andric // to esimate the size cost as well as runtime cost of the BB.
computeBBInlineCost(BasicBlock * BB)8345517e702SDimitry Andric int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
8355517e702SDimitry Andric   int InlineCost = 0;
8365517e702SDimitry Andric   const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
837*b5893f02SDimitry Andric   for (Instruction &I : BB->instructionsWithoutDebug()) {
838*b5893f02SDimitry Andric     // Skip free instructions.
839*b5893f02SDimitry Andric     switch (I.getOpcode()) {
8406d97bb29SDimitry Andric     case Instruction::BitCast:
8416d97bb29SDimitry Andric     case Instruction::PtrToInt:
8426d97bb29SDimitry Andric     case Instruction::IntToPtr:
8436d97bb29SDimitry Andric     case Instruction::Alloca:
844*b5893f02SDimitry Andric     case Instruction::PHI:
8456d97bb29SDimitry Andric       continue;
8466d97bb29SDimitry Andric     case Instruction::GetElementPtr:
847*b5893f02SDimitry Andric       if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
8486d97bb29SDimitry Andric         continue;
849da09e106SDimitry Andric       break;
8506d97bb29SDimitry Andric     default:
8516d97bb29SDimitry Andric       break;
8526d97bb29SDimitry Andric     }
8536d97bb29SDimitry Andric 
854*b5893f02SDimitry Andric     if (I.isLifetimeStartOrEnd())
8556d97bb29SDimitry Andric       continue;
8566d97bb29SDimitry Andric 
857*b5893f02SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
8585517e702SDimitry Andric       InlineCost += getCallsiteCost(CallSite(CI), DL);
8595517e702SDimitry Andric       continue;
8605517e702SDimitry Andric     }
8615517e702SDimitry Andric 
862*b5893f02SDimitry Andric     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
8635517e702SDimitry Andric       InlineCost += getCallsiteCost(CallSite(II), DL);
8645517e702SDimitry Andric       continue;
8655517e702SDimitry Andric     }
8665517e702SDimitry Andric 
867*b5893f02SDimitry Andric     if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
8685517e702SDimitry Andric       InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
8695517e702SDimitry Andric       continue;
8705517e702SDimitry Andric     }
8715517e702SDimitry Andric     InlineCost += InlineConstants::InstrCost;
8725517e702SDimitry Andric   }
8735517e702SDimitry Andric   return InlineCost;
8745517e702SDimitry Andric }
8755517e702SDimitry Andric 
87624d58133SDimitry Andric std::tuple<int, int>
computeOutliningCosts(FunctionCloner & Cloner)87724d58133SDimitry Andric PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
8782cab237bSDimitry Andric   int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
8792cab237bSDimitry Andric   for (auto FuncBBPair : Cloner.OutlinedFunctions) {
8802cab237bSDimitry Andric     Function *OutlinedFunc = FuncBBPair.first;
8812cab237bSDimitry Andric     BasicBlock* OutliningCallBB = FuncBBPair.second;
8825517e702SDimitry Andric     // Now compute the cost of the call sequence to the outlined function
8835517e702SDimitry Andric     // 'OutlinedFunction' in BB 'OutliningCallBB':
8842cab237bSDimitry Andric     OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
8855517e702SDimitry Andric 
8865517e702SDimitry Andric     // Now compute the cost of the extracted/outlined function itself:
8872cab237bSDimitry Andric     for (BasicBlock &BB : *OutlinedFunc)
8885517e702SDimitry Andric       OutlinedFunctionCost += computeBBInlineCost(&BB);
8895517e702SDimitry Andric   }
89024d58133SDimitry Andric   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
8915517e702SDimitry Andric          "Outlined function cost should be no less than the outlined region");
8922cab237bSDimitry Andric 
8936d97bb29SDimitry Andric   // The code extractor introduces a new root and exit stub blocks with
8946d97bb29SDimitry Andric   // additional unconditional branches. Those branches will be eliminated
8956d97bb29SDimitry Andric   // later with bb layout. The cost should be adjusted accordingly:
8962cab237bSDimitry Andric   OutlinedFunctionCost -=
8972cab237bSDimitry Andric       2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
8986d97bb29SDimitry Andric 
89924d58133SDimitry Andric   int OutliningRuntimeOverhead =
90024d58133SDimitry Andric       OutliningFuncCallCost +
90124d58133SDimitry Andric       (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
9026d97bb29SDimitry Andric       ExtraOutliningPenalty;
9035517e702SDimitry Andric 
90424d58133SDimitry Andric   return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
9055517e702SDimitry Andric }
9065517e702SDimitry Andric 
9075517e702SDimitry Andric // Create the callsite to profile count map which is
9085517e702SDimitry Andric // used to update the original function's entry count,
9095517e702SDimitry Andric // after the function is partially inlined into the callsite.
computeCallsiteToProfCountMap(Function * DuplicateFunction,DenseMap<User *,uint64_t> & CallSiteToProfCountMap)9105517e702SDimitry Andric void PartialInlinerImpl::computeCallsiteToProfCountMap(
9115517e702SDimitry Andric     Function *DuplicateFunction,
9125517e702SDimitry Andric     DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
9135517e702SDimitry Andric   std::vector<User *> Users(DuplicateFunction->user_begin(),
9145517e702SDimitry Andric                             DuplicateFunction->user_end());
9155517e702SDimitry Andric   Function *CurrentCaller = nullptr;
916302affcbSDimitry Andric   std::unique_ptr<BlockFrequencyInfo> TempBFI;
9175517e702SDimitry Andric   BlockFrequencyInfo *CurrentCallerBFI = nullptr;
9185517e702SDimitry Andric 
9195517e702SDimitry Andric   auto ComputeCurrBFI = [&,this](Function *Caller) {
9205517e702SDimitry Andric       // For the old pass manager:
9215517e702SDimitry Andric       if (!GetBFI) {
9225517e702SDimitry Andric         DominatorTree DT(*Caller);
9235517e702SDimitry Andric         LoopInfo LI(DT);
9245517e702SDimitry Andric         BranchProbabilityInfo BPI(*Caller, LI);
925302affcbSDimitry Andric         TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
926302affcbSDimitry Andric         CurrentCallerBFI = TempBFI.get();
9275517e702SDimitry Andric       } else {
9285517e702SDimitry Andric         // New pass manager:
9295517e702SDimitry Andric         CurrentCallerBFI = &(*GetBFI)(*Caller);
9305517e702SDimitry Andric       }
9315517e702SDimitry Andric   };
9325517e702SDimitry Andric 
9335517e702SDimitry Andric   for (User *User : Users) {
9345517e702SDimitry Andric     CallSite CS = getCallSite(User);
9355517e702SDimitry Andric     Function *Caller = CS.getCaller();
9365517e702SDimitry Andric     if (CurrentCaller != Caller) {
9375517e702SDimitry Andric       CurrentCaller = Caller;
9385517e702SDimitry Andric       ComputeCurrBFI(Caller);
9395517e702SDimitry Andric     } else {
9405517e702SDimitry Andric       assert(CurrentCallerBFI && "CallerBFI is not set");
9415517e702SDimitry Andric     }
9425517e702SDimitry Andric     BasicBlock *CallBB = CS.getInstruction()->getParent();
9435517e702SDimitry Andric     auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
9445517e702SDimitry Andric     if (Count)
9455517e702SDimitry Andric       CallSiteToProfCountMap[User] = *Count;
9465517e702SDimitry Andric     else
9475517e702SDimitry Andric       CallSiteToProfCountMap[User] = 0;
9485517e702SDimitry Andric   }
9495517e702SDimitry Andric }
9505517e702SDimitry Andric 
FunctionCloner(Function * F,FunctionOutliningInfo * OI,OptimizationRemarkEmitter & ORE)9512cab237bSDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
9522cab237bSDimitry Andric     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
9532cab237bSDimitry Andric     : OrigFunc(F), ORE(ORE) {
95424d58133SDimitry Andric   ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
95524d58133SDimitry Andric 
95624d58133SDimitry Andric   // Clone the function, so that we can hack away on it.
95724d58133SDimitry Andric   ValueToValueMapTy VMap;
95824d58133SDimitry Andric   ClonedFunc = CloneFunction(F, VMap);
95924d58133SDimitry Andric 
96024d58133SDimitry Andric   ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
96124d58133SDimitry Andric   ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
96224d58133SDimitry Andric   for (BasicBlock *BB : OI->Entries) {
96324d58133SDimitry Andric     ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
96424d58133SDimitry Andric   }
96524d58133SDimitry Andric   for (BasicBlock *E : OI->ReturnBlockPreds) {
96624d58133SDimitry Andric     BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
96724d58133SDimitry Andric     ClonedOI->ReturnBlockPreds.push_back(NewE);
96824d58133SDimitry Andric   }
96924d58133SDimitry Andric   // Go ahead and update all uses to the duplicate, so that we can just
97024d58133SDimitry Andric   // use the inliner functionality when we're done hacking.
97124d58133SDimitry Andric   F->replaceAllUsesWith(ClonedFunc);
97224d58133SDimitry Andric }
97324d58133SDimitry Andric 
FunctionCloner(Function * F,FunctionOutliningMultiRegionInfo * OI,OptimizationRemarkEmitter & ORE)9742cab237bSDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
9752cab237bSDimitry Andric     Function *F, FunctionOutliningMultiRegionInfo *OI,
9762cab237bSDimitry Andric     OptimizationRemarkEmitter &ORE)
9772cab237bSDimitry Andric     : OrigFunc(F), ORE(ORE) {
9782cab237bSDimitry Andric   ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
97924d58133SDimitry Andric 
9802cab237bSDimitry Andric   // Clone the function, so that we can hack away on it.
9812cab237bSDimitry Andric   ValueToValueMapTy VMap;
9822cab237bSDimitry Andric   ClonedFunc = CloneFunction(F, VMap);
9832cab237bSDimitry Andric 
9842cab237bSDimitry Andric   // Go through all Outline Candidate Regions and update all BasicBlock
9852cab237bSDimitry Andric   // information.
9862cab237bSDimitry Andric   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
9872cab237bSDimitry Andric        OI->ORI) {
9882cab237bSDimitry Andric     SmallVector<BasicBlock *, 8> Region;
9892cab237bSDimitry Andric     for (BasicBlock *BB : RegionInfo.Region) {
9902cab237bSDimitry Andric       Region.push_back(cast<BasicBlock>(VMap[BB]));
9912cab237bSDimitry Andric     }
9922cab237bSDimitry Andric     BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
9932cab237bSDimitry Andric     BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
9942cab237bSDimitry Andric     BasicBlock *NewReturnBlock = nullptr;
9952cab237bSDimitry Andric     if (RegionInfo.ReturnBlock)
9962cab237bSDimitry Andric       NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
9972cab237bSDimitry Andric     FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
9982cab237bSDimitry Andric         Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
9992cab237bSDimitry Andric     ClonedOMRI->ORI.push_back(MappedRegionInfo);
10002cab237bSDimitry Andric   }
10012cab237bSDimitry Andric   // Go ahead and update all uses to the duplicate, so that we can just
10022cab237bSDimitry Andric   // use the inliner functionality when we're done hacking.
10032cab237bSDimitry Andric   F->replaceAllUsesWith(ClonedFunc);
10042cab237bSDimitry Andric }
10052cab237bSDimitry Andric 
NormalizeReturnBlock()10062cab237bSDimitry Andric void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
100724d58133SDimitry Andric   auto getFirstPHI = [](BasicBlock *BB) {
100824d58133SDimitry Andric     BasicBlock::iterator I = BB->begin();
100924d58133SDimitry Andric     PHINode *FirstPhi = nullptr;
101024d58133SDimitry Andric     while (I != BB->end()) {
101124d58133SDimitry Andric       PHINode *Phi = dyn_cast<PHINode>(I);
101224d58133SDimitry Andric       if (!Phi)
101324d58133SDimitry Andric         break;
101424d58133SDimitry Andric       if (!FirstPhi) {
101524d58133SDimitry Andric         FirstPhi = Phi;
101624d58133SDimitry Andric         break;
101724d58133SDimitry Andric       }
101824d58133SDimitry Andric     }
101924d58133SDimitry Andric     return FirstPhi;
102024d58133SDimitry Andric   };
102124d58133SDimitry Andric 
10222cab237bSDimitry Andric   // Shouldn't need to normalize PHIs if we're not outlining non-early return
10232cab237bSDimitry Andric   // blocks.
10242cab237bSDimitry Andric   if (!ClonedOI)
10252cab237bSDimitry Andric     return;
10262cab237bSDimitry Andric 
102724d58133SDimitry Andric   // Special hackery is needed with PHI nodes that have inputs from more than
102824d58133SDimitry Andric   // one extracted block.  For simplicity, just split the PHIs into a two-level
102924d58133SDimitry Andric   // sequence of PHIs, some of which will go in the extracted region, and some
103024d58133SDimitry Andric   // of which will go outside.
103124d58133SDimitry Andric   BasicBlock *PreReturn = ClonedOI->ReturnBlock;
103224d58133SDimitry Andric   // only split block when necessary:
103324d58133SDimitry Andric   PHINode *FirstPhi = getFirstPHI(PreReturn);
103424d58133SDimitry Andric   unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
103524d58133SDimitry Andric 
103624d58133SDimitry Andric   if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
103724d58133SDimitry Andric     return;
103824d58133SDimitry Andric 
103924d58133SDimitry Andric   auto IsTrivialPhi = [](PHINode *PN) -> Value * {
104024d58133SDimitry Andric     Value *CommonValue = PN->getIncomingValue(0);
104124d58133SDimitry Andric     if (all_of(PN->incoming_values(),
104224d58133SDimitry Andric                [&](Value *V) { return V == CommonValue; }))
104324d58133SDimitry Andric       return CommonValue;
104424d58133SDimitry Andric     return nullptr;
104524d58133SDimitry Andric   };
104624d58133SDimitry Andric 
104724d58133SDimitry Andric   ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
104824d58133SDimitry Andric       ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
104924d58133SDimitry Andric   BasicBlock::iterator I = PreReturn->begin();
105024d58133SDimitry Andric   Instruction *Ins = &ClonedOI->ReturnBlock->front();
105124d58133SDimitry Andric   SmallVector<Instruction *, 4> DeadPhis;
105224d58133SDimitry Andric   while (I != PreReturn->end()) {
105324d58133SDimitry Andric     PHINode *OldPhi = dyn_cast<PHINode>(I);
105424d58133SDimitry Andric     if (!OldPhi)
105524d58133SDimitry Andric       break;
105624d58133SDimitry Andric 
105724d58133SDimitry Andric     PHINode *RetPhi =
105824d58133SDimitry Andric         PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
105924d58133SDimitry Andric     OldPhi->replaceAllUsesWith(RetPhi);
106024d58133SDimitry Andric     Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
106124d58133SDimitry Andric 
106224d58133SDimitry Andric     RetPhi->addIncoming(&*I, PreReturn);
106324d58133SDimitry Andric     for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
106424d58133SDimitry Andric       RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
106524d58133SDimitry Andric       OldPhi->removeIncomingValue(E);
106624d58133SDimitry Andric     }
106724d58133SDimitry Andric 
106824d58133SDimitry Andric     // After incoming values splitting, the old phi may become trivial.
106924d58133SDimitry Andric     // Keeping the trivial phi can introduce definition inside the outline
107024d58133SDimitry Andric     // region which is live-out, causing necessary overhead (load, store
107124d58133SDimitry Andric     // arg passing etc).
107224d58133SDimitry Andric     if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
107324d58133SDimitry Andric       OldPhi->replaceAllUsesWith(OldPhiVal);
107424d58133SDimitry Andric       DeadPhis.push_back(OldPhi);
107524d58133SDimitry Andric     }
107624d58133SDimitry Andric     ++I;
107724d58133SDimitry Andric   }
107824d58133SDimitry Andric   for (auto *DP : DeadPhis)
107924d58133SDimitry Andric     DP->eraseFromParent();
108024d58133SDimitry Andric 
108124d58133SDimitry Andric   for (auto E : ClonedOI->ReturnBlockPreds) {
108224d58133SDimitry Andric     E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
108324d58133SDimitry Andric   }
108424d58133SDimitry Andric }
108524d58133SDimitry Andric 
doMultiRegionFunctionOutlining()10862cab237bSDimitry Andric bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
10872cab237bSDimitry Andric 
10882cab237bSDimitry Andric   auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
10892cab237bSDimitry Andric     int Cost = 0;
10902cab237bSDimitry Andric     for (BasicBlock* BB : Region)
10912cab237bSDimitry Andric       Cost += computeBBInlineCost(BB);
10922cab237bSDimitry Andric     return Cost;
10932cab237bSDimitry Andric   };
10942cab237bSDimitry Andric 
10952cab237bSDimitry Andric   assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
10962cab237bSDimitry Andric 
10972cab237bSDimitry Andric   if (ClonedOMRI->ORI.empty())
10982cab237bSDimitry Andric     return false;
10992cab237bSDimitry Andric 
11002cab237bSDimitry Andric   // The CodeExtractor needs a dominator tree.
11012cab237bSDimitry Andric   DominatorTree DT;
11022cab237bSDimitry Andric   DT.recalculate(*ClonedFunc);
11032cab237bSDimitry Andric 
11042cab237bSDimitry Andric   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
11052cab237bSDimitry Andric   LoopInfo LI(DT);
11062cab237bSDimitry Andric   BranchProbabilityInfo BPI(*ClonedFunc, LI);
11072cab237bSDimitry Andric   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
11082cab237bSDimitry Andric 
11092cab237bSDimitry Andric   SetVector<Value *> Inputs, Outputs, Sinks;
11102cab237bSDimitry Andric   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
11112cab237bSDimitry Andric        ClonedOMRI->ORI) {
11122cab237bSDimitry Andric     int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
11132cab237bSDimitry Andric 
11142cab237bSDimitry Andric     CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
11152cab237bSDimitry Andric                      ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
11162cab237bSDimitry Andric 
11172cab237bSDimitry Andric     CE.findInputsOutputs(Inputs, Outputs, Sinks);
11182cab237bSDimitry Andric 
11192cab237bSDimitry Andric #ifndef NDEBUG
11202cab237bSDimitry Andric     if (TracePartialInlining) {
11212cab237bSDimitry Andric       dbgs() << "inputs: " << Inputs.size() << "\n";
11222cab237bSDimitry Andric       dbgs() << "outputs: " << Outputs.size() << "\n";
11232cab237bSDimitry Andric       for (Value *value : Inputs)
11242cab237bSDimitry Andric         dbgs() << "value used in func: " << *value << "\n";
11252cab237bSDimitry Andric       for (Value *output : Outputs)
11262cab237bSDimitry Andric         dbgs() << "instr used in func: " << *output << "\n";
11272cab237bSDimitry Andric     }
11282cab237bSDimitry Andric #endif
11292cab237bSDimitry Andric     // Do not extract regions that have live exit variables.
11302cab237bSDimitry Andric     if (Outputs.size() > 0 && !ForceLiveExit)
11312cab237bSDimitry Andric       continue;
11322cab237bSDimitry Andric 
11332cab237bSDimitry Andric     Function *OutlinedFunc = CE.extractCodeRegion();
11342cab237bSDimitry Andric 
11352cab237bSDimitry Andric     if (OutlinedFunc) {
11362cab237bSDimitry Andric       CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
11372cab237bSDimitry Andric       BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
11382cab237bSDimitry Andric       assert(OutliningCallBB->getParent() == ClonedFunc);
11392cab237bSDimitry Andric       OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
11402cab237bSDimitry Andric       NumColdRegionsOutlined++;
11412cab237bSDimitry Andric       OutlinedRegionCost += CurrentOutlinedRegionCost;
11422cab237bSDimitry Andric 
11432cab237bSDimitry Andric       if (MarkOutlinedColdCC) {
11442cab237bSDimitry Andric         OutlinedFunc->setCallingConv(CallingConv::Cold);
11452cab237bSDimitry Andric         OCS.setCallingConv(CallingConv::Cold);
11462cab237bSDimitry Andric       }
11472cab237bSDimitry Andric     } else
11482cab237bSDimitry Andric       ORE.emit([&]() {
11492cab237bSDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
11502cab237bSDimitry Andric                                         &RegionInfo.Region.front()->front())
11512cab237bSDimitry Andric                << "Failed to extract region at block "
11522cab237bSDimitry Andric                << ore::NV("Block", RegionInfo.Region.front());
11532cab237bSDimitry Andric       });
11542cab237bSDimitry Andric   }
11552cab237bSDimitry Andric 
11562cab237bSDimitry Andric   return !OutlinedFunctions.empty();
11572cab237bSDimitry Andric }
11582cab237bSDimitry Andric 
11592cab237bSDimitry Andric Function *
doSingleRegionFunctionOutlining()11602cab237bSDimitry Andric PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
116124d58133SDimitry Andric   // Returns true if the block is to be partial inlined into the caller
116224d58133SDimitry Andric   // (i.e. not to be extracted to the out of line function)
116324d58133SDimitry Andric   auto ToBeInlined = [&, this](BasicBlock *BB) {
116424d58133SDimitry Andric     return BB == ClonedOI->ReturnBlock ||
116524d58133SDimitry Andric            (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
116624d58133SDimitry Andric             ClonedOI->Entries.end());
116724d58133SDimitry Andric   };
116824d58133SDimitry Andric 
11692cab237bSDimitry Andric   assert(ClonedOI && "Expecting OutlineInfo for single region outline");
11702cab237bSDimitry Andric   // The CodeExtractor needs a dominator tree.
11712cab237bSDimitry Andric   DominatorTree DT;
11722cab237bSDimitry Andric   DT.recalculate(*ClonedFunc);
11732cab237bSDimitry Andric 
11742cab237bSDimitry Andric   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
11752cab237bSDimitry Andric   LoopInfo LI(DT);
11762cab237bSDimitry Andric   BranchProbabilityInfo BPI(*ClonedFunc, LI);
11772cab237bSDimitry Andric   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
11782cab237bSDimitry Andric 
117924d58133SDimitry Andric   // Gather up the blocks that we're going to extract.
118024d58133SDimitry Andric   std::vector<BasicBlock *> ToExtract;
118124d58133SDimitry Andric   ToExtract.push_back(ClonedOI->NonReturnBlock);
118224d58133SDimitry Andric   OutlinedRegionCost +=
118324d58133SDimitry Andric       PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
118424d58133SDimitry Andric   for (BasicBlock &BB : *ClonedFunc)
118524d58133SDimitry Andric     if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
118624d58133SDimitry Andric       ToExtract.push_back(&BB);
118724d58133SDimitry Andric       // FIXME: the code extractor may hoist/sink more code
118824d58133SDimitry Andric       // into the outlined function which may make the outlining
118924d58133SDimitry Andric       // overhead (the difference of the outlined function cost
119024d58133SDimitry Andric       // and OutliningRegionCost) look larger.
119124d58133SDimitry Andric       OutlinedRegionCost += computeBBInlineCost(&BB);
119224d58133SDimitry Andric     }
119324d58133SDimitry Andric 
119424d58133SDimitry Andric   // Extract the body of the if.
11952cab237bSDimitry Andric   Function *OutlinedFunc =
11962cab237bSDimitry Andric       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
11972cab237bSDimitry Andric                     ClonedFuncBFI.get(), &BPI,
11982cab237bSDimitry Andric                     /* AllowVarargs */ true)
119924d58133SDimitry Andric           .extractCodeRegion();
120024d58133SDimitry Andric 
120124d58133SDimitry Andric   if (OutlinedFunc) {
12022cab237bSDimitry Andric     BasicBlock *OutliningCallBB =
12032cab237bSDimitry Andric         PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
120424d58133SDimitry Andric             .getInstruction()
120524d58133SDimitry Andric             ->getParent();
120624d58133SDimitry Andric     assert(OutliningCallBB->getParent() == ClonedFunc);
12072cab237bSDimitry Andric     OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
12082cab237bSDimitry Andric   } else
12092cab237bSDimitry Andric     ORE.emit([&]() {
12102cab237bSDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
12112cab237bSDimitry Andric                                       &ToExtract.front()->front())
12122cab237bSDimitry Andric              << "Failed to extract region at block "
12132cab237bSDimitry Andric              << ore::NV("Block", ToExtract.front());
12142cab237bSDimitry Andric     });
121524d58133SDimitry Andric 
121624d58133SDimitry Andric   return OutlinedFunc;
121724d58133SDimitry Andric }
121824d58133SDimitry Andric 
~FunctionCloner()121924d58133SDimitry Andric PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
122024d58133SDimitry Andric   // Ditch the duplicate, since we're done with it, and rewrite all remaining
122124d58133SDimitry Andric   // users (function pointers, etc.) back to the original function.
122224d58133SDimitry Andric   ClonedFunc->replaceAllUsesWith(OrigFunc);
122324d58133SDimitry Andric   ClonedFunc->eraseFromParent();
122424d58133SDimitry Andric   if (!IsFunctionInlined) {
12252cab237bSDimitry Andric     // Remove each function that was speculatively created if there is no
122624d58133SDimitry Andric     // reference.
12272cab237bSDimitry Andric     for (auto FuncBBPair : OutlinedFunctions) {
12282cab237bSDimitry Andric       Function *Func = FuncBBPair.first;
12292cab237bSDimitry Andric       Func->eraseFromParent();
12302cab237bSDimitry Andric     }
123124d58133SDimitry Andric   }
123224d58133SDimitry Andric }
123324d58133SDimitry Andric 
unswitchFunction(Function * F)12342cab237bSDimitry Andric std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1235f37b6182SDimitry Andric 
1236f37b6182SDimitry Andric   if (F->hasAddressTaken())
12372cab237bSDimitry Andric     return {false, nullptr};
1238f37b6182SDimitry Andric 
1239f37b6182SDimitry Andric   // Let inliner handle it
1240f37b6182SDimitry Andric   if (F->hasFnAttribute(Attribute::AlwaysInline))
12412cab237bSDimitry Andric     return {false, nullptr};
1242f37b6182SDimitry Andric 
1243f37b6182SDimitry Andric   if (F->hasFnAttribute(Attribute::NoInline))
12442cab237bSDimitry Andric     return {false, nullptr};
1245f37b6182SDimitry Andric 
1246f37b6182SDimitry Andric   if (PSI->isFunctionEntryCold(F))
12472cab237bSDimitry Andric     return {false, nullptr};
1248f37b6182SDimitry Andric 
1249*b5893f02SDimitry Andric   if (empty(F->users()))
12502cab237bSDimitry Andric     return {false, nullptr};
1251f37b6182SDimitry Andric 
12524ba319b5SDimitry Andric   OptimizationRemarkEmitter ORE(F);
12532cab237bSDimitry Andric 
12542cab237bSDimitry Andric   // Only try to outline cold regions if we have a profile summary, which
12552cab237bSDimitry Andric   // implies we have profiling information.
1256da09e106SDimitry Andric   if (PSI->hasProfileSummary() && F->hasProfileData() &&
12572cab237bSDimitry Andric       !DisableMultiRegionPartialInline) {
12582cab237bSDimitry Andric     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
12594ba319b5SDimitry Andric         computeOutliningColdRegionsInfo(F, ORE);
12602cab237bSDimitry Andric     if (OMRI) {
12612cab237bSDimitry Andric       FunctionCloner Cloner(F, OMRI.get(), ORE);
12622cab237bSDimitry Andric 
12632cab237bSDimitry Andric #ifndef NDEBUG
12642cab237bSDimitry Andric       if (TracePartialInlining) {
12652cab237bSDimitry Andric         dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
12662cab237bSDimitry Andric         dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
12672cab237bSDimitry Andric                << "\n";
12682cab237bSDimitry Andric       }
12692cab237bSDimitry Andric #endif
12702cab237bSDimitry Andric       bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
12712cab237bSDimitry Andric 
12722cab237bSDimitry Andric       if (DidOutline) {
12732cab237bSDimitry Andric #ifndef NDEBUG
12742cab237bSDimitry Andric         if (TracePartialInlining) {
12752cab237bSDimitry Andric           dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
12762cab237bSDimitry Andric           Cloner.ClonedFunc->print(dbgs());
12772cab237bSDimitry Andric           dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
12782cab237bSDimitry Andric         }
12792cab237bSDimitry Andric #endif
12802cab237bSDimitry Andric 
12812cab237bSDimitry Andric         if (tryPartialInline(Cloner))
12822cab237bSDimitry Andric           return {true, nullptr};
12832cab237bSDimitry Andric       }
12842cab237bSDimitry Andric     }
12852cab237bSDimitry Andric   }
12862cab237bSDimitry Andric 
12872cab237bSDimitry Andric   // Fall-thru to regular partial inlining if we:
12882cab237bSDimitry Andric   //    i) can't find any cold regions to outline, or
12892cab237bSDimitry Andric   //   ii) can't inline the outlined function anywhere.
12905517e702SDimitry Andric   std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
12915517e702SDimitry Andric   if (!OI)
12922cab237bSDimitry Andric     return {false, nullptr};
1293f22ef01cSRoman Divacky 
12942cab237bSDimitry Andric   FunctionCloner Cloner(F, OI.get(), ORE);
129524d58133SDimitry Andric   Cloner.NormalizeReturnBlock();
12962cab237bSDimitry Andric 
12972cab237bSDimitry Andric   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
12982cab237bSDimitry Andric 
12992cab237bSDimitry Andric   if (!OutlinedFunction)
13002cab237bSDimitry Andric     return {false, nullptr};
1301f22ef01cSRoman Divacky 
130224d58133SDimitry Andric   bool AnyInline = tryPartialInline(Cloner);
1303f22ef01cSRoman Divacky 
13045517e702SDimitry Andric   if (AnyInline)
13052cab237bSDimitry Andric     return {true, OutlinedFunction};
1306f22ef01cSRoman Divacky 
13072cab237bSDimitry Andric   return {false, nullptr};
13085517e702SDimitry Andric }
13095517e702SDimitry Andric 
tryPartialInline(FunctionCloner & Cloner)131024d58133SDimitry Andric bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
13112cab237bSDimitry Andric   if (Cloner.OutlinedFunctions.empty())
131224d58133SDimitry Andric     return false;
13135517e702SDimitry Andric 
13142cab237bSDimitry Andric   int SizeCost = 0;
13152cab237bSDimitry Andric   BlockFrequency WeightedRcost;
13162cab237bSDimitry Andric   int NonWeightedRcost;
131724d58133SDimitry Andric   std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
131824d58133SDimitry Andric 
13192cab237bSDimitry Andric   // Only calculate RelativeToEntryFreq when we are doing single region
13202cab237bSDimitry Andric   // outlining.
13212cab237bSDimitry Andric   BranchProbability RelativeToEntryFreq;
13222cab237bSDimitry Andric   if (Cloner.ClonedOI) {
13232cab237bSDimitry Andric     RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
13242cab237bSDimitry Andric   } else
13252cab237bSDimitry Andric     // RelativeToEntryFreq doesn't make sense when we have more than one
13262cab237bSDimitry Andric     // outlined call because each call will have a different relative frequency
13272cab237bSDimitry Andric     // to the entry block.  We can consider using the average, but the
13282cab237bSDimitry Andric     // usefulness of that information is questionable. For now, assume we never
13292cab237bSDimitry Andric     // execute the calls to outlined functions.
13302cab237bSDimitry Andric     RelativeToEntryFreq = BranchProbability(0, 1);
13315517e702SDimitry Andric 
13322cab237bSDimitry Andric   WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
13332cab237bSDimitry Andric 
13342cab237bSDimitry Andric   // The call sequence(s) to the outlined function(s) are larger than the sum of
13352cab237bSDimitry Andric   // the original outlined region size(s), it does not increase the chances of
13362cab237bSDimitry Andric   // inlining the function with outlining (The inliner uses the size increase to
133724d58133SDimitry Andric   // model the cost of inlining a callee).
133824d58133SDimitry Andric   if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
13394ba319b5SDimitry Andric     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
13405517e702SDimitry Andric     DebugLoc DLoc;
13415517e702SDimitry Andric     BasicBlock *Block;
134224d58133SDimitry Andric     std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
13434ba319b5SDimitry Andric     OrigFuncORE.emit([&]() {
13442cab237bSDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
13455517e702SDimitry Andric                                         DLoc, Block)
134624d58133SDimitry Andric              << ore::NV("Function", Cloner.OrigFunc)
13475517e702SDimitry Andric              << " not partially inlined into callers (Original Size = "
134824d58133SDimitry Andric              << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
13495517e702SDimitry Andric              << ", Size of call sequence to outlined function = "
13502cab237bSDimitry Andric              << ore::NV("NewSize", SizeCost) << ")";
13512cab237bSDimitry Andric     });
13525517e702SDimitry Andric     return false;
13535517e702SDimitry Andric   }
13545517e702SDimitry Andric 
1355*b5893f02SDimitry Andric   assert(empty(Cloner.OrigFunc->users()) &&
13565517e702SDimitry Andric          "F's users should all be replaced!");
135724d58133SDimitry Andric 
135824d58133SDimitry Andric   std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
135924d58133SDimitry Andric                             Cloner.ClonedFunc->user_end());
13605517e702SDimitry Andric 
13615517e702SDimitry Andric   DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1362da09e106SDimitry Andric   auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1363da09e106SDimitry Andric   if (CalleeEntryCount)
136424d58133SDimitry Andric     computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
13655517e702SDimitry Andric 
13664ba319b5SDimitry Andric   uint64_t CalleeEntryCountV =
13674ba319b5SDimitry Andric       (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
136824d58133SDimitry Andric 
13695517e702SDimitry Andric   bool AnyInline = false;
13705517e702SDimitry Andric   for (User *User : Users) {
13715517e702SDimitry Andric     CallSite CS = getCallSite(User);
13725517e702SDimitry Andric 
13735517e702SDimitry Andric     if (IsLimitReached())
13745517e702SDimitry Andric       continue;
13755517e702SDimitry Andric 
13764ba319b5SDimitry Andric     OptimizationRemarkEmitter CallerORE(CS.getCaller());
13774ba319b5SDimitry Andric     if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
13785517e702SDimitry Andric       continue;
13795517e702SDimitry Andric 
13802cab237bSDimitry Andric     // Construct remark before doing the inlining, as after successful inlining
13812cab237bSDimitry Andric     // the callsite is removed.
13822cab237bSDimitry Andric     OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
13832cab237bSDimitry Andric     OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
13842cab237bSDimitry Andric        << ore::NV("Caller", CS.getCaller());
13855517e702SDimitry Andric 
13865517e702SDimitry Andric     InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
13872cab237bSDimitry Andric     // We can only forward varargs when we outlined a single region, else we
13882cab237bSDimitry Andric     // bail on vararg functions.
13892cab237bSDimitry Andric     if (!InlineFunction(CS, IFI, nullptr, true,
13902cab237bSDimitry Andric                         (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
13912cab237bSDimitry Andric                                          : nullptr)))
13922cab237bSDimitry Andric       continue;
13932cab237bSDimitry Andric 
13944ba319b5SDimitry Andric     CallerORE.emit(OR);
13955517e702SDimitry Andric 
13965517e702SDimitry Andric     // Now update the entry count:
13975517e702SDimitry Andric     if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
13985517e702SDimitry Andric       uint64_t CallSiteCount = CallSiteToProfCountMap[User];
13995517e702SDimitry Andric       CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
14005517e702SDimitry Andric     }
14015517e702SDimitry Andric 
14025517e702SDimitry Andric     AnyInline = true;
14035517e702SDimitry Andric     NumPartialInlining++;
14045517e702SDimitry Andric     // Update the stats
14052cab237bSDimitry Andric     if (Cloner.ClonedOI)
14065517e702SDimitry Andric       NumPartialInlined++;
14072cab237bSDimitry Andric     else
14082cab237bSDimitry Andric       NumColdOutlinePartialInlined++;
14092cab237bSDimitry Andric 
14105517e702SDimitry Andric   }
14115517e702SDimitry Andric 
141224d58133SDimitry Andric   if (AnyInline) {
141324d58133SDimitry Andric     Cloner.IsFunctionInlined = true;
141424d58133SDimitry Andric     if (CalleeEntryCount)
14154ba319b5SDimitry Andric       Cloner.OrigFunc->setEntryCount(
14164ba319b5SDimitry Andric           CalleeEntryCount.setCount(CalleeEntryCountV));
14174ba319b5SDimitry Andric     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
14184ba319b5SDimitry Andric     OrigFuncORE.emit([&]() {
14192cab237bSDimitry Andric       return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
14202cab237bSDimitry Andric              << "Partially inlined into at least one caller";
14212cab237bSDimitry Andric     });
14222cab237bSDimitry Andric 
142324d58133SDimitry Andric   }
14245517e702SDimitry Andric 
14255517e702SDimitry Andric   return AnyInline;
1426f22ef01cSRoman Divacky }
1427f22ef01cSRoman Divacky 
run(Module & M)1428d88c1a5aSDimitry Andric bool PartialInlinerImpl::run(Module &M) {
142951690af2SDimitry Andric   if (DisablePartialInlining)
143051690af2SDimitry Andric     return false;
143151690af2SDimitry Andric 
1432d88c1a5aSDimitry Andric   std::vector<Function *> Worklist;
1433d88c1a5aSDimitry Andric   Worklist.reserve(M.size());
14343ca95b02SDimitry Andric   for (Function &F : M)
14353ca95b02SDimitry Andric     if (!F.use_empty() && !F.isDeclaration())
1436d88c1a5aSDimitry Andric       Worklist.push_back(&F);
1437f22ef01cSRoman Divacky 
1438d88c1a5aSDimitry Andric   bool Changed = false;
1439d88c1a5aSDimitry Andric   while (!Worklist.empty()) {
1440d88c1a5aSDimitry Andric     Function *CurrFunc = Worklist.back();
1441d88c1a5aSDimitry Andric     Worklist.pop_back();
1442f22ef01cSRoman Divacky 
1443d88c1a5aSDimitry Andric     if (CurrFunc->use_empty())
1444d88c1a5aSDimitry Andric       continue;
1445f22ef01cSRoman Divacky 
1446d88c1a5aSDimitry Andric     bool Recursive = false;
1447d88c1a5aSDimitry Andric     for (User *U : CurrFunc->users())
144891bc56edSDimitry Andric       if (Instruction *I = dyn_cast<Instruction>(U))
1449d88c1a5aSDimitry Andric         if (I->getParent()->getParent() == CurrFunc) {
1450d88c1a5aSDimitry Andric           Recursive = true;
1451f22ef01cSRoman Divacky           break;
1452f22ef01cSRoman Divacky         }
1453d88c1a5aSDimitry Andric     if (Recursive)
1454d88c1a5aSDimitry Andric       continue;
1455f22ef01cSRoman Divacky 
14562cab237bSDimitry Andric     std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
14572cab237bSDimitry Andric     if (Result.second)
14582cab237bSDimitry Andric       Worklist.push_back(Result.second);
1459*b5893f02SDimitry Andric     Changed |= Result.first;
1460f22ef01cSRoman Divacky   }
1461f22ef01cSRoman Divacky 
1462d88c1a5aSDimitry Andric   return Changed;
1463f22ef01cSRoman Divacky }
1464f22ef01cSRoman Divacky 
1465d88c1a5aSDimitry Andric char PartialInlinerLegacyPass::ID = 0;
14662cab237bSDimitry Andric 
1467d88c1a5aSDimitry Andric INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1468d88c1a5aSDimitry Andric                       "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)1469d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1470f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
1471f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1472d88c1a5aSDimitry Andric INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1473d88c1a5aSDimitry Andric                     "Partial Inliner", false, false)
1474d88c1a5aSDimitry Andric 
1475d88c1a5aSDimitry Andric ModulePass *llvm::createPartialInliningPass() {
1476d88c1a5aSDimitry Andric   return new PartialInlinerLegacyPass();
1477d88c1a5aSDimitry Andric }
1478d88c1a5aSDimitry Andric 
run(Module & M,ModuleAnalysisManager & AM)1479d88c1a5aSDimitry Andric PreservedAnalyses PartialInlinerPass::run(Module &M,
1480d88c1a5aSDimitry Andric                                           ModuleAnalysisManager &AM) {
1481d88c1a5aSDimitry Andric   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1482f37b6182SDimitry Andric 
1483d88c1a5aSDimitry Andric   std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1484d88c1a5aSDimitry Andric       [&FAM](Function &F) -> AssumptionCache & {
1485d88c1a5aSDimitry Andric     return FAM.getResult<AssumptionAnalysis>(F);
1486d88c1a5aSDimitry Andric   };
1487f37b6182SDimitry Andric 
1488f37b6182SDimitry Andric   std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1489f37b6182SDimitry Andric       [&FAM](Function &F) -> BlockFrequencyInfo & {
1490f37b6182SDimitry Andric     return FAM.getResult<BlockFrequencyAnalysis>(F);
1491f37b6182SDimitry Andric   };
1492f37b6182SDimitry Andric 
1493f37b6182SDimitry Andric   std::function<TargetTransformInfo &(Function &)> GetTTI =
1494f37b6182SDimitry Andric       [&FAM](Function &F) -> TargetTransformInfo & {
1495f37b6182SDimitry Andric     return FAM.getResult<TargetIRAnalysis>(F);
1496f37b6182SDimitry Andric   };
1497f37b6182SDimitry Andric 
1498f37b6182SDimitry Andric   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1499f37b6182SDimitry Andric 
15004ba319b5SDimitry Andric   if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
15012cab237bSDimitry Andric           .run(M))
15023ca95b02SDimitry Andric     return PreservedAnalyses::none();
15033ca95b02SDimitry Andric   return PreservedAnalyses::all();
1504f22ef01cSRoman Divacky }
1505