1f22ef01cSRoman Divacky //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky // The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
10f22ef01cSRoman Divacky // This pass performs partial inlining, typically by inlining an if statement
11f22ef01cSRoman Divacky // that surrounds the body of the function.
12f22ef01cSRoman Divacky //
13f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
14f22ef01cSRoman Divacky
153ca95b02SDimitry Andric #include "llvm/Transforms/IPO/PartialInlining.h"
162cab237bSDimitry Andric #include "llvm/ADT/DenseMap.h"
172cab237bSDimitry Andric #include "llvm/ADT/DenseSet.h"
182cab237bSDimitry Andric #include "llvm/ADT/None.h"
192cab237bSDimitry Andric #include "llvm/ADT/Optional.h"
202cab237bSDimitry Andric #include "llvm/ADT/STLExtras.h"
212cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h"
22139f7f9bSDimitry Andric #include "llvm/ADT/Statistic.h"
23d88c1a5aSDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
24d88c1a5aSDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
25f37b6182SDimitry Andric #include "llvm/Analysis/InlineCost.h"
26d88c1a5aSDimitry Andric #include "llvm/Analysis/LoopInfo.h"
272cab237bSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
28f37b6182SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
29f37b6182SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
30f37b6182SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
312cab237bSDimitry Andric #include "llvm/IR/Attributes.h"
322cab237bSDimitry Andric #include "llvm/IR/BasicBlock.h"
3391bc56edSDimitry Andric #include "llvm/IR/CFG.h"
342cab237bSDimitry Andric #include "llvm/IR/CallSite.h"
352cab237bSDimitry Andric #include "llvm/IR/DebugLoc.h"
3651690af2SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
3791bc56edSDimitry Andric #include "llvm/IR/Dominators.h"
382cab237bSDimitry Andric #include "llvm/IR/Function.h"
392cab237bSDimitry Andric #include "llvm/IR/InstrTypes.h"
402cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
41139f7f9bSDimitry Andric #include "llvm/IR/Instructions.h"
422cab237bSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
432cab237bSDimitry Andric #include "llvm/IR/Intrinsics.h"
44139f7f9bSDimitry Andric #include "llvm/IR/Module.h"
452cab237bSDimitry Andric #include "llvm/IR/User.h"
46139f7f9bSDimitry Andric #include "llvm/Pass.h"
472cab237bSDimitry Andric #include "llvm/Support/BlockFrequency.h"
482cab237bSDimitry Andric #include "llvm/Support/BranchProbability.h"
492cab237bSDimitry Andric #include "llvm/Support/Casting.h"
502cab237bSDimitry Andric #include "llvm/Support/CommandLine.h"
512cab237bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
523ca95b02SDimitry Andric #include "llvm/Transforms/IPO.h"
53f22ef01cSRoman Divacky #include "llvm/Transforms/Utils/Cloning.h"
547ae0e2c9SDimitry Andric #include "llvm/Transforms/Utils/CodeExtractor.h"
552cab237bSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
562cab237bSDimitry Andric #include <algorithm>
572cab237bSDimitry Andric #include <cassert>
582cab237bSDimitry Andric #include <cstdint>
592cab237bSDimitry Andric #include <functional>
602cab237bSDimitry Andric #include <iterator>
612cab237bSDimitry Andric #include <memory>
622cab237bSDimitry Andric #include <tuple>
632cab237bSDimitry Andric #include <vector>
642cab237bSDimitry Andric
65f22ef01cSRoman Divacky using namespace llvm;
66f22ef01cSRoman Divacky
6751690af2SDimitry Andric #define DEBUG_TYPE "partial-inlining"
6891bc56edSDimitry Andric
69f37b6182SDimitry Andric STATISTIC(NumPartialInlined,
70f37b6182SDimitry Andric "Number of callsites functions partially inlined into.");
712cab237bSDimitry Andric STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
722cab237bSDimitry Andric "cold outlined regions were partially "
732cab237bSDimitry Andric "inlined into its caller(s).");
742cab237bSDimitry Andric STATISTIC(NumColdRegionsFound,
752cab237bSDimitry Andric "Number of cold single entry/exit regions found.");
762cab237bSDimitry Andric STATISTIC(NumColdRegionsOutlined,
772cab237bSDimitry Andric "Number of cold single entry/exit regions outlined.");
78f22ef01cSRoman Divacky
7951690af2SDimitry Andric // Command line option to disable partial-inlining. The default is false:
8051690af2SDimitry Andric static cl::opt<bool>
8151690af2SDimitry Andric DisablePartialInlining("disable-partial-inlining", cl::init(false),
822cab237bSDimitry Andric cl::Hidden, cl::desc("Disable partial inlining"));
832cab237bSDimitry Andric // Command line option to disable multi-region partial-inlining. The default is
842cab237bSDimitry Andric // false:
852cab237bSDimitry Andric static cl::opt<bool> DisableMultiRegionPartialInline(
862cab237bSDimitry Andric "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
872cab237bSDimitry Andric cl::desc("Disable multi-region partial inlining"));
882cab237bSDimitry Andric
892cab237bSDimitry Andric // Command line option to force outlining in regions with live exit variables.
902cab237bSDimitry Andric // The default is false:
912cab237bSDimitry Andric static cl::opt<bool>
922cab237bSDimitry Andric ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
932cab237bSDimitry Andric cl::desc("Force outline regions with live exits"));
942cab237bSDimitry Andric
952cab237bSDimitry Andric // Command line option to enable marking outline functions with Cold Calling
962cab237bSDimitry Andric // Convention. The default is false:
972cab237bSDimitry Andric static cl::opt<bool>
982cab237bSDimitry Andric MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
992cab237bSDimitry Andric cl::desc("Mark outline function calls with ColdCC"));
1002cab237bSDimitry Andric
1012cab237bSDimitry Andric #ifndef NDEBUG
1022cab237bSDimitry Andric // Command line option to debug partial-inlining. The default is none:
1032cab237bSDimitry Andric static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
1042cab237bSDimitry Andric cl::init(false), cl::Hidden,
1052cab237bSDimitry Andric cl::desc("Trace partial inlining."));
1062cab237bSDimitry Andric #endif
1072cab237bSDimitry Andric
1085517e702SDimitry Andric // This is an option used by testing:
1095517e702SDimitry Andric static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
1105517e702SDimitry Andric cl::init(false), cl::ZeroOrMore,
1115517e702SDimitry Andric cl::ReallyHidden,
1125517e702SDimitry Andric cl::desc("Skip Cost Analysis"));
1132cab237bSDimitry Andric // Used to determine if a cold region is worth outlining based on
1142cab237bSDimitry Andric // its inlining cost compared to the original function. Default is set at 10%.
1152cab237bSDimitry Andric // ie. if the cold region reduces the inlining cost of the original function by
1162cab237bSDimitry Andric // at least 10%.
1172cab237bSDimitry Andric static cl::opt<float> MinRegionSizeRatio(
1182cab237bSDimitry Andric "min-region-size-ratio", cl::init(0.1), cl::Hidden,
1192cab237bSDimitry Andric cl::desc("Minimum ratio comparing relative sizes of each "
1202cab237bSDimitry Andric "outline candidate and original function"));
1212cab237bSDimitry Andric // Used to tune the minimum number of execution counts needed in the predecessor
1222cab237bSDimitry Andric // block to the cold edge. ie. confidence interval.
1232cab237bSDimitry Andric static cl::opt<unsigned>
1242cab237bSDimitry Andric MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
1252cab237bSDimitry Andric cl::desc("Minimum block executions to consider "
1262cab237bSDimitry Andric "its BranchProbabilityInfo valid"));
1272cab237bSDimitry Andric // Used to determine when an edge is considered cold. Default is set to 10%. ie.
1282cab237bSDimitry Andric // if the branch probability is 10% or less, then it is deemed as 'cold'.
1292cab237bSDimitry Andric static cl::opt<float> ColdBranchRatio(
1302cab237bSDimitry Andric "cold-branch-ratio", cl::init(0.1), cl::Hidden,
1312cab237bSDimitry Andric cl::desc("Minimum BranchProbability to consider a region cold."));
13251690af2SDimitry Andric
133f37b6182SDimitry Andric static cl::opt<unsigned> MaxNumInlineBlocks(
134f37b6182SDimitry Andric "max-num-inline-blocks", cl::init(5), cl::Hidden,
1352cab237bSDimitry Andric cl::desc("Max number of blocks to be partially inlined"));
136f37b6182SDimitry Andric
13751690af2SDimitry Andric // Command line option to set the maximum number of partial inlining allowed
13851690af2SDimitry Andric // for the module. The default value of -1 means no limit.
13951690af2SDimitry Andric static cl::opt<int> MaxNumPartialInlining(
14051690af2SDimitry Andric "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
14151690af2SDimitry Andric cl::desc("Max number of partial inlining. The default is unlimited"));
14251690af2SDimitry Andric
1435517e702SDimitry Andric // Used only when PGO or user annotated branch data is absent. It is
1445517e702SDimitry Andric // the least value that is used to weigh the outline region. If BFI
1455517e702SDimitry Andric // produces larger value, the BFI value will be used.
1465517e702SDimitry Andric static cl::opt<int>
1475517e702SDimitry Andric OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
1485517e702SDimitry Andric cl::Hidden, cl::ZeroOrMore,
1495517e702SDimitry Andric cl::desc("Relative frequency of outline region to "
1505517e702SDimitry Andric "the entry block"));
1515517e702SDimitry Andric
1526d97bb29SDimitry Andric static cl::opt<unsigned> ExtraOutliningPenalty(
1536d97bb29SDimitry Andric "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
1546d97bb29SDimitry Andric cl::desc("A debug option to add additional penalty to the computed one."));
1556d97bb29SDimitry Andric
156f22ef01cSRoman Divacky namespace {
157f37b6182SDimitry Andric
158f37b6182SDimitry Andric struct FunctionOutliningInfo {
1592cab237bSDimitry Andric FunctionOutliningInfo() = default;
1602cab237bSDimitry Andric
161f37b6182SDimitry Andric // Returns the number of blocks to be inlined including all blocks
162f37b6182SDimitry Andric // in Entries and one return block.
GetNumInlinedBlocks__anona2f48faf0111::FunctionOutliningInfo163f37b6182SDimitry Andric unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
164f37b6182SDimitry Andric
165f37b6182SDimitry Andric // A set of blocks including the function entry that guard
166f37b6182SDimitry Andric // the region to be outlined.
167f37b6182SDimitry Andric SmallVector<BasicBlock *, 4> Entries;
1682cab237bSDimitry Andric
169f37b6182SDimitry Andric // The return block that is not included in the outlined region.
1702cab237bSDimitry Andric BasicBlock *ReturnBlock = nullptr;
1712cab237bSDimitry Andric
1726d97bb29SDimitry Andric // The dominating block of the region to be outlined.
1732cab237bSDimitry Andric BasicBlock *NonReturnBlock = nullptr;
1742cab237bSDimitry Andric
175f37b6182SDimitry Andric // The set of blocks in Entries that that are predecessors to ReturnBlock
176f37b6182SDimitry Andric SmallVector<BasicBlock *, 4> ReturnBlockPreds;
177f37b6182SDimitry Andric };
178f37b6182SDimitry Andric
1792cab237bSDimitry Andric struct FunctionOutliningMultiRegionInfo {
FunctionOutliningMultiRegionInfo__anona2f48faf0111::FunctionOutliningMultiRegionInfo1802cab237bSDimitry Andric FunctionOutliningMultiRegionInfo()
1812cab237bSDimitry Andric : ORI() {}
1822cab237bSDimitry Andric
1832cab237bSDimitry Andric // Container for outline regions
1842cab237bSDimitry Andric struct OutlineRegionInfo {
OutlineRegionInfo__anona2f48faf0111::FunctionOutliningMultiRegionInfo::OutlineRegionInfo1852cab237bSDimitry Andric OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
1862cab237bSDimitry Andric BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1872cab237bSDimitry Andric BasicBlock *ReturnBlock)
1882cab237bSDimitry Andric : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
1892cab237bSDimitry Andric ReturnBlock(ReturnBlock) {}
1902cab237bSDimitry Andric SmallVector<BasicBlock *, 8> Region;
1912cab237bSDimitry Andric BasicBlock *EntryBlock;
1922cab237bSDimitry Andric BasicBlock *ExitBlock;
1932cab237bSDimitry Andric BasicBlock *ReturnBlock;
1942cab237bSDimitry Andric };
1952cab237bSDimitry Andric
1962cab237bSDimitry Andric SmallVector<OutlineRegionInfo, 4> ORI;
1972cab237bSDimitry Andric };
1982cab237bSDimitry Andric
199d88c1a5aSDimitry Andric struct PartialInlinerImpl {
2002cab237bSDimitry Andric
PartialInlinerImpl__anona2f48faf0111::PartialInlinerImpl201f37b6182SDimitry Andric PartialInlinerImpl(
202f37b6182SDimitry Andric std::function<AssumptionCache &(Function &)> *GetAC,
203f37b6182SDimitry Andric std::function<TargetTransformInfo &(Function &)> *GTTI,
204f37b6182SDimitry Andric Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
2054ba319b5SDimitry Andric ProfileSummaryInfo *ProfSI)
2064ba319b5SDimitry Andric : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
2072cab237bSDimitry Andric
208d88c1a5aSDimitry Andric bool run(Module &M);
2092cab237bSDimitry Andric // Main part of the transformation that calls helper functions to find
2102cab237bSDimitry Andric // outlining candidates, clone & outline the function, and attempt to
2112cab237bSDimitry Andric // partially inline the resulting function. Returns true if
2122cab237bSDimitry Andric // inlining was successful, false otherwise. Also returns the outline
2132cab237bSDimitry Andric // function (only if we partially inlined early returns) as there is a
2142cab237bSDimitry Andric // possibility to further "peel" early return statements that were left in the
2152cab237bSDimitry Andric // outline function due to code size.
2162cab237bSDimitry Andric std::pair<bool, Function *> unswitchFunction(Function *F);
217d88c1a5aSDimitry Andric
2184ba319b5SDimitry Andric // This class speculatively clones the function to be partial inlined.
21924d58133SDimitry Andric // At the end of partial inlining, the remaining callsites to the cloned
22024d58133SDimitry Andric // function that are not partially inlined will be fixed up to reference
22124d58133SDimitry Andric // the original function, and the cloned function will be erased.
22224d58133SDimitry Andric struct FunctionCloner {
2232cab237bSDimitry Andric // Two constructors, one for single region outlining, the other for
2242cab237bSDimitry Andric // multi-region outlining.
2252cab237bSDimitry Andric FunctionCloner(Function *F, FunctionOutliningInfo *OI,
2262cab237bSDimitry Andric OptimizationRemarkEmitter &ORE);
2272cab237bSDimitry Andric FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
2282cab237bSDimitry Andric OptimizationRemarkEmitter &ORE);
22924d58133SDimitry Andric ~FunctionCloner();
23024d58133SDimitry Andric
23124d58133SDimitry Andric // Prepare for function outlining: making sure there is only
23224d58133SDimitry Andric // one incoming edge from the extracted/outlined region to
23324d58133SDimitry Andric // the return block.
23424d58133SDimitry Andric void NormalizeReturnBlock();
23524d58133SDimitry Andric
2362cab237bSDimitry Andric // Do function outlining for cold regions.
2372cab237bSDimitry Andric bool doMultiRegionFunctionOutlining();
2382cab237bSDimitry Andric // Do function outlining for region after early return block(s).
2392cab237bSDimitry Andric // NOTE: For vararg functions that do the vararg handling in the outlined
2402cab237bSDimitry Andric // function, we temporarily generate IR that does not properly
2412cab237bSDimitry Andric // forward varargs to the outlined function. Calling InlineFunction
2422cab237bSDimitry Andric // will update calls to the outlined functions to properly forward
2432cab237bSDimitry Andric // the varargs.
2442cab237bSDimitry Andric Function *doSingleRegionFunctionOutlining();
24524d58133SDimitry Andric
24624d58133SDimitry Andric Function *OrigFunc = nullptr;
24724d58133SDimitry Andric Function *ClonedFunc = nullptr;
2482cab237bSDimitry Andric
2492cab237bSDimitry Andric typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
2502cab237bSDimitry Andric // Keep track of Outlined Functions and the basic block they're called from.
2512cab237bSDimitry Andric SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
2522cab237bSDimitry Andric
25324d58133SDimitry Andric // ClonedFunc is inlined in one of its callers after function
25424d58133SDimitry Andric // outlining.
25524d58133SDimitry Andric bool IsFunctionInlined = false;
25624d58133SDimitry Andric // The cost of the region to be outlined.
25724d58133SDimitry Andric int OutlinedRegionCost = 0;
2582cab237bSDimitry Andric // ClonedOI is specific to outlining non-early return blocks.
25924d58133SDimitry Andric std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
2602cab237bSDimitry Andric // ClonedOMRI is specific to outlining cold regions.
2612cab237bSDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
26224d58133SDimitry Andric std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
2632cab237bSDimitry Andric OptimizationRemarkEmitter &ORE;
26424d58133SDimitry Andric };
26524d58133SDimitry Andric
266f37b6182SDimitry Andric private:
267f37b6182SDimitry Andric int NumPartialInlining = 0;
268f37b6182SDimitry Andric std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
269f37b6182SDimitry Andric std::function<TargetTransformInfo &(Function &)> *GetTTI;
270f37b6182SDimitry Andric Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
271f37b6182SDimitry Andric ProfileSummaryInfo *PSI;
272f37b6182SDimitry Andric
2735517e702SDimitry Andric // Return the frequency of the OutlininingBB relative to F's entry point.
2745517e702SDimitry Andric // The result is no larger than 1 and is represented using BP.
2755517e702SDimitry Andric // (Note that the outlined region's 'head' block can only have incoming
2765517e702SDimitry Andric // edges from the guarding entry blocks).
27724d58133SDimitry Andric BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
2785517e702SDimitry Andric
2795517e702SDimitry Andric // Return true if the callee of CS should be partially inlined with
2805517e702SDimitry Andric // profit.
28124d58133SDimitry Andric bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
2824ba319b5SDimitry Andric BlockFrequency WeightedOutliningRcost,
2834ba319b5SDimitry Andric OptimizationRemarkEmitter &ORE);
2845517e702SDimitry Andric
2855517e702SDimitry Andric // Try to inline DuplicateFunction (cloned from F with call to
2865517e702SDimitry Andric // the OutlinedFunction into its callers. Return true
2875517e702SDimitry Andric // if there is any successful inlining.
28824d58133SDimitry Andric bool tryPartialInline(FunctionCloner &Cloner);
2895517e702SDimitry Andric
2905517e702SDimitry Andric // Compute the mapping from use site of DuplicationFunction to the enclosing
2915517e702SDimitry Andric // BB's profile count.
2925517e702SDimitry Andric void computeCallsiteToProfCountMap(Function *DuplicateFunction,
2935517e702SDimitry Andric DenseMap<User *, uint64_t> &SiteCountMap);
2945517e702SDimitry Andric
IsLimitReached__anona2f48faf0111::PartialInlinerImpl29551690af2SDimitry Andric bool IsLimitReached() {
29651690af2SDimitry Andric return (MaxNumPartialInlining != -1 &&
29751690af2SDimitry Andric NumPartialInlining >= MaxNumPartialInlining);
29851690af2SDimitry Andric }
2995517e702SDimitry Andric
getCallSite__anona2f48faf0111::PartialInlinerImpl30024d58133SDimitry Andric static CallSite getCallSite(User *U) {
3015517e702SDimitry Andric CallSite CS;
3025517e702SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(U))
3035517e702SDimitry Andric CS = CallSite(CI);
3045517e702SDimitry Andric else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
3055517e702SDimitry Andric CS = CallSite(II);
3065517e702SDimitry Andric else
3075517e702SDimitry Andric llvm_unreachable("All uses must be calls");
3085517e702SDimitry Andric return CS;
3095517e702SDimitry Andric }
3105517e702SDimitry Andric
getOneCallSiteTo__anona2f48faf0111::PartialInlinerImpl31124d58133SDimitry Andric static CallSite getOneCallSiteTo(Function *F) {
3125517e702SDimitry Andric User *User = *F->user_begin();
3135517e702SDimitry Andric return getCallSite(User);
3145517e702SDimitry Andric }
3155517e702SDimitry Andric
getOneDebugLoc__anona2f48faf0111::PartialInlinerImpl3165517e702SDimitry Andric std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
3175517e702SDimitry Andric CallSite CS = getOneCallSiteTo(F);
3185517e702SDimitry Andric DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
3195517e702SDimitry Andric BasicBlock *Block = CS.getParent();
3205517e702SDimitry Andric return std::make_tuple(DLoc, Block);
3215517e702SDimitry Andric }
3225517e702SDimitry Andric
3235517e702SDimitry Andric // Returns the costs associated with function outlining:
3245517e702SDimitry Andric // - The first value is the non-weighted runtime cost for making the call
32524d58133SDimitry Andric // to the outlined function, including the addtional setup cost in the
32624d58133SDimitry Andric // outlined function itself;
3275517e702SDimitry Andric // - The second value is the estimated size of the new call sequence in
32824d58133SDimitry Andric // basic block Cloner.OutliningCallBB;
32924d58133SDimitry Andric std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
3302cab237bSDimitry Andric
3315517e702SDimitry Andric // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
3325517e702SDimitry Andric // approximate both the size and runtime cost (Note that in the current
3335517e702SDimitry Andric // inline cost analysis, there is no clear distinction there either).
33424d58133SDimitry Andric static int computeBBInlineCost(BasicBlock *BB);
3355517e702SDimitry Andric
3365517e702SDimitry Andric std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
3372cab237bSDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo>
3384ba319b5SDimitry Andric computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
339d88c1a5aSDimitry Andric };
340f37b6182SDimitry Andric
3413ca95b02SDimitry Andric struct PartialInlinerLegacyPass : public ModulePass {
342f22ef01cSRoman Divacky static char ID; // Pass identification, replacement for typeid
3432cab237bSDimitry Andric
PartialInlinerLegacyPass__anona2f48faf0111::PartialInlinerLegacyPass3443ca95b02SDimitry Andric PartialInlinerLegacyPass() : ModulePass(ID) {
3453ca95b02SDimitry Andric initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
3462754fe60SDimitry Andric }
347f22ef01cSRoman Divacky
getAnalysisUsage__anona2f48faf0111::PartialInlinerLegacyPass348d88c1a5aSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
349d88c1a5aSDimitry Andric AU.addRequired<AssumptionCacheTracker>();
350f37b6182SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>();
351f37b6182SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
352d88c1a5aSDimitry Andric }
3532cab237bSDimitry Andric
runOnModule__anona2f48faf0111::PartialInlinerLegacyPass3543ca95b02SDimitry Andric bool runOnModule(Module &M) override {
3553ca95b02SDimitry Andric if (skipModule(M))
3563ca95b02SDimitry Andric return false;
357f22ef01cSRoman Divacky
358d88c1a5aSDimitry Andric AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
359f37b6182SDimitry Andric TargetTransformInfoWrapperPass *TTIWP =
360f37b6182SDimitry Andric &getAnalysis<TargetTransformInfoWrapperPass>();
361f37b6182SDimitry Andric ProfileSummaryInfo *PSI =
362*b5893f02SDimitry Andric &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
363f37b6182SDimitry Andric
364d88c1a5aSDimitry Andric std::function<AssumptionCache &(Function &)> GetAssumptionCache =
365d88c1a5aSDimitry Andric [&ACT](Function &F) -> AssumptionCache & {
366d88c1a5aSDimitry Andric return ACT->getAssumptionCache(F);
367d88c1a5aSDimitry Andric };
368f37b6182SDimitry Andric
369f37b6182SDimitry Andric std::function<TargetTransformInfo &(Function &)> GetTTI =
370f37b6182SDimitry Andric [&TTIWP](Function &F) -> TargetTransformInfo & {
371f37b6182SDimitry Andric return TTIWP->getTTI(F);
372f37b6182SDimitry Andric };
373f37b6182SDimitry Andric
3744ba319b5SDimitry Andric return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
3752cab237bSDimitry Andric .run(M);
376d88c1a5aSDimitry Andric }
377f22ef01cSRoman Divacky };
3782cab237bSDimitry Andric
3792cab237bSDimitry Andric } // end anonymous namespace
3802cab237bSDimitry Andric
3812cab237bSDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function * F,OptimizationRemarkEmitter & ORE)3824ba319b5SDimitry Andric PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
3834ba319b5SDimitry Andric OptimizationRemarkEmitter &ORE) {
3842cab237bSDimitry Andric BasicBlock *EntryBlock = &F->front();
3852cab237bSDimitry Andric
3862cab237bSDimitry Andric DominatorTree DT(*F);
3872cab237bSDimitry Andric LoopInfo LI(DT);
3882cab237bSDimitry Andric BranchProbabilityInfo BPI(*F, LI);
3892cab237bSDimitry Andric std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
3902cab237bSDimitry Andric BlockFrequencyInfo *BFI;
3912cab237bSDimitry Andric if (!GetBFI) {
3922cab237bSDimitry Andric ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
3932cab237bSDimitry Andric BFI = ScopedBFI.get();
3942cab237bSDimitry Andric } else
3952cab237bSDimitry Andric BFI = &(*GetBFI)(*F);
3962cab237bSDimitry Andric
3972cab237bSDimitry Andric // Return if we don't have profiling information.
3982cab237bSDimitry Andric if (!PSI->hasInstrumentationProfile())
3992cab237bSDimitry Andric return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
4002cab237bSDimitry Andric
4012cab237bSDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
4022cab237bSDimitry Andric llvm::make_unique<FunctionOutliningMultiRegionInfo>();
4032cab237bSDimitry Andric
4042cab237bSDimitry Andric auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
4052cab237bSDimitry Andric BasicBlock *Dom = BlockList.front();
406*b5893f02SDimitry Andric return BlockList.size() > 1 && Dom->hasNPredecessors(1);
4072cab237bSDimitry Andric };
4082cab237bSDimitry Andric
4092cab237bSDimitry Andric auto IsSingleExit =
4102cab237bSDimitry Andric [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
4112cab237bSDimitry Andric BasicBlock *ExitBlock = nullptr;
4122cab237bSDimitry Andric for (auto *Block : BlockList) {
4132cab237bSDimitry Andric for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
4142cab237bSDimitry Andric if (!is_contained(BlockList, *SI)) {
4152cab237bSDimitry Andric if (ExitBlock) {
4162cab237bSDimitry Andric ORE.emit([&]() {
4172cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
4182cab237bSDimitry Andric &SI->front())
4192cab237bSDimitry Andric << "Region dominated by "
4202cab237bSDimitry Andric << ore::NV("Block", BlockList.front()->getName())
4212cab237bSDimitry Andric << " has more than one region exit edge.";
4222cab237bSDimitry Andric });
4232cab237bSDimitry Andric return nullptr;
4242cab237bSDimitry Andric } else
4252cab237bSDimitry Andric ExitBlock = Block;
4262cab237bSDimitry Andric }
4272cab237bSDimitry Andric }
4282cab237bSDimitry Andric }
4292cab237bSDimitry Andric return ExitBlock;
4302cab237bSDimitry Andric };
4312cab237bSDimitry Andric
4322cab237bSDimitry Andric auto BBProfileCount = [BFI](BasicBlock *BB) {
4332cab237bSDimitry Andric return BFI->getBlockProfileCount(BB)
4342cab237bSDimitry Andric ? BFI->getBlockProfileCount(BB).getValue()
4352cab237bSDimitry Andric : 0;
4362cab237bSDimitry Andric };
4372cab237bSDimitry Andric
4382cab237bSDimitry Andric // Use the same computeBBInlineCost function to compute the cost savings of
4392cab237bSDimitry Andric // the outlining the candidate region.
4402cab237bSDimitry Andric int OverallFunctionCost = 0;
4412cab237bSDimitry Andric for (auto &BB : *F)
4422cab237bSDimitry Andric OverallFunctionCost += computeBBInlineCost(&BB);
4432cab237bSDimitry Andric
4442cab237bSDimitry Andric #ifndef NDEBUG
4452cab237bSDimitry Andric if (TracePartialInlining)
4462cab237bSDimitry Andric dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
4472cab237bSDimitry Andric #endif
4482cab237bSDimitry Andric int MinOutlineRegionCost =
4492cab237bSDimitry Andric static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
4502cab237bSDimitry Andric BranchProbability MinBranchProbability(
4512cab237bSDimitry Andric static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
4522cab237bSDimitry Andric MinBlockCounterExecution);
4532cab237bSDimitry Andric bool ColdCandidateFound = false;
4542cab237bSDimitry Andric BasicBlock *CurrEntry = EntryBlock;
4552cab237bSDimitry Andric std::vector<BasicBlock *> DFS;
4562cab237bSDimitry Andric DenseMap<BasicBlock *, bool> VisitedMap;
4572cab237bSDimitry Andric DFS.push_back(CurrEntry);
4582cab237bSDimitry Andric VisitedMap[CurrEntry] = true;
4592cab237bSDimitry Andric // Use Depth First Search on the basic blocks to find CFG edges that are
4602cab237bSDimitry Andric // considered cold.
4612cab237bSDimitry Andric // Cold regions considered must also have its inline cost compared to the
4622cab237bSDimitry Andric // overall inline cost of the original function. The region is outlined only
4632cab237bSDimitry Andric // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
4642cab237bSDimitry Andric // more.
4652cab237bSDimitry Andric while (!DFS.empty()) {
4662cab237bSDimitry Andric auto *thisBB = DFS.back();
4672cab237bSDimitry Andric DFS.pop_back();
4682cab237bSDimitry Andric // Only consider regions with predecessor blocks that are considered
4692cab237bSDimitry Andric // not-cold (default: part of the top 99.99% of all block counters)
4702cab237bSDimitry Andric // AND greater than our minimum block execution count (default: 100).
471*b5893f02SDimitry Andric if (PSI->isColdBlock(thisBB, BFI) ||
4722cab237bSDimitry Andric BBProfileCount(thisBB) < MinBlockCounterExecution)
4732cab237bSDimitry Andric continue;
4742cab237bSDimitry Andric for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
4752cab237bSDimitry Andric if (VisitedMap[*SI])
4762cab237bSDimitry Andric continue;
4772cab237bSDimitry Andric VisitedMap[*SI] = true;
4782cab237bSDimitry Andric DFS.push_back(*SI);
4792cab237bSDimitry Andric // If branch isn't cold, we skip to the next one.
4802cab237bSDimitry Andric BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
4812cab237bSDimitry Andric if (SuccProb > MinBranchProbability)
4822cab237bSDimitry Andric continue;
4832cab237bSDimitry Andric #ifndef NDEBUG
4842cab237bSDimitry Andric if (TracePartialInlining) {
4852cab237bSDimitry Andric dbgs() << "Found cold edge: " << thisBB->getName() << "->"
4862cab237bSDimitry Andric << (*SI)->getName() << "\nBranch Probability = " << SuccProb
4872cab237bSDimitry Andric << "\n";
4882cab237bSDimitry Andric }
4892cab237bSDimitry Andric #endif
4902cab237bSDimitry Andric SmallVector<BasicBlock *, 8> DominateVector;
4912cab237bSDimitry Andric DT.getDescendants(*SI, DominateVector);
4922cab237bSDimitry Andric // We can only outline single entry regions (for now).
4932cab237bSDimitry Andric if (!IsSingleEntry(DominateVector))
4942cab237bSDimitry Andric continue;
4952cab237bSDimitry Andric BasicBlock *ExitBlock = nullptr;
4962cab237bSDimitry Andric // We can only outline single exit regions (for now).
4972cab237bSDimitry Andric if (!(ExitBlock = IsSingleExit(DominateVector)))
4982cab237bSDimitry Andric continue;
4992cab237bSDimitry Andric int OutlineRegionCost = 0;
5002cab237bSDimitry Andric for (auto *BB : DominateVector)
5012cab237bSDimitry Andric OutlineRegionCost += computeBBInlineCost(BB);
5022cab237bSDimitry Andric
5032cab237bSDimitry Andric #ifndef NDEBUG
5042cab237bSDimitry Andric if (TracePartialInlining)
5052cab237bSDimitry Andric dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
5062cab237bSDimitry Andric #endif
5072cab237bSDimitry Andric
5082cab237bSDimitry Andric if (OutlineRegionCost < MinOutlineRegionCost) {
5092cab237bSDimitry Andric ORE.emit([&]() {
5102cab237bSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
5112cab237bSDimitry Andric &SI->front())
5122cab237bSDimitry Andric << ore::NV("Callee", F) << " inline cost-savings smaller than "
5132cab237bSDimitry Andric << ore::NV("Cost", MinOutlineRegionCost);
5142cab237bSDimitry Andric });
5152cab237bSDimitry Andric continue;
5162cab237bSDimitry Andric }
5172cab237bSDimitry Andric // For now, ignore blocks that belong to a SISE region that is a
5182cab237bSDimitry Andric // candidate for outlining. In the future, we may want to look
5192cab237bSDimitry Andric // at inner regions because the outer region may have live-exit
5202cab237bSDimitry Andric // variables.
5212cab237bSDimitry Andric for (auto *BB : DominateVector)
5222cab237bSDimitry Andric VisitedMap[BB] = true;
5232cab237bSDimitry Andric // ReturnBlock here means the block after the outline call
5242cab237bSDimitry Andric BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
5252cab237bSDimitry Andric // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
5262cab237bSDimitry Andric FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
5272cab237bSDimitry Andric DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
5282cab237bSDimitry Andric RegInfo.Region = DominateVector;
5292cab237bSDimitry Andric OutliningInfo->ORI.push_back(RegInfo);
5302cab237bSDimitry Andric #ifndef NDEBUG
5312cab237bSDimitry Andric if (TracePartialInlining) {
5322cab237bSDimitry Andric dbgs() << "Found Cold Candidate starting at block: "
5332cab237bSDimitry Andric << DominateVector.front()->getName() << "\n";
5342cab237bSDimitry Andric }
5352cab237bSDimitry Andric #endif
5362cab237bSDimitry Andric ColdCandidateFound = true;
5372cab237bSDimitry Andric NumColdRegionsFound++;
5382cab237bSDimitry Andric }
5392cab237bSDimitry Andric }
5402cab237bSDimitry Andric if (ColdCandidateFound)
5412cab237bSDimitry Andric return OutliningInfo;
5422cab237bSDimitry Andric else
5432cab237bSDimitry Andric return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
5443dac3a9bSDimitry Andric }
545f22ef01cSRoman Divacky
546f37b6182SDimitry Andric std::unique_ptr<FunctionOutliningInfo>
computeOutliningInfo(Function * F)547f37b6182SDimitry Andric PartialInlinerImpl::computeOutliningInfo(Function *F) {
548d88c1a5aSDimitry Andric BasicBlock *EntryBlock = &F->front();
549d88c1a5aSDimitry Andric BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
550f22ef01cSRoman Divacky if (!BR || BR->isUnconditional())
551f37b6182SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>();
552f22ef01cSRoman Divacky
553f37b6182SDimitry Andric // Returns true if Succ is BB's successor
554f37b6182SDimitry Andric auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
555f37b6182SDimitry Andric return is_contained(successors(BB), Succ);
556f37b6182SDimitry Andric };
557f37b6182SDimitry Andric
558f37b6182SDimitry Andric auto IsReturnBlock = [](BasicBlock *BB) {
559*b5893f02SDimitry Andric Instruction *TI = BB->getTerminator();
560f37b6182SDimitry Andric return isa<ReturnInst>(TI);
561f37b6182SDimitry Andric };
562f37b6182SDimitry Andric
5635517e702SDimitry Andric auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
564f37b6182SDimitry Andric if (IsReturnBlock(Succ1))
565f37b6182SDimitry Andric return std::make_tuple(Succ1, Succ2);
566f37b6182SDimitry Andric if (IsReturnBlock(Succ2))
567f37b6182SDimitry Andric return std::make_tuple(Succ2, Succ1);
568f37b6182SDimitry Andric
569f37b6182SDimitry Andric return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
570f37b6182SDimitry Andric };
571f37b6182SDimitry Andric
572f37b6182SDimitry Andric // Detect a triangular shape:
5735517e702SDimitry Andric auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
574f37b6182SDimitry Andric if (IsSuccessor(Succ1, Succ2))
575f37b6182SDimitry Andric return std::make_tuple(Succ1, Succ2);
576f37b6182SDimitry Andric if (IsSuccessor(Succ2, Succ1))
577f37b6182SDimitry Andric return std::make_tuple(Succ2, Succ1);
578f37b6182SDimitry Andric
579f37b6182SDimitry Andric return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
580f37b6182SDimitry Andric };
581f37b6182SDimitry Andric
582f37b6182SDimitry Andric std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
583f37b6182SDimitry Andric llvm::make_unique<FunctionOutliningInfo>();
584f37b6182SDimitry Andric
585f37b6182SDimitry Andric BasicBlock *CurrEntry = EntryBlock;
586f37b6182SDimitry Andric bool CandidateFound = false;
587f37b6182SDimitry Andric do {
588f37b6182SDimitry Andric // The number of blocks to be inlined has already reached
589f37b6182SDimitry Andric // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
590f37b6182SDimitry Andric // disables partial inlining for the function.
591f37b6182SDimitry Andric if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
592f37b6182SDimitry Andric break;
593f37b6182SDimitry Andric
5944ba319b5SDimitry Andric if (succ_size(CurrEntry) != 2)
595f37b6182SDimitry Andric break;
596f37b6182SDimitry Andric
597f37b6182SDimitry Andric BasicBlock *Succ1 = *succ_begin(CurrEntry);
598f37b6182SDimitry Andric BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
599f37b6182SDimitry Andric
600f37b6182SDimitry Andric BasicBlock *ReturnBlock, *NonReturnBlock;
601f37b6182SDimitry Andric std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
602f37b6182SDimitry Andric
603f37b6182SDimitry Andric if (ReturnBlock) {
604f37b6182SDimitry Andric OutliningInfo->Entries.push_back(CurrEntry);
605f37b6182SDimitry Andric OutliningInfo->ReturnBlock = ReturnBlock;
606f37b6182SDimitry Andric OutliningInfo->NonReturnBlock = NonReturnBlock;
607f37b6182SDimitry Andric CandidateFound = true;
608f37b6182SDimitry Andric break;
609ff0cc061SDimitry Andric }
610f22ef01cSRoman Divacky
611f37b6182SDimitry Andric BasicBlock *CommSucc;
612f37b6182SDimitry Andric BasicBlock *OtherSucc;
613f37b6182SDimitry Andric std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
614f37b6182SDimitry Andric
615f37b6182SDimitry Andric if (!CommSucc)
616f37b6182SDimitry Andric break;
617f37b6182SDimitry Andric
618f37b6182SDimitry Andric OutliningInfo->Entries.push_back(CurrEntry);
619f37b6182SDimitry Andric CurrEntry = OtherSucc;
620f37b6182SDimitry Andric } while (true);
621f37b6182SDimitry Andric
622f37b6182SDimitry Andric if (!CandidateFound)
623f37b6182SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>();
624f37b6182SDimitry Andric
625f37b6182SDimitry Andric // Do sanity check of the entries: threre should not
626f37b6182SDimitry Andric // be any successors (not in the entry set) other than
627f37b6182SDimitry Andric // {ReturnBlock, NonReturnBlock}
6285517e702SDimitry Andric assert(OutliningInfo->Entries[0] == &F->front() &&
6295517e702SDimitry Andric "Function Entry must be the first in Entries vector");
630f37b6182SDimitry Andric DenseSet<BasicBlock *> Entries;
631f37b6182SDimitry Andric for (BasicBlock *E : OutliningInfo->Entries)
632f37b6182SDimitry Andric Entries.insert(E);
633f37b6182SDimitry Andric
634f37b6182SDimitry Andric // Returns true of BB has Predecessor which is not
635f37b6182SDimitry Andric // in Entries set.
636f37b6182SDimitry Andric auto HasNonEntryPred = [Entries](BasicBlock *BB) {
637f37b6182SDimitry Andric for (auto Pred : predecessors(BB)) {
638f37b6182SDimitry Andric if (!Entries.count(Pred))
639f37b6182SDimitry Andric return true;
640f37b6182SDimitry Andric }
641f37b6182SDimitry Andric return false;
642f37b6182SDimitry Andric };
643f37b6182SDimitry Andric auto CheckAndNormalizeCandidate =
644f37b6182SDimitry Andric [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
645f37b6182SDimitry Andric for (BasicBlock *E : OutliningInfo->Entries) {
646f37b6182SDimitry Andric for (auto Succ : successors(E)) {
647f37b6182SDimitry Andric if (Entries.count(Succ))
648f37b6182SDimitry Andric continue;
649f37b6182SDimitry Andric if (Succ == OutliningInfo->ReturnBlock)
650f37b6182SDimitry Andric OutliningInfo->ReturnBlockPreds.push_back(E);
651f37b6182SDimitry Andric else if (Succ != OutliningInfo->NonReturnBlock)
652f37b6182SDimitry Andric return false;
653f37b6182SDimitry Andric }
654f37b6182SDimitry Andric // There should not be any outside incoming edges either:
655f37b6182SDimitry Andric if (HasNonEntryPred(E))
656f37b6182SDimitry Andric return false;
657f37b6182SDimitry Andric }
658f37b6182SDimitry Andric return true;
659f37b6182SDimitry Andric };
660f37b6182SDimitry Andric
661f37b6182SDimitry Andric if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
662f37b6182SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>();
663f37b6182SDimitry Andric
664f37b6182SDimitry Andric // Now further growing the candidate's inlining region by
665f37b6182SDimitry Andric // peeling off dominating blocks from the outlining region:
666f37b6182SDimitry Andric while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
667f37b6182SDimitry Andric BasicBlock *Cand = OutliningInfo->NonReturnBlock;
6684ba319b5SDimitry Andric if (succ_size(Cand) != 2)
669f37b6182SDimitry Andric break;
670f37b6182SDimitry Andric
671f37b6182SDimitry Andric if (HasNonEntryPred(Cand))
672f37b6182SDimitry Andric break;
673f37b6182SDimitry Andric
674f37b6182SDimitry Andric BasicBlock *Succ1 = *succ_begin(Cand);
675f37b6182SDimitry Andric BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
676f37b6182SDimitry Andric
677f37b6182SDimitry Andric BasicBlock *ReturnBlock, *NonReturnBlock;
678f37b6182SDimitry Andric std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
679f37b6182SDimitry Andric if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
680f37b6182SDimitry Andric break;
681f37b6182SDimitry Andric
682f37b6182SDimitry Andric if (NonReturnBlock->getSinglePredecessor() != Cand)
683f37b6182SDimitry Andric break;
684f37b6182SDimitry Andric
685f37b6182SDimitry Andric // Now grow and update OutlininigInfo:
686f37b6182SDimitry Andric OutliningInfo->Entries.push_back(Cand);
687f37b6182SDimitry Andric OutliningInfo->NonReturnBlock = NonReturnBlock;
688f37b6182SDimitry Andric OutliningInfo->ReturnBlockPreds.push_back(Cand);
689f37b6182SDimitry Andric Entries.insert(Cand);
690f37b6182SDimitry Andric }
691f37b6182SDimitry Andric
692f37b6182SDimitry Andric return OutliningInfo;
693f37b6182SDimitry Andric }
694f37b6182SDimitry Andric
6955517e702SDimitry Andric // Check if there is PGO data or user annoated branch data:
hasProfileData(Function * F,FunctionOutliningInfo * OI)6965517e702SDimitry Andric static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
697da09e106SDimitry Andric if (F->hasProfileData())
6985517e702SDimitry Andric return true;
6995517e702SDimitry Andric // Now check if any of the entry block has MD_prof data:
7005517e702SDimitry Andric for (auto *E : OI->Entries) {
7015517e702SDimitry Andric BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
7025517e702SDimitry Andric if (!BR || BR->isUnconditional())
7035517e702SDimitry Andric continue;
7045517e702SDimitry Andric uint64_t T, F;
7055517e702SDimitry Andric if (BR->extractProfMetadata(T, F))
7065517e702SDimitry Andric return true;
7075517e702SDimitry Andric }
7085517e702SDimitry Andric return false;
7095517e702SDimitry Andric }
7105517e702SDimitry Andric
71124d58133SDimitry Andric BranchProbability
getOutliningCallBBRelativeFreq(FunctionCloner & Cloner)71224d58133SDimitry Andric PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
7132cab237bSDimitry Andric BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
7145517e702SDimitry Andric auto EntryFreq =
71524d58133SDimitry Andric Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
71624d58133SDimitry Andric auto OutliningCallFreq =
7172cab237bSDimitry Andric Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
7182cab237bSDimitry Andric // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
7192cab237bSDimitry Andric // we outlined any regions, so we may encounter situations where the
7202cab237bSDimitry Andric // OutliningCallFreq is *slightly* bigger than the EntryFreq.
7212cab237bSDimitry Andric if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
7222cab237bSDimitry Andric OutliningCallFreq = EntryFreq;
7232cab237bSDimitry Andric }
7242cab237bSDimitry Andric auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
7252cab237bSDimitry Andric OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
7265517e702SDimitry Andric
72724d58133SDimitry Andric if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
7285517e702SDimitry Andric return OutlineRegionRelFreq;
7295517e702SDimitry Andric
7306d97bb29SDimitry Andric // When profile data is not available, we need to be conservative in
7316d97bb29SDimitry Andric // estimating the overall savings. Static branch prediction can usually
7326d97bb29SDimitry Andric // guess the branch direction right (taken/non-taken), but the guessed
7336d97bb29SDimitry Andric // branch probability is usually not biased enough. In case when the
7346d97bb29SDimitry Andric // outlined region is predicted to be likely, its probability needs
7356d97bb29SDimitry Andric // to be made higher (more biased) to not under-estimate the cost of
7366d97bb29SDimitry Andric // function outlining. On the other hand, if the outlined region
7376d97bb29SDimitry Andric // is predicted to be less likely, the predicted probablity is usually
7386d97bb29SDimitry Andric // higher than the actual. For instance, the actual probability of the
7396d97bb29SDimitry Andric // less likely target is only 5%, but the guessed probablity can be
7406d97bb29SDimitry Andric // 40%. In the latter case, there is no need for further adjustement.
7416d97bb29SDimitry Andric // FIXME: add an option for this.
7426d97bb29SDimitry Andric if (OutlineRegionRelFreq < BranchProbability(45, 100))
7436d97bb29SDimitry Andric return OutlineRegionRelFreq;
7446d97bb29SDimitry Andric
7456d97bb29SDimitry Andric OutlineRegionRelFreq = std::max(
7466d97bb29SDimitry Andric OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
7475517e702SDimitry Andric
7485517e702SDimitry Andric return OutlineRegionRelFreq;
7495517e702SDimitry Andric }
7505517e702SDimitry Andric
shouldPartialInline(CallSite CS,FunctionCloner & Cloner,BlockFrequency WeightedOutliningRcost,OptimizationRemarkEmitter & ORE)7515517e702SDimitry Andric bool PartialInlinerImpl::shouldPartialInline(
7522cab237bSDimitry Andric CallSite CS, FunctionCloner &Cloner,
7534ba319b5SDimitry Andric BlockFrequency WeightedOutliningRcost,
7544ba319b5SDimitry Andric OptimizationRemarkEmitter &ORE) {
755f37b6182SDimitry Andric using namespace ore;
7562cab237bSDimitry Andric
757f37b6182SDimitry Andric Instruction *Call = CS.getInstruction();
758f37b6182SDimitry Andric Function *Callee = CS.getCalledFunction();
75924d58133SDimitry Andric assert(Callee == Cloner.ClonedFunc);
76024d58133SDimitry Andric
7614ba319b5SDimitry Andric if (SkipCostAnalysis)
7624ba319b5SDimitry Andric return isInlineViable(*Callee);
7634ba319b5SDimitry Andric
764f37b6182SDimitry Andric Function *Caller = CS.getCaller();
765f37b6182SDimitry Andric auto &CalleeTTI = (*GetTTI)(*Callee);
766f37b6182SDimitry Andric InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
7672cab237bSDimitry Andric *GetAssumptionCache, GetBFI, PSI, &ORE);
768f37b6182SDimitry Andric
769f37b6182SDimitry Andric if (IC.isAlways()) {
7702cab237bSDimitry Andric ORE.emit([&]() {
7712cab237bSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
77224d58133SDimitry Andric << NV("Callee", Cloner.OrigFunc)
7732cab237bSDimitry Andric << " should always be fully inlined, not partially";
7742cab237bSDimitry Andric });
775f37b6182SDimitry Andric return false;
776f37b6182SDimitry Andric }
777f37b6182SDimitry Andric
778f37b6182SDimitry Andric if (IC.isNever()) {
7792cab237bSDimitry Andric ORE.emit([&]() {
7802cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
78124d58133SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
782f37b6182SDimitry Andric << NV("Caller", Caller)
7832cab237bSDimitry Andric << " because it should never be inlined (cost=never)";
7842cab237bSDimitry Andric });
785f37b6182SDimitry Andric return false;
786f37b6182SDimitry Andric }
787f37b6182SDimitry Andric
788f37b6182SDimitry Andric if (!IC) {
7892cab237bSDimitry Andric ORE.emit([&]() {
7902cab237bSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
79124d58133SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
792f37b6182SDimitry Andric << NV("Caller", Caller) << " because too costly to inline (cost="
793f37b6182SDimitry Andric << NV("Cost", IC.getCost()) << ", threshold="
7942cab237bSDimitry Andric << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
7952cab237bSDimitry Andric });
796f37b6182SDimitry Andric return false;
797f37b6182SDimitry Andric }
7985517e702SDimitry Andric const DataLayout &DL = Caller->getParent()->getDataLayout();
79924d58133SDimitry Andric
8005517e702SDimitry Andric // The savings of eliminating the call:
8015517e702SDimitry Andric int NonWeightedSavings = getCallsiteCost(CS, DL);
8025517e702SDimitry Andric BlockFrequency NormWeightedSavings(NonWeightedSavings);
8035517e702SDimitry Andric
8045517e702SDimitry Andric // Weighted saving is smaller than weighted cost, return false
80524d58133SDimitry Andric if (NormWeightedSavings < WeightedOutliningRcost) {
8062cab237bSDimitry Andric ORE.emit([&]() {
8072cab237bSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
8082cab237bSDimitry Andric Call)
80924d58133SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
8105517e702SDimitry Andric << NV("Caller", Caller) << " runtime overhead (overhead="
81124d58133SDimitry Andric << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
8125517e702SDimitry Andric << ", savings="
8132cab237bSDimitry Andric << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
8142cab237bSDimitry Andric << ")"
8152cab237bSDimitry Andric << " of making the outlined call is too high";
8162cab237bSDimitry Andric });
8175517e702SDimitry Andric
8185517e702SDimitry Andric return false;
8195517e702SDimitry Andric }
820f37b6182SDimitry Andric
8212cab237bSDimitry Andric ORE.emit([&]() {
8222cab237bSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
82324d58133SDimitry Andric << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
824f37b6182SDimitry Andric << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
825f37b6182SDimitry Andric << " (threshold="
8262cab237bSDimitry Andric << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
8272cab237bSDimitry Andric });
828f37b6182SDimitry Andric return true;
829f37b6182SDimitry Andric }
830f37b6182SDimitry Andric
8315517e702SDimitry Andric // TODO: Ideally we should share Inliner's InlineCost Analysis code.
8325517e702SDimitry Andric // For now use a simplified version. The returned 'InlineCost' will be used
8335517e702SDimitry Andric // to esimate the size cost as well as runtime cost of the BB.
computeBBInlineCost(BasicBlock * BB)8345517e702SDimitry Andric int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
8355517e702SDimitry Andric int InlineCost = 0;
8365517e702SDimitry Andric const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
837*b5893f02SDimitry Andric for (Instruction &I : BB->instructionsWithoutDebug()) {
838*b5893f02SDimitry Andric // Skip free instructions.
839*b5893f02SDimitry Andric switch (I.getOpcode()) {
8406d97bb29SDimitry Andric case Instruction::BitCast:
8416d97bb29SDimitry Andric case Instruction::PtrToInt:
8426d97bb29SDimitry Andric case Instruction::IntToPtr:
8436d97bb29SDimitry Andric case Instruction::Alloca:
844*b5893f02SDimitry Andric case Instruction::PHI:
8456d97bb29SDimitry Andric continue;
8466d97bb29SDimitry Andric case Instruction::GetElementPtr:
847*b5893f02SDimitry Andric if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
8486d97bb29SDimitry Andric continue;
849da09e106SDimitry Andric break;
8506d97bb29SDimitry Andric default:
8516d97bb29SDimitry Andric break;
8526d97bb29SDimitry Andric }
8536d97bb29SDimitry Andric
854*b5893f02SDimitry Andric if (I.isLifetimeStartOrEnd())
8556d97bb29SDimitry Andric continue;
8566d97bb29SDimitry Andric
857*b5893f02SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I)) {
8585517e702SDimitry Andric InlineCost += getCallsiteCost(CallSite(CI), DL);
8595517e702SDimitry Andric continue;
8605517e702SDimitry Andric }
8615517e702SDimitry Andric
862*b5893f02SDimitry Andric if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
8635517e702SDimitry Andric InlineCost += getCallsiteCost(CallSite(II), DL);
8645517e702SDimitry Andric continue;
8655517e702SDimitry Andric }
8665517e702SDimitry Andric
867*b5893f02SDimitry Andric if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
8685517e702SDimitry Andric InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
8695517e702SDimitry Andric continue;
8705517e702SDimitry Andric }
8715517e702SDimitry Andric InlineCost += InlineConstants::InstrCost;
8725517e702SDimitry Andric }
8735517e702SDimitry Andric return InlineCost;
8745517e702SDimitry Andric }
8755517e702SDimitry Andric
87624d58133SDimitry Andric std::tuple<int, int>
computeOutliningCosts(FunctionCloner & Cloner)87724d58133SDimitry Andric PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
8782cab237bSDimitry Andric int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
8792cab237bSDimitry Andric for (auto FuncBBPair : Cloner.OutlinedFunctions) {
8802cab237bSDimitry Andric Function *OutlinedFunc = FuncBBPair.first;
8812cab237bSDimitry Andric BasicBlock* OutliningCallBB = FuncBBPair.second;
8825517e702SDimitry Andric // Now compute the cost of the call sequence to the outlined function
8835517e702SDimitry Andric // 'OutlinedFunction' in BB 'OutliningCallBB':
8842cab237bSDimitry Andric OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
8855517e702SDimitry Andric
8865517e702SDimitry Andric // Now compute the cost of the extracted/outlined function itself:
8872cab237bSDimitry Andric for (BasicBlock &BB : *OutlinedFunc)
8885517e702SDimitry Andric OutlinedFunctionCost += computeBBInlineCost(&BB);
8895517e702SDimitry Andric }
89024d58133SDimitry Andric assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
8915517e702SDimitry Andric "Outlined function cost should be no less than the outlined region");
8922cab237bSDimitry Andric
8936d97bb29SDimitry Andric // The code extractor introduces a new root and exit stub blocks with
8946d97bb29SDimitry Andric // additional unconditional branches. Those branches will be eliminated
8956d97bb29SDimitry Andric // later with bb layout. The cost should be adjusted accordingly:
8962cab237bSDimitry Andric OutlinedFunctionCost -=
8972cab237bSDimitry Andric 2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
8986d97bb29SDimitry Andric
89924d58133SDimitry Andric int OutliningRuntimeOverhead =
90024d58133SDimitry Andric OutliningFuncCallCost +
90124d58133SDimitry Andric (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
9026d97bb29SDimitry Andric ExtraOutliningPenalty;
9035517e702SDimitry Andric
90424d58133SDimitry Andric return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
9055517e702SDimitry Andric }
9065517e702SDimitry Andric
9075517e702SDimitry Andric // Create the callsite to profile count map which is
9085517e702SDimitry Andric // used to update the original function's entry count,
9095517e702SDimitry Andric // after the function is partially inlined into the callsite.
computeCallsiteToProfCountMap(Function * DuplicateFunction,DenseMap<User *,uint64_t> & CallSiteToProfCountMap)9105517e702SDimitry Andric void PartialInlinerImpl::computeCallsiteToProfCountMap(
9115517e702SDimitry Andric Function *DuplicateFunction,
9125517e702SDimitry Andric DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
9135517e702SDimitry Andric std::vector<User *> Users(DuplicateFunction->user_begin(),
9145517e702SDimitry Andric DuplicateFunction->user_end());
9155517e702SDimitry Andric Function *CurrentCaller = nullptr;
916302affcbSDimitry Andric std::unique_ptr<BlockFrequencyInfo> TempBFI;
9175517e702SDimitry Andric BlockFrequencyInfo *CurrentCallerBFI = nullptr;
9185517e702SDimitry Andric
9195517e702SDimitry Andric auto ComputeCurrBFI = [&,this](Function *Caller) {
9205517e702SDimitry Andric // For the old pass manager:
9215517e702SDimitry Andric if (!GetBFI) {
9225517e702SDimitry Andric DominatorTree DT(*Caller);
9235517e702SDimitry Andric LoopInfo LI(DT);
9245517e702SDimitry Andric BranchProbabilityInfo BPI(*Caller, LI);
925302affcbSDimitry Andric TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
926302affcbSDimitry Andric CurrentCallerBFI = TempBFI.get();
9275517e702SDimitry Andric } else {
9285517e702SDimitry Andric // New pass manager:
9295517e702SDimitry Andric CurrentCallerBFI = &(*GetBFI)(*Caller);
9305517e702SDimitry Andric }
9315517e702SDimitry Andric };
9325517e702SDimitry Andric
9335517e702SDimitry Andric for (User *User : Users) {
9345517e702SDimitry Andric CallSite CS = getCallSite(User);
9355517e702SDimitry Andric Function *Caller = CS.getCaller();
9365517e702SDimitry Andric if (CurrentCaller != Caller) {
9375517e702SDimitry Andric CurrentCaller = Caller;
9385517e702SDimitry Andric ComputeCurrBFI(Caller);
9395517e702SDimitry Andric } else {
9405517e702SDimitry Andric assert(CurrentCallerBFI && "CallerBFI is not set");
9415517e702SDimitry Andric }
9425517e702SDimitry Andric BasicBlock *CallBB = CS.getInstruction()->getParent();
9435517e702SDimitry Andric auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
9445517e702SDimitry Andric if (Count)
9455517e702SDimitry Andric CallSiteToProfCountMap[User] = *Count;
9465517e702SDimitry Andric else
9475517e702SDimitry Andric CallSiteToProfCountMap[User] = 0;
9485517e702SDimitry Andric }
9495517e702SDimitry Andric }
9505517e702SDimitry Andric
FunctionCloner(Function * F,FunctionOutliningInfo * OI,OptimizationRemarkEmitter & ORE)9512cab237bSDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
9522cab237bSDimitry Andric Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
9532cab237bSDimitry Andric : OrigFunc(F), ORE(ORE) {
95424d58133SDimitry Andric ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
95524d58133SDimitry Andric
95624d58133SDimitry Andric // Clone the function, so that we can hack away on it.
95724d58133SDimitry Andric ValueToValueMapTy VMap;
95824d58133SDimitry Andric ClonedFunc = CloneFunction(F, VMap);
95924d58133SDimitry Andric
96024d58133SDimitry Andric ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
96124d58133SDimitry Andric ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
96224d58133SDimitry Andric for (BasicBlock *BB : OI->Entries) {
96324d58133SDimitry Andric ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
96424d58133SDimitry Andric }
96524d58133SDimitry Andric for (BasicBlock *E : OI->ReturnBlockPreds) {
96624d58133SDimitry Andric BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
96724d58133SDimitry Andric ClonedOI->ReturnBlockPreds.push_back(NewE);
96824d58133SDimitry Andric }
96924d58133SDimitry Andric // Go ahead and update all uses to the duplicate, so that we can just
97024d58133SDimitry Andric // use the inliner functionality when we're done hacking.
97124d58133SDimitry Andric F->replaceAllUsesWith(ClonedFunc);
97224d58133SDimitry Andric }
97324d58133SDimitry Andric
FunctionCloner(Function * F,FunctionOutliningMultiRegionInfo * OI,OptimizationRemarkEmitter & ORE)9742cab237bSDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
9752cab237bSDimitry Andric Function *F, FunctionOutliningMultiRegionInfo *OI,
9762cab237bSDimitry Andric OptimizationRemarkEmitter &ORE)
9772cab237bSDimitry Andric : OrigFunc(F), ORE(ORE) {
9782cab237bSDimitry Andric ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
97924d58133SDimitry Andric
9802cab237bSDimitry Andric // Clone the function, so that we can hack away on it.
9812cab237bSDimitry Andric ValueToValueMapTy VMap;
9822cab237bSDimitry Andric ClonedFunc = CloneFunction(F, VMap);
9832cab237bSDimitry Andric
9842cab237bSDimitry Andric // Go through all Outline Candidate Regions and update all BasicBlock
9852cab237bSDimitry Andric // information.
9862cab237bSDimitry Andric for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
9872cab237bSDimitry Andric OI->ORI) {
9882cab237bSDimitry Andric SmallVector<BasicBlock *, 8> Region;
9892cab237bSDimitry Andric for (BasicBlock *BB : RegionInfo.Region) {
9902cab237bSDimitry Andric Region.push_back(cast<BasicBlock>(VMap[BB]));
9912cab237bSDimitry Andric }
9922cab237bSDimitry Andric BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
9932cab237bSDimitry Andric BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
9942cab237bSDimitry Andric BasicBlock *NewReturnBlock = nullptr;
9952cab237bSDimitry Andric if (RegionInfo.ReturnBlock)
9962cab237bSDimitry Andric NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
9972cab237bSDimitry Andric FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
9982cab237bSDimitry Andric Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
9992cab237bSDimitry Andric ClonedOMRI->ORI.push_back(MappedRegionInfo);
10002cab237bSDimitry Andric }
10012cab237bSDimitry Andric // Go ahead and update all uses to the duplicate, so that we can just
10022cab237bSDimitry Andric // use the inliner functionality when we're done hacking.
10032cab237bSDimitry Andric F->replaceAllUsesWith(ClonedFunc);
10042cab237bSDimitry Andric }
10052cab237bSDimitry Andric
NormalizeReturnBlock()10062cab237bSDimitry Andric void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
100724d58133SDimitry Andric auto getFirstPHI = [](BasicBlock *BB) {
100824d58133SDimitry Andric BasicBlock::iterator I = BB->begin();
100924d58133SDimitry Andric PHINode *FirstPhi = nullptr;
101024d58133SDimitry Andric while (I != BB->end()) {
101124d58133SDimitry Andric PHINode *Phi = dyn_cast<PHINode>(I);
101224d58133SDimitry Andric if (!Phi)
101324d58133SDimitry Andric break;
101424d58133SDimitry Andric if (!FirstPhi) {
101524d58133SDimitry Andric FirstPhi = Phi;
101624d58133SDimitry Andric break;
101724d58133SDimitry Andric }
101824d58133SDimitry Andric }
101924d58133SDimitry Andric return FirstPhi;
102024d58133SDimitry Andric };
102124d58133SDimitry Andric
10222cab237bSDimitry Andric // Shouldn't need to normalize PHIs if we're not outlining non-early return
10232cab237bSDimitry Andric // blocks.
10242cab237bSDimitry Andric if (!ClonedOI)
10252cab237bSDimitry Andric return;
10262cab237bSDimitry Andric
102724d58133SDimitry Andric // Special hackery is needed with PHI nodes that have inputs from more than
102824d58133SDimitry Andric // one extracted block. For simplicity, just split the PHIs into a two-level
102924d58133SDimitry Andric // sequence of PHIs, some of which will go in the extracted region, and some
103024d58133SDimitry Andric // of which will go outside.
103124d58133SDimitry Andric BasicBlock *PreReturn = ClonedOI->ReturnBlock;
103224d58133SDimitry Andric // only split block when necessary:
103324d58133SDimitry Andric PHINode *FirstPhi = getFirstPHI(PreReturn);
103424d58133SDimitry Andric unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
103524d58133SDimitry Andric
103624d58133SDimitry Andric if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
103724d58133SDimitry Andric return;
103824d58133SDimitry Andric
103924d58133SDimitry Andric auto IsTrivialPhi = [](PHINode *PN) -> Value * {
104024d58133SDimitry Andric Value *CommonValue = PN->getIncomingValue(0);
104124d58133SDimitry Andric if (all_of(PN->incoming_values(),
104224d58133SDimitry Andric [&](Value *V) { return V == CommonValue; }))
104324d58133SDimitry Andric return CommonValue;
104424d58133SDimitry Andric return nullptr;
104524d58133SDimitry Andric };
104624d58133SDimitry Andric
104724d58133SDimitry Andric ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
104824d58133SDimitry Andric ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
104924d58133SDimitry Andric BasicBlock::iterator I = PreReturn->begin();
105024d58133SDimitry Andric Instruction *Ins = &ClonedOI->ReturnBlock->front();
105124d58133SDimitry Andric SmallVector<Instruction *, 4> DeadPhis;
105224d58133SDimitry Andric while (I != PreReturn->end()) {
105324d58133SDimitry Andric PHINode *OldPhi = dyn_cast<PHINode>(I);
105424d58133SDimitry Andric if (!OldPhi)
105524d58133SDimitry Andric break;
105624d58133SDimitry Andric
105724d58133SDimitry Andric PHINode *RetPhi =
105824d58133SDimitry Andric PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
105924d58133SDimitry Andric OldPhi->replaceAllUsesWith(RetPhi);
106024d58133SDimitry Andric Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
106124d58133SDimitry Andric
106224d58133SDimitry Andric RetPhi->addIncoming(&*I, PreReturn);
106324d58133SDimitry Andric for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
106424d58133SDimitry Andric RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
106524d58133SDimitry Andric OldPhi->removeIncomingValue(E);
106624d58133SDimitry Andric }
106724d58133SDimitry Andric
106824d58133SDimitry Andric // After incoming values splitting, the old phi may become trivial.
106924d58133SDimitry Andric // Keeping the trivial phi can introduce definition inside the outline
107024d58133SDimitry Andric // region which is live-out, causing necessary overhead (load, store
107124d58133SDimitry Andric // arg passing etc).
107224d58133SDimitry Andric if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
107324d58133SDimitry Andric OldPhi->replaceAllUsesWith(OldPhiVal);
107424d58133SDimitry Andric DeadPhis.push_back(OldPhi);
107524d58133SDimitry Andric }
107624d58133SDimitry Andric ++I;
107724d58133SDimitry Andric }
107824d58133SDimitry Andric for (auto *DP : DeadPhis)
107924d58133SDimitry Andric DP->eraseFromParent();
108024d58133SDimitry Andric
108124d58133SDimitry Andric for (auto E : ClonedOI->ReturnBlockPreds) {
108224d58133SDimitry Andric E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
108324d58133SDimitry Andric }
108424d58133SDimitry Andric }
108524d58133SDimitry Andric
doMultiRegionFunctionOutlining()10862cab237bSDimitry Andric bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
10872cab237bSDimitry Andric
10882cab237bSDimitry Andric auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
10892cab237bSDimitry Andric int Cost = 0;
10902cab237bSDimitry Andric for (BasicBlock* BB : Region)
10912cab237bSDimitry Andric Cost += computeBBInlineCost(BB);
10922cab237bSDimitry Andric return Cost;
10932cab237bSDimitry Andric };
10942cab237bSDimitry Andric
10952cab237bSDimitry Andric assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
10962cab237bSDimitry Andric
10972cab237bSDimitry Andric if (ClonedOMRI->ORI.empty())
10982cab237bSDimitry Andric return false;
10992cab237bSDimitry Andric
11002cab237bSDimitry Andric // The CodeExtractor needs a dominator tree.
11012cab237bSDimitry Andric DominatorTree DT;
11022cab237bSDimitry Andric DT.recalculate(*ClonedFunc);
11032cab237bSDimitry Andric
11042cab237bSDimitry Andric // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
11052cab237bSDimitry Andric LoopInfo LI(DT);
11062cab237bSDimitry Andric BranchProbabilityInfo BPI(*ClonedFunc, LI);
11072cab237bSDimitry Andric ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
11082cab237bSDimitry Andric
11092cab237bSDimitry Andric SetVector<Value *> Inputs, Outputs, Sinks;
11102cab237bSDimitry Andric for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
11112cab237bSDimitry Andric ClonedOMRI->ORI) {
11122cab237bSDimitry Andric int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
11132cab237bSDimitry Andric
11142cab237bSDimitry Andric CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
11152cab237bSDimitry Andric ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
11162cab237bSDimitry Andric
11172cab237bSDimitry Andric CE.findInputsOutputs(Inputs, Outputs, Sinks);
11182cab237bSDimitry Andric
11192cab237bSDimitry Andric #ifndef NDEBUG
11202cab237bSDimitry Andric if (TracePartialInlining) {
11212cab237bSDimitry Andric dbgs() << "inputs: " << Inputs.size() << "\n";
11222cab237bSDimitry Andric dbgs() << "outputs: " << Outputs.size() << "\n";
11232cab237bSDimitry Andric for (Value *value : Inputs)
11242cab237bSDimitry Andric dbgs() << "value used in func: " << *value << "\n";
11252cab237bSDimitry Andric for (Value *output : Outputs)
11262cab237bSDimitry Andric dbgs() << "instr used in func: " << *output << "\n";
11272cab237bSDimitry Andric }
11282cab237bSDimitry Andric #endif
11292cab237bSDimitry Andric // Do not extract regions that have live exit variables.
11302cab237bSDimitry Andric if (Outputs.size() > 0 && !ForceLiveExit)
11312cab237bSDimitry Andric continue;
11322cab237bSDimitry Andric
11332cab237bSDimitry Andric Function *OutlinedFunc = CE.extractCodeRegion();
11342cab237bSDimitry Andric
11352cab237bSDimitry Andric if (OutlinedFunc) {
11362cab237bSDimitry Andric CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
11372cab237bSDimitry Andric BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
11382cab237bSDimitry Andric assert(OutliningCallBB->getParent() == ClonedFunc);
11392cab237bSDimitry Andric OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
11402cab237bSDimitry Andric NumColdRegionsOutlined++;
11412cab237bSDimitry Andric OutlinedRegionCost += CurrentOutlinedRegionCost;
11422cab237bSDimitry Andric
11432cab237bSDimitry Andric if (MarkOutlinedColdCC) {
11442cab237bSDimitry Andric OutlinedFunc->setCallingConv(CallingConv::Cold);
11452cab237bSDimitry Andric OCS.setCallingConv(CallingConv::Cold);
11462cab237bSDimitry Andric }
11472cab237bSDimitry Andric } else
11482cab237bSDimitry Andric ORE.emit([&]() {
11492cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
11502cab237bSDimitry Andric &RegionInfo.Region.front()->front())
11512cab237bSDimitry Andric << "Failed to extract region at block "
11522cab237bSDimitry Andric << ore::NV("Block", RegionInfo.Region.front());
11532cab237bSDimitry Andric });
11542cab237bSDimitry Andric }
11552cab237bSDimitry Andric
11562cab237bSDimitry Andric return !OutlinedFunctions.empty();
11572cab237bSDimitry Andric }
11582cab237bSDimitry Andric
11592cab237bSDimitry Andric Function *
doSingleRegionFunctionOutlining()11602cab237bSDimitry Andric PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
116124d58133SDimitry Andric // Returns true if the block is to be partial inlined into the caller
116224d58133SDimitry Andric // (i.e. not to be extracted to the out of line function)
116324d58133SDimitry Andric auto ToBeInlined = [&, this](BasicBlock *BB) {
116424d58133SDimitry Andric return BB == ClonedOI->ReturnBlock ||
116524d58133SDimitry Andric (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
116624d58133SDimitry Andric ClonedOI->Entries.end());
116724d58133SDimitry Andric };
116824d58133SDimitry Andric
11692cab237bSDimitry Andric assert(ClonedOI && "Expecting OutlineInfo for single region outline");
11702cab237bSDimitry Andric // The CodeExtractor needs a dominator tree.
11712cab237bSDimitry Andric DominatorTree DT;
11722cab237bSDimitry Andric DT.recalculate(*ClonedFunc);
11732cab237bSDimitry Andric
11742cab237bSDimitry Andric // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
11752cab237bSDimitry Andric LoopInfo LI(DT);
11762cab237bSDimitry Andric BranchProbabilityInfo BPI(*ClonedFunc, LI);
11772cab237bSDimitry Andric ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
11782cab237bSDimitry Andric
117924d58133SDimitry Andric // Gather up the blocks that we're going to extract.
118024d58133SDimitry Andric std::vector<BasicBlock *> ToExtract;
118124d58133SDimitry Andric ToExtract.push_back(ClonedOI->NonReturnBlock);
118224d58133SDimitry Andric OutlinedRegionCost +=
118324d58133SDimitry Andric PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
118424d58133SDimitry Andric for (BasicBlock &BB : *ClonedFunc)
118524d58133SDimitry Andric if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
118624d58133SDimitry Andric ToExtract.push_back(&BB);
118724d58133SDimitry Andric // FIXME: the code extractor may hoist/sink more code
118824d58133SDimitry Andric // into the outlined function which may make the outlining
118924d58133SDimitry Andric // overhead (the difference of the outlined function cost
119024d58133SDimitry Andric // and OutliningRegionCost) look larger.
119124d58133SDimitry Andric OutlinedRegionCost += computeBBInlineCost(&BB);
119224d58133SDimitry Andric }
119324d58133SDimitry Andric
119424d58133SDimitry Andric // Extract the body of the if.
11952cab237bSDimitry Andric Function *OutlinedFunc =
11962cab237bSDimitry Andric CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
11972cab237bSDimitry Andric ClonedFuncBFI.get(), &BPI,
11982cab237bSDimitry Andric /* AllowVarargs */ true)
119924d58133SDimitry Andric .extractCodeRegion();
120024d58133SDimitry Andric
120124d58133SDimitry Andric if (OutlinedFunc) {
12022cab237bSDimitry Andric BasicBlock *OutliningCallBB =
12032cab237bSDimitry Andric PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
120424d58133SDimitry Andric .getInstruction()
120524d58133SDimitry Andric ->getParent();
120624d58133SDimitry Andric assert(OutliningCallBB->getParent() == ClonedFunc);
12072cab237bSDimitry Andric OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
12082cab237bSDimitry Andric } else
12092cab237bSDimitry Andric ORE.emit([&]() {
12102cab237bSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
12112cab237bSDimitry Andric &ToExtract.front()->front())
12122cab237bSDimitry Andric << "Failed to extract region at block "
12132cab237bSDimitry Andric << ore::NV("Block", ToExtract.front());
12142cab237bSDimitry Andric });
121524d58133SDimitry Andric
121624d58133SDimitry Andric return OutlinedFunc;
121724d58133SDimitry Andric }
121824d58133SDimitry Andric
~FunctionCloner()121924d58133SDimitry Andric PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
122024d58133SDimitry Andric // Ditch the duplicate, since we're done with it, and rewrite all remaining
122124d58133SDimitry Andric // users (function pointers, etc.) back to the original function.
122224d58133SDimitry Andric ClonedFunc->replaceAllUsesWith(OrigFunc);
122324d58133SDimitry Andric ClonedFunc->eraseFromParent();
122424d58133SDimitry Andric if (!IsFunctionInlined) {
12252cab237bSDimitry Andric // Remove each function that was speculatively created if there is no
122624d58133SDimitry Andric // reference.
12272cab237bSDimitry Andric for (auto FuncBBPair : OutlinedFunctions) {
12282cab237bSDimitry Andric Function *Func = FuncBBPair.first;
12292cab237bSDimitry Andric Func->eraseFromParent();
12302cab237bSDimitry Andric }
123124d58133SDimitry Andric }
123224d58133SDimitry Andric }
123324d58133SDimitry Andric
unswitchFunction(Function * F)12342cab237bSDimitry Andric std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1235f37b6182SDimitry Andric
1236f37b6182SDimitry Andric if (F->hasAddressTaken())
12372cab237bSDimitry Andric return {false, nullptr};
1238f37b6182SDimitry Andric
1239f37b6182SDimitry Andric // Let inliner handle it
1240f37b6182SDimitry Andric if (F->hasFnAttribute(Attribute::AlwaysInline))
12412cab237bSDimitry Andric return {false, nullptr};
1242f37b6182SDimitry Andric
1243f37b6182SDimitry Andric if (F->hasFnAttribute(Attribute::NoInline))
12442cab237bSDimitry Andric return {false, nullptr};
1245f37b6182SDimitry Andric
1246f37b6182SDimitry Andric if (PSI->isFunctionEntryCold(F))
12472cab237bSDimitry Andric return {false, nullptr};
1248f37b6182SDimitry Andric
1249*b5893f02SDimitry Andric if (empty(F->users()))
12502cab237bSDimitry Andric return {false, nullptr};
1251f37b6182SDimitry Andric
12524ba319b5SDimitry Andric OptimizationRemarkEmitter ORE(F);
12532cab237bSDimitry Andric
12542cab237bSDimitry Andric // Only try to outline cold regions if we have a profile summary, which
12552cab237bSDimitry Andric // implies we have profiling information.
1256da09e106SDimitry Andric if (PSI->hasProfileSummary() && F->hasProfileData() &&
12572cab237bSDimitry Andric !DisableMultiRegionPartialInline) {
12582cab237bSDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
12594ba319b5SDimitry Andric computeOutliningColdRegionsInfo(F, ORE);
12602cab237bSDimitry Andric if (OMRI) {
12612cab237bSDimitry Andric FunctionCloner Cloner(F, OMRI.get(), ORE);
12622cab237bSDimitry Andric
12632cab237bSDimitry Andric #ifndef NDEBUG
12642cab237bSDimitry Andric if (TracePartialInlining) {
12652cab237bSDimitry Andric dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
12662cab237bSDimitry Andric dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
12672cab237bSDimitry Andric << "\n";
12682cab237bSDimitry Andric }
12692cab237bSDimitry Andric #endif
12702cab237bSDimitry Andric bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
12712cab237bSDimitry Andric
12722cab237bSDimitry Andric if (DidOutline) {
12732cab237bSDimitry Andric #ifndef NDEBUG
12742cab237bSDimitry Andric if (TracePartialInlining) {
12752cab237bSDimitry Andric dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
12762cab237bSDimitry Andric Cloner.ClonedFunc->print(dbgs());
12772cab237bSDimitry Andric dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
12782cab237bSDimitry Andric }
12792cab237bSDimitry Andric #endif
12802cab237bSDimitry Andric
12812cab237bSDimitry Andric if (tryPartialInline(Cloner))
12822cab237bSDimitry Andric return {true, nullptr};
12832cab237bSDimitry Andric }
12842cab237bSDimitry Andric }
12852cab237bSDimitry Andric }
12862cab237bSDimitry Andric
12872cab237bSDimitry Andric // Fall-thru to regular partial inlining if we:
12882cab237bSDimitry Andric // i) can't find any cold regions to outline, or
12892cab237bSDimitry Andric // ii) can't inline the outlined function anywhere.
12905517e702SDimitry Andric std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
12915517e702SDimitry Andric if (!OI)
12922cab237bSDimitry Andric return {false, nullptr};
1293f22ef01cSRoman Divacky
12942cab237bSDimitry Andric FunctionCloner Cloner(F, OI.get(), ORE);
129524d58133SDimitry Andric Cloner.NormalizeReturnBlock();
12962cab237bSDimitry Andric
12972cab237bSDimitry Andric Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
12982cab237bSDimitry Andric
12992cab237bSDimitry Andric if (!OutlinedFunction)
13002cab237bSDimitry Andric return {false, nullptr};
1301f22ef01cSRoman Divacky
130224d58133SDimitry Andric bool AnyInline = tryPartialInline(Cloner);
1303f22ef01cSRoman Divacky
13045517e702SDimitry Andric if (AnyInline)
13052cab237bSDimitry Andric return {true, OutlinedFunction};
1306f22ef01cSRoman Divacky
13072cab237bSDimitry Andric return {false, nullptr};
13085517e702SDimitry Andric }
13095517e702SDimitry Andric
tryPartialInline(FunctionCloner & Cloner)131024d58133SDimitry Andric bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
13112cab237bSDimitry Andric if (Cloner.OutlinedFunctions.empty())
131224d58133SDimitry Andric return false;
13135517e702SDimitry Andric
13142cab237bSDimitry Andric int SizeCost = 0;
13152cab237bSDimitry Andric BlockFrequency WeightedRcost;
13162cab237bSDimitry Andric int NonWeightedRcost;
131724d58133SDimitry Andric std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
131824d58133SDimitry Andric
13192cab237bSDimitry Andric // Only calculate RelativeToEntryFreq when we are doing single region
13202cab237bSDimitry Andric // outlining.
13212cab237bSDimitry Andric BranchProbability RelativeToEntryFreq;
13222cab237bSDimitry Andric if (Cloner.ClonedOI) {
13232cab237bSDimitry Andric RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
13242cab237bSDimitry Andric } else
13252cab237bSDimitry Andric // RelativeToEntryFreq doesn't make sense when we have more than one
13262cab237bSDimitry Andric // outlined call because each call will have a different relative frequency
13272cab237bSDimitry Andric // to the entry block. We can consider using the average, but the
13282cab237bSDimitry Andric // usefulness of that information is questionable. For now, assume we never
13292cab237bSDimitry Andric // execute the calls to outlined functions.
13302cab237bSDimitry Andric RelativeToEntryFreq = BranchProbability(0, 1);
13315517e702SDimitry Andric
13322cab237bSDimitry Andric WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
13332cab237bSDimitry Andric
13342cab237bSDimitry Andric // The call sequence(s) to the outlined function(s) are larger than the sum of
13352cab237bSDimitry Andric // the original outlined region size(s), it does not increase the chances of
13362cab237bSDimitry Andric // inlining the function with outlining (The inliner uses the size increase to
133724d58133SDimitry Andric // model the cost of inlining a callee).
133824d58133SDimitry Andric if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
13394ba319b5SDimitry Andric OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
13405517e702SDimitry Andric DebugLoc DLoc;
13415517e702SDimitry Andric BasicBlock *Block;
134224d58133SDimitry Andric std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
13434ba319b5SDimitry Andric OrigFuncORE.emit([&]() {
13442cab237bSDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
13455517e702SDimitry Andric DLoc, Block)
134624d58133SDimitry Andric << ore::NV("Function", Cloner.OrigFunc)
13475517e702SDimitry Andric << " not partially inlined into callers (Original Size = "
134824d58133SDimitry Andric << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
13495517e702SDimitry Andric << ", Size of call sequence to outlined function = "
13502cab237bSDimitry Andric << ore::NV("NewSize", SizeCost) << ")";
13512cab237bSDimitry Andric });
13525517e702SDimitry Andric return false;
13535517e702SDimitry Andric }
13545517e702SDimitry Andric
1355*b5893f02SDimitry Andric assert(empty(Cloner.OrigFunc->users()) &&
13565517e702SDimitry Andric "F's users should all be replaced!");
135724d58133SDimitry Andric
135824d58133SDimitry Andric std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
135924d58133SDimitry Andric Cloner.ClonedFunc->user_end());
13605517e702SDimitry Andric
13615517e702SDimitry Andric DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1362da09e106SDimitry Andric auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1363da09e106SDimitry Andric if (CalleeEntryCount)
136424d58133SDimitry Andric computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
13655517e702SDimitry Andric
13664ba319b5SDimitry Andric uint64_t CalleeEntryCountV =
13674ba319b5SDimitry Andric (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
136824d58133SDimitry Andric
13695517e702SDimitry Andric bool AnyInline = false;
13705517e702SDimitry Andric for (User *User : Users) {
13715517e702SDimitry Andric CallSite CS = getCallSite(User);
13725517e702SDimitry Andric
13735517e702SDimitry Andric if (IsLimitReached())
13745517e702SDimitry Andric continue;
13755517e702SDimitry Andric
13764ba319b5SDimitry Andric OptimizationRemarkEmitter CallerORE(CS.getCaller());
13774ba319b5SDimitry Andric if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
13785517e702SDimitry Andric continue;
13795517e702SDimitry Andric
13802cab237bSDimitry Andric // Construct remark before doing the inlining, as after successful inlining
13812cab237bSDimitry Andric // the callsite is removed.
13822cab237bSDimitry Andric OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
13832cab237bSDimitry Andric OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
13842cab237bSDimitry Andric << ore::NV("Caller", CS.getCaller());
13855517e702SDimitry Andric
13865517e702SDimitry Andric InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
13872cab237bSDimitry Andric // We can only forward varargs when we outlined a single region, else we
13882cab237bSDimitry Andric // bail on vararg functions.
13892cab237bSDimitry Andric if (!InlineFunction(CS, IFI, nullptr, true,
13902cab237bSDimitry Andric (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
13912cab237bSDimitry Andric : nullptr)))
13922cab237bSDimitry Andric continue;
13932cab237bSDimitry Andric
13944ba319b5SDimitry Andric CallerORE.emit(OR);
13955517e702SDimitry Andric
13965517e702SDimitry Andric // Now update the entry count:
13975517e702SDimitry Andric if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
13985517e702SDimitry Andric uint64_t CallSiteCount = CallSiteToProfCountMap[User];
13995517e702SDimitry Andric CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
14005517e702SDimitry Andric }
14015517e702SDimitry Andric
14025517e702SDimitry Andric AnyInline = true;
14035517e702SDimitry Andric NumPartialInlining++;
14045517e702SDimitry Andric // Update the stats
14052cab237bSDimitry Andric if (Cloner.ClonedOI)
14065517e702SDimitry Andric NumPartialInlined++;
14072cab237bSDimitry Andric else
14082cab237bSDimitry Andric NumColdOutlinePartialInlined++;
14092cab237bSDimitry Andric
14105517e702SDimitry Andric }
14115517e702SDimitry Andric
141224d58133SDimitry Andric if (AnyInline) {
141324d58133SDimitry Andric Cloner.IsFunctionInlined = true;
141424d58133SDimitry Andric if (CalleeEntryCount)
14154ba319b5SDimitry Andric Cloner.OrigFunc->setEntryCount(
14164ba319b5SDimitry Andric CalleeEntryCount.setCount(CalleeEntryCountV));
14174ba319b5SDimitry Andric OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
14184ba319b5SDimitry Andric OrigFuncORE.emit([&]() {
14192cab237bSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
14202cab237bSDimitry Andric << "Partially inlined into at least one caller";
14212cab237bSDimitry Andric });
14222cab237bSDimitry Andric
142324d58133SDimitry Andric }
14245517e702SDimitry Andric
14255517e702SDimitry Andric return AnyInline;
1426f22ef01cSRoman Divacky }
1427f22ef01cSRoman Divacky
run(Module & M)1428d88c1a5aSDimitry Andric bool PartialInlinerImpl::run(Module &M) {
142951690af2SDimitry Andric if (DisablePartialInlining)
143051690af2SDimitry Andric return false;
143151690af2SDimitry Andric
1432d88c1a5aSDimitry Andric std::vector<Function *> Worklist;
1433d88c1a5aSDimitry Andric Worklist.reserve(M.size());
14343ca95b02SDimitry Andric for (Function &F : M)
14353ca95b02SDimitry Andric if (!F.use_empty() && !F.isDeclaration())
1436d88c1a5aSDimitry Andric Worklist.push_back(&F);
1437f22ef01cSRoman Divacky
1438d88c1a5aSDimitry Andric bool Changed = false;
1439d88c1a5aSDimitry Andric while (!Worklist.empty()) {
1440d88c1a5aSDimitry Andric Function *CurrFunc = Worklist.back();
1441d88c1a5aSDimitry Andric Worklist.pop_back();
1442f22ef01cSRoman Divacky
1443d88c1a5aSDimitry Andric if (CurrFunc->use_empty())
1444d88c1a5aSDimitry Andric continue;
1445f22ef01cSRoman Divacky
1446d88c1a5aSDimitry Andric bool Recursive = false;
1447d88c1a5aSDimitry Andric for (User *U : CurrFunc->users())
144891bc56edSDimitry Andric if (Instruction *I = dyn_cast<Instruction>(U))
1449d88c1a5aSDimitry Andric if (I->getParent()->getParent() == CurrFunc) {
1450d88c1a5aSDimitry Andric Recursive = true;
1451f22ef01cSRoman Divacky break;
1452f22ef01cSRoman Divacky }
1453d88c1a5aSDimitry Andric if (Recursive)
1454d88c1a5aSDimitry Andric continue;
1455f22ef01cSRoman Divacky
14562cab237bSDimitry Andric std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
14572cab237bSDimitry Andric if (Result.second)
14582cab237bSDimitry Andric Worklist.push_back(Result.second);
1459*b5893f02SDimitry Andric Changed |= Result.first;
1460f22ef01cSRoman Divacky }
1461f22ef01cSRoman Divacky
1462d88c1a5aSDimitry Andric return Changed;
1463f22ef01cSRoman Divacky }
1464f22ef01cSRoman Divacky
1465d88c1a5aSDimitry Andric char PartialInlinerLegacyPass::ID = 0;
14662cab237bSDimitry Andric
1467d88c1a5aSDimitry Andric INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1468d88c1a5aSDimitry Andric "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)1469d88c1a5aSDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1470f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
1471f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1472d88c1a5aSDimitry Andric INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1473d88c1a5aSDimitry Andric "Partial Inliner", false, false)
1474d88c1a5aSDimitry Andric
1475d88c1a5aSDimitry Andric ModulePass *llvm::createPartialInliningPass() {
1476d88c1a5aSDimitry Andric return new PartialInlinerLegacyPass();
1477d88c1a5aSDimitry Andric }
1478d88c1a5aSDimitry Andric
run(Module & M,ModuleAnalysisManager & AM)1479d88c1a5aSDimitry Andric PreservedAnalyses PartialInlinerPass::run(Module &M,
1480d88c1a5aSDimitry Andric ModuleAnalysisManager &AM) {
1481d88c1a5aSDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1482f37b6182SDimitry Andric
1483d88c1a5aSDimitry Andric std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1484d88c1a5aSDimitry Andric [&FAM](Function &F) -> AssumptionCache & {
1485d88c1a5aSDimitry Andric return FAM.getResult<AssumptionAnalysis>(F);
1486d88c1a5aSDimitry Andric };
1487f37b6182SDimitry Andric
1488f37b6182SDimitry Andric std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1489f37b6182SDimitry Andric [&FAM](Function &F) -> BlockFrequencyInfo & {
1490f37b6182SDimitry Andric return FAM.getResult<BlockFrequencyAnalysis>(F);
1491f37b6182SDimitry Andric };
1492f37b6182SDimitry Andric
1493f37b6182SDimitry Andric std::function<TargetTransformInfo &(Function &)> GetTTI =
1494f37b6182SDimitry Andric [&FAM](Function &F) -> TargetTransformInfo & {
1495f37b6182SDimitry Andric return FAM.getResult<TargetIRAnalysis>(F);
1496f37b6182SDimitry Andric };
1497f37b6182SDimitry Andric
1498f37b6182SDimitry Andric ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1499f37b6182SDimitry Andric
15004ba319b5SDimitry Andric if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
15012cab237bSDimitry Andric .run(M))
15023ca95b02SDimitry Andric return PreservedAnalyses::none();
15033ca95b02SDimitry Andric return PreservedAnalyses::all();
1504f22ef01cSRoman Divacky }
1505