//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to perform inlining without
// missing any calls while keeping the call graph up to date.  The decisions
// about which calls are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

#define DEBUG_TYPE "inline"

STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted,
          "Number of functions deleted because all callers were inlined or "
          "removed");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// This weirdly named statistic tracks the number of times that, when
// attempting to inline a function A into B, we analyze the callers of B to
// see whether inlining B into them would be more profitable but would be
// blocked by first inlining A into B.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");

/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
/// prior to LLVM's code generator having support for stack coloring based on
/// lifetime markers. It is now in the process of being removed. To experiment
/// with disabling it and relying fully on lifetime marker based stack
/// coloring, you can pass this flag to LLVM.
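///
/// A minimal usage sketch (the exact driver invocation is an assumption, not
/// something this file specifies):
///   opt -inline -disable-inlined-alloca-merging input.ll -S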
static cl::opt<bool>
    DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                cl::init(false), cl::Hidden);

namespace {
enum class InlinerFunctionImportStatsOpts {
  No = 0,
  Basic = 1,
  Verbose = 2,
};

cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
    "inliner-function-import-stats",
    cl::init(InlinerFunctionImportStatsOpts::No),
    cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
                          "basic statistics"),
               clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
                          "printing of statistics for each inlined function"),
               clEnumValEnd),
    cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
} // namespace

Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}

Inliner::Inliner(char &ID, bool InsertLifetime)
    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}

/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  getAAResultsAnalysisUsage(AU);
  CallGraphSCCPass::getAnalysisUsage(AU);
}

typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy;
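// For illustration: with this map, every inlined alloca whose outer type is,
// say, [256 x i8] lands in the same bucket, and the entries of a bucket are
// the candidates considered for stack-slot reuse below.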

/// Look at all of the allocas that we inlined through this call site.  If we
/// have already inlined other allocas through other calls into this function,
/// then we know that they have disjoint lifetimes and that we can merge them.
///
/// There are many heuristics possible for merging these allocas, and the
/// different options have different tradeoffs.  One thing that we *really*
/// don't want to hurt is SRoA: once inlining happens, often allocas are no
/// longer address taken and so they can be promoted.
///
/// Our "solution" for that is to only merge allocas whose outermost type is an
/// array type.  These are usually not promoted because someone is using a
/// variable index into them.  These are also often the most important ones to
/// merge.
///
/// A better solution would be to have real memory lifetime markers in the IR
/// and not have the inliner do any merging of allocas at all.  This would
/// allow the backend to do proper stack slot coloring of all allocas that
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
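///
/// For illustration (hand-written IR, not taken from any test): if two calls
/// inlined into the same caller each introduced
///   %buf = alloca [64 x i32]
/// then the second inlined %buf can be RAUW'd onto the first, because the two
/// inlined bodies never have overlapping lifetimes within the caller.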
static void mergeInlinedArrayAllocas(
    Function *Caller, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if the call site was inlined from
  // some other call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A.  Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between x and y
  // because their scopes are not disjoint.  We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
       ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can of course be empty!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {

      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it: RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                   << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}

/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(
    CallSite CS, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
    return false;

  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return true;
}

/// Return true if inlining of CS can block the caller from being inlined
/// when inlining the caller has been shown to be more beneficial. \p IC is
/// the estimated inline cost associated with callsite \p CS.
/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
/// caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
                 int &TotalSecondaryCost,
                 function_ref<InlineCost(CallSite CS)> GetInlineCost) {

  // For now we only handle local or inline functions.
  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
    return false;
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  //
  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
  // the internal implementation of the inline cost metrics rather than
  // treating them as truly abstract units etc.
  TotalSecondaryCost = 0;
  // The candidate cost to be imposed upon the current function.
  int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
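  // A worked illustration with made-up numbers: if IC.getCost() is 150 and
  // InlineConstants::CallPenalty is 25, CandidateCost is 124; an outer call
  // site of the caller whose remaining headroom (getCostDelta) is 100 would
  // then satisfy the check below and count as a blocked outer inline.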
  // This bool tracks what happens if we do NOT inline C into B.
  bool callerWillBeRemoved = Caller->hasLocalLinkage();
  // This bool tracks what happens if we DO inline C into B.
  bool inliningPreventsSomeOuterInline = false;
  for (User *U : Caller->users()) {
    CallSite CS2(U);

    // If this isn't a call to Caller (it could be some other sort
    // of reference) skip it.  Such references will prevent the caller
    // from being removed.
    if (!CS2 || CS2.getCalledFunction() != Caller) {
      callerWillBeRemoved = false;
      continue;
    }

    InlineCost IC2 = GetInlineCost(CS2);
    ++NumCallerCallersAnalyzed;
    if (!IC2) {
      callerWillBeRemoved = false;
      continue;
    }
    if (IC2.isAlways())
      continue;

    // See if inlining the original callsite would erase the cost delta of
    // this callsite.  We subtract off the penalty for the call instruction,
    // which we would be deleting.
    if (IC2.getCostDelta() <= CandidateCost) {
      inliningPreventsSomeOuterInline = true;
      TotalSecondaryCost += IC2.getCost();
    }
  }
  // If all outer calls to Caller would get inlined, the cost for the last
  // one is set very low by getInlineCost, in anticipation that Caller will
  // be removed entirely.  We did not account for this above unless there
  // is only one caller of Caller.
  if (callerWillBeRemoved && !Caller->use_empty())
    TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;

  if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
    return true;

  return false;
}

/// Return true if the inliner should attempt to inline at the given CallSite.
static bool shouldInline(CallSite CS,
                         function_ref<InlineCost(CallSite CS)> GetInlineCost,
                         OptimizationRemarkEmitter &ORE) {
  using namespace ore;
  InlineCost IC = GetInlineCost(CS);
  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();

  if (IC.isAlways()) {
    DEBUG(dbgs() << "    Inlining: cost=always"
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Callee)
             << " should always be inlined (cost=always)");
    return true;
  }

  if (IC.isNever()) {
    DEBUG(dbgs() << "    NOT Inlining: cost=never"
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Callee)
             << " should never be inlined (cost=never)");
    return false;
  }

  Function *Caller = CS.getCaller();
  if (!IC) {
    DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                 << ", thres=" << (IC.getCostDelta() + IC.getCost())
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Callee) << " too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
    return false;
  }

  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
    DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                 << " Cost = " << IC.getCost()
                 << ", outer Cost = " << TotalSecondaryCost << '\n');
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
                                        "IncreaseCostInOtherContexts", Call)
             << "Not inlining. Cost of inlining " << NV("Callee", Callee)
             << " increases the cost of inlining " << NV("Caller", Caller)
             << " in other contexts");
    return false;
  }

  DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
               << ", thres=" << (IC.getCostDelta() + IC.getCost())
               << ", Call: " << *CS.getInstruction() << '\n');
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBeInlined", Call)
           << NV("Callee", Callee) << " can be inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
  return true;
}

/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool InlineHistoryIncludes(
    Function *F, int InlineHistoryID,
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
  while (InlineHistoryID != -1) {
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
           "Invalid inline history ID");
    if (InlineHistory[InlineHistoryID].first == F)
      return true;
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
  }
  return false;
}

bool Inliner::doInitialization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.setModuleInfo(CG.getModule());
  return false; // No changes to CallGraph.
}

bool Inliner::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return inlineCalls(SCC);
}

static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallSite CS)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  SmallPtrSet<Function *, 8> SCCFunctions;
  DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
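  //
  // For illustration (a made-up trace): inlining B at a top-level call
  // (history -1) appends {B, -1} as entry 0; a call to C exposed by that
  // inline is queued with history 0, and inlining it appends {C, 0} as entry
  // 1, so walking entry 1's chain visits C, then B, then stops at -1.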
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F || F->isDeclaration())
      continue;

    OptimizationRemarkEmitter ORE(F);
    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        CallSite CS(cast<Value>(&I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never inline
        // it.  If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (Function *Callee = CS.getCalledFunction())
          if (Callee->isDeclaration()) {
            using namespace ore;
            ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose());
            continue;
          }

        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this SCC, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
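  // (This is a two-pointer partition: after the loop, entries before
  // FirstCallInSCC call out of the SCC, while entries from FirstCallInSCC
  // onward call into it and are therefore visited last.)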

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // We iterate in this outer loop because inlining a function can cause
    // indirect calls to become direct calls.
    // CallSites may be modified inside the loop, so a range-based for loop
    // cannot be used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;

      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
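      // A sketch of that situation (hand-written IR): once IPSCCP proves that
      // @f always returns 7 and rewrites every use of %r to 7,
      //   %r = call i32 @f() readonly
      // has no users left and is trivially dead, so we just drop it.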
      if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
        DEBUG(dbgs() << "    -> Deleting dead call: " << *CS.getInstruction()
                     << "\n");
        // Update the call graph by deleting the edge from Caller to Callee.
        CG[Caller]->removeCallEdgeFor(CS);
        CS.getInstruction()->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // We can only inline direct calls to non-declarations.
        if (!Callee || Callee->isDeclaration())
          continue;

        // If this call site was obtained by inlining another function, verify
        // that the inline history for the call site does not include the
        // callee itself.  If it does, we would be recursively inlining the
        // same function, which would expose the same call sites again and
        // cause us to inline infinitely.
        int InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;

        // Get the DebugLoc to report; CS will be invalid after inlining.
        DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
        BasicBlock *Block = CS.getParent();
        // FIXME for new PM: because of the old PM we currently generate ORE and
        // in turn BFI on demand.  With the new PM, the ORE dependency should
        // just become a regular analysis dependency.
        OptimizationRemarkEmitter ORE(Caller);

        // If the policy determines that we should inline this function,
        // try to do so.
        using namespace ore;
        if (!shouldInline(CS, GetInlineCost, ORE)) {
          ORE.emit(
              OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
              << NV("Callee", Callee) << " will not be inlined into "
              << NV("Caller", Caller));
          continue;
        }

        // Attempt to inline the function.
        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID, InsertLifetime, AARGetter,
                                  ImportedFunctionsStats)) {
          ORE.emit(
              OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
              << NV("Callee", Callee) << " will not be inlined into "
              << NV("Caller", Caller));
          continue;
        }
        ++NumInlined;

        // Report the inline decision.
        ORE.emit(OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block)
                 << NV("Callee", Callee) << " inlined into "
                 << NV("Caller", Caller));

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (Value *Ptr : InlineInfo.InlinedCalls)
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
        }
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&

          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet; doing
          // so could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(dbgs() << "    -> Deleting dead function: " << Callee->getName()
                     << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}

bool Inliner::inlineCalls(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  // We compute dedicated AA results for each function in the SCC as needed. We
  // use a lambda referencing external objects so that they live long enough to
  // be queried, but we re-use them each time.
  Optional<BasicAAResult> BAR;
  Optional<AAResults> AAR;
  auto AARGetter = [&](Function &F) -> AAResults & {
    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
    return *AAR;
  };
  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };
  return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
                         [this](CallSite CS) { return getInlineCost(CS); },
                         AARGetter, ImportedFunctionsStats);
}

/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                InlinerFunctionImportStatsOpts::Verbose);
  return removeDeadFunctions(CG);
}

/// Remove dead functions that are not included in the DNR (Do Not Remove)
/// list.
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
  SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;

  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node.  These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Queue the node so the function is removed from the call graph and
    // deleted below.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program.  Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
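    // A sketch of the hazard (hand-written example): a linkonce_odr function
    // @f and a global @f_data placed in comdat($f) are kept or discarded by
    // the linker as a unit, so deleting @f here while @f_data survives would
    // tear the group apart.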
    if (!F->hasLocalLinkage()) {
      if (const Comdat *C = F->getComdat()) {
        --ComdatEntriesAlive[C];
        DeadFunctionsInComdats.push_back(CGN);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Count up all the entities in COMDAT groups.
    auto ComdatGroupReferenced = [&](const Comdat *C) {
      auto I = ComdatEntriesAlive.find(C);
      if (I != ComdatEntriesAlive.end())
        ++(I->getSecond());
    };
    for (const Function &F : CG.getModule())
      if (const Comdat *C = F.getComdat())
        ComdatGroupReferenced(C);
    for (const GlobalVariable &GV : CG.getModule().globals())
      if (const Comdat *C = GV.getComdat())
        ComdatGroupReferenced(C);
    for (const GlobalAlias &GA : CG.getModule().aliases())
      if (const Comdat *C = GA.getComdat())
        ComdatGroupReferenced(C);
    for (CallGraphNode *CGN : DeadFunctionsInComdats) {
      Function *F = CGN->getFunction();
      const Comdat *C = F->getComdat();
      int NumAlive = ComdatEntriesAlive[C];
      // We can remove functions in a COMDAT group if the entire group is dead.
      assert(NumAlive >= 0);
      if (NumAlive > 0)
        continue;

      RemoveCGN(CGN);
    }
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so.  We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating here in a non-stable
  // order; the order in which the functions are deleted doesn't matter.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}