1 //===- Inliner.cpp - Code common to all inliners --------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the mechanics required to implement inlining without
11 // missing any calls and updating the call graph.  The decisions of which calls
12 // are profitable to inline are implemented elsewhere.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/AssumptionCache.h"
20 #include "llvm/Analysis/BasicAliasAnalysis.h"
21 #include "llvm/Analysis/CallGraph.h"
22 #include "llvm/Analysis/InlineCost.h"
23 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/IR/CallSite.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include "llvm/Transforms/IPO/InlinerPass.h"
35 #include "llvm/Transforms/Utils/Cloning.h"
36 #include "llvm/Transforms/Utils/Local.h"
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "inline"
40 
// Pass-wide statistics, printed with -stats.
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// This weirdly named statistic tracks the number of times that, when attempting
// to inline a function A into B, we analyze the callers of B in order to see
// if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");

/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
/// prior to LLVM's code generator having support for stack coloring based on
/// lifetime markers. It is now in the process of being removed. To experiment
/// with disabling it and relying fully on lifetime marker based stack
/// coloring, you can pass this flag to LLVM.
static cl::opt<bool>
    DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                cl::init(false), cl::Hidden);
61 
namespace {
// Granularity levels for the -inliner-function-import-stats report.
enum class InlinerFunctionImportStatsOpts {
  No = 0,      // Collect no import-related inlining statistics (default).
  Basic = 1,   // Report aggregate statistics only.
  Verbose = 2, // Additionally report every inlined imported function.
};

// Hidden knob selecting whether (and how verbosely) statistics about inlining
// of imported functions are gathered; consumed via ImportedFunctionsStats in
// doInitialization/doFinalization below.
cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
    "inliner-function-import-stats",
    cl::init(InlinerFunctionImportStatsOpts::No),
    cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
                          "basic statistics"),
               clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
                          "printing of statistics for each inlined function"),
               clEnumValEnd),
    cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
} // namespace
79 
80 Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
81 
82 Inliner::Inliner(char &ID, bool InsertLifetime)
83     : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
84 
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  // Declare the alias-analysis passes consulted when building per-function
  // AAResults (see the AARGetter lambda in inlineCalls).
  getAAResultsAnalysisUsage(AU);
  // The base class declares the CallGraph requirement/preservation.
  CallGraphSCCPass::getAnalysisUsage(AU);
}
95 
// Map from an array type to all inlined static allocas of that type that are
// available for reuse in the caller.
typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy;

/// Look at all of the allocas that we inlined through this call site.  If we
/// have already inlined other allocas through other calls into this function,
/// then we know that they have disjoint lifetimes and that we can merge them.
///
/// There are many heuristics possible for merging these allocas, and the
/// different options have different tradeoffs.  One thing that we *really*
/// don't want to hurt is SRoA: once inlining happens, often allocas are no
/// longer address taken and so they can be promoted.
///
/// Our "solution" for that is to only merge allocas whose outermost type is an
/// array type.  These are usually not promoted because someone is using a
/// variable index into them.  These are also often the most important ones to
/// merge.
///
/// A better solution would be to have real memory lifetime markers in the IR
/// and not have the inliner do any merging of allocas at all.  This would
/// allow the backend to do proper stack slot coloring of all allocas that
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
static void mergeInlinedArrayAllocas(
    Function *Caller, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
  // Available allocas already reused during *this* call; each available alloca
  // may absorb at most one of the newly inlined allocas.
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A.  Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint.  We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
       ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {

      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some other
      // function in this SCC.  (Static allocas live in the entry block, so
      // comparing parent blocks suffices.)
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                   << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 != Align2) {
        // An alignment of 0 means "use the ABI alignment of the allocated
        // type"; make both sides explicit before comparing.
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        // Raise the surviving alloca's alignment if AI demanded more.  Note
        // this copies AI's *original* alignment, which may be 0 (ABI default)
        // and still guarantees at least TypeAlign.
        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      // Null out the slot so later consumers of StaticAllocas don't see a
      // dangling pointer to the erased instruction.
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}
223 
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
///
/// \param CS the (direct) call site to inline.
/// \param IFI scratch state filled in by InlineFunction (static allocas,
///        newly exposed call sites, ...).
/// \param InlineHistory history index of CS; -1 for a top-level call site.
/// \param InsertLifetime whether lifetime intrinsics are added for allocas.
/// \returns true if the call site was inlined.
static bool InlineCallIfPossible(
    CallSite CS, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Build fresh AA results for the callee; InlineFunction takes them as input.
  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
    return false;

  // Record this inline for the imported-function statistics report, if
  // enabled.
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  // Merge callee attributes into the caller as required after inlining.
  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return true;
}
257 
/// Emit an optimization-remark analysis message \p Msg attached to the call
/// instruction of \p CS, under this pass's DEBUG_TYPE ("inline").
static void emitAnalysis(CallSite CS, OptimizationRemarkEmitter &ORE,
                         const Twine &Msg) {
  ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CS.getInstruction(), Msg);
}
262 
/// Return true if inlining of CS can block the caller from being
/// inlined which is proved to be more beneficial. \p IC is the
/// estimated inline cost associated with callsite \p CS.
/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
/// caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
                 int &TotalSecondaryCost,
                 function_ref<InlineCost(CallSite CS)> GetInlineCost) {

  // For now we only handle local or inline functions.
  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
    return false;
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  //
  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
  // the internal implementation of the inline cost metrics rather than
  // treating them as truly abstract units etc.
  TotalSecondaryCost = 0;
  // The candidate cost to be imposed upon the current function.
  int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
  // This bool tracks what happens if we do NOT inline C into B.
  bool callerWillBeRemoved = Caller->hasLocalLinkage();
  // This bool tracks what happens if we DO inline C into B.
  bool inliningPreventsSomeOuterInline = false;
  for (User *U : Caller->users()) {
    CallSite CS2(U);

    // If this isn't a call to Caller (it could be some other sort
    // of reference) skip it.  Such references will prevent the caller
    // from being removed.
    if (!CS2 || CS2.getCalledFunction() != Caller) {
      callerWillBeRemoved = false;
      continue;
    }

    InlineCost IC2 = GetInlineCost(CS2);
    ++NumCallerCallersAnalyzed;
    if (!IC2) {
      // Inlining B into this outer caller was judged unprofitable, so B will
      // survive as a callee there.
      callerWillBeRemoved = false;
      continue;
    }
    if (IC2.isAlways())
      continue;

    // See if inlining or original callsite would erase the cost delta of
    // this callsite. We subtract off the penalty for the call instruction,
    // which we would be deleting.
    if (IC2.getCostDelta() <= CandidateCost) {
      inliningPreventsSomeOuterInline = true;
      TotalSecondaryCost += IC2.getCost();
    }
  }
  // If all outer calls to Caller would get inlined, the cost for the last
  // one is set very low by getInlineCost, in anticipation that Caller will
  // be removed entirely.  We did not account for this above unless there
  // is only one caller of Caller.
  if (callerWillBeRemoved && !Caller->use_empty())
    TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;

  if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
    return true;

  return false;
}
339 
/// Return true if the inliner should attempt to inline at the given CallSite.
///
/// Consults \p GetInlineCost for the cost model and emits an optimization
/// remark describing the decision through \p ORE in every case.
static bool shouldInline(CallSite CS,
                         function_ref<InlineCost(CallSite CS)> GetInlineCost,
                         OptimizationRemarkEmitter &ORE) {
  InlineCost IC = GetInlineCost(CS);

  // Unconditional inline (e.g. the cost model found it mandatory).
  if (IC.isAlways()) {
    DEBUG(dbgs() << "    Inlining: cost=always"
                 << ", Call: " << *CS.getInstruction() << "\n");
    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName()) +
                              " should always be inlined (cost=always)");
    return true;
  }

  // Unconditional refusal.
  if (IC.isNever()) {
    DEBUG(dbgs() << "    NOT Inlining: cost=never"
                 << ", Call: " << *CS.getInstruction() << "\n");
    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
                                " should never be inlined (cost=never)"));
    return false;
  }

  Function *Caller = CS.getCaller();
  // Cost exceeded the threshold: InlineCost converts to false in that case.
  if (!IC) {
    DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                 << ", thres=" << (IC.getCostDelta() + IC.getCost())
                 << ", Call: " << *CS.getInstruction() << "\n");
    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
                                " too costly to inline (cost=") +
                              Twine(IC.getCost()) + ", threshold=" +
                              Twine(IC.getCostDelta() + IC.getCost()) + ")");
    return false;
  }

  // Even though this call site is profitable in isolation, inlining it may
  // block a more profitable inline of the caller elsewhere; defer if so.
  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
    DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                 << " Cost = " << IC.getCost()
                 << ", outer Cost = " << TotalSecondaryCost << '\n');
    emitAnalysis(CS, ORE,
                 Twine("Not inlining. Cost of inlining " +
                       CS.getCalledFunction()->getName() +
                       " increases the cost of inlining " +
                       CS.getCaller()->getName() + " in other contexts"));
    return false;
  }

  DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
               << ", thres=" << (IC.getCostDelta() + IC.getCost())
               << ", Call: " << *CS.getInstruction() << '\n');
  emitAnalysis(CS, ORE, CS.getCalledFunction()->getName() +
                            Twine(" can be inlined into ") +
                            CS.getCaller()->getName() + " with cost=" +
                            Twine(IC.getCost()) + " (threshold=" +
                            Twine(IC.getCostDelta() + IC.getCost()) + ")");
  return true;
}
397 
398 /// Return true if the specified inline history ID
399 /// indicates an inline history that includes the specified function.
400 static bool InlineHistoryIncludes(
401     Function *F, int InlineHistoryID,
402     const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
403   while (InlineHistoryID != -1) {
404     assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
405            "Invalid inline history ID");
406     if (InlineHistory[InlineHistoryID].first == F)
407       return true;
408     InlineHistoryID = InlineHistory[InlineHistoryID].second;
409   }
410   return false;
411 }
412 
/// Capture module-level information for the imported-function inlining
/// statistics (if enabled) before any SCC is visited.
bool Inliner::doInitialization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.setModuleInfo(CG.getModule());
  return false; // No changes to CallGraph.
}
418 
419 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
420   if (skipSCC(SCC))
421     return false;
422   return inlineCalls(SCC);
423 }
424 
/// Worklist-driven inlining over one SCC: collect every inlinable call site in
/// the SCC's functions, then repeatedly inline profitable candidates
/// (re-queueing call sites exposed by inlining), delete trivially dead calls,
/// and delete local functions whose last use disappears.  Returns true if
/// anything changed.
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallSite CS)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  SmallPtrSet<Function *, 8> SCCFunctions;
  DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F)
      continue;

    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        CallSite CS(cast<Value>(&I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never inline
        // it.  If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (Function *Callee = CS.getCalledFunction())
          if (Callee->isDeclaration())
            continue;

        // -1 marks a top-level call site with no inline history yet.
        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this function, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        // i is decremented so the element swapped into slot i is re-examined
        // on the next iteration.
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause indirect
    // calls to become direct calls.
    // CallSites may be modified inside so ranged for loop can not be used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;

      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
        DEBUG(dbgs() << "    -> Deleting dead call: " << *CS.getInstruction()
                     << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        CG[Caller]->removeCallEdgeFor(CS);
        CS.getInstruction()->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // We can only inline direct calls to non-declarations.
        if (!Callee || Callee->isDeclaration())
          continue;

        // If this call site was obtained by inlining another function, verify
        // that the include path for the function did not include the callee
        // itself.  If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        int InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;

        // Get DebugLoc to report. CS will be invalid after Inliner.
        DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
        BasicBlock *Block = CS.getParent();
        // FIXME for new PM: because of the old PM we currently generate ORE and
        // in turn BFI on demand.  With the new PM, the ORE dependency should
        // just become a regular analysis dependency.
        OptimizationRemarkEmitter ORE(Caller);

        // If the policy determines that we should inline this function,
        // try to do so.
        if (!shouldInline(CS, GetInlineCost, ORE)) {
          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
                                           Twine(Callee->getName() +
                                                 " will not be inlined into " +
                                                 Caller->getName()));
          continue;
        }

        // Attempt to inline the function.
        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID, InsertLifetime, AARGetter,
                                  ImportedFunctionsStats)) {
          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
                                           Twine(Callee->getName() +
                                                 " will not be inlined into " +
                                                 Caller->getName()));
          continue;
        }
        ++NumInlined;

        // Report the inline decision.
        ORE.emitOptimizationRemark(
            DEBUG_TYPE, DLoc, Block,
            Twine(Callee->getName() + " inlined into " + Caller->getName()));

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (Value *Ptr : InlineInfo.InlinedCalls)
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
        }
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&

          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(dbgs() << "    -> Deleting dead function: " << Callee->getName()
                     << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Removing the node for callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      // Step back so the element now occupying slot CSi is visited next.
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
624 
/// Gather the analyses this pass registered in getAnalysisUsage and forward
/// to inlineCallsImpl, which does the actual work.  The inline-cost callback
/// dispatches to the derived pass's getInlineCost.
bool Inliner::inlineCalls(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  // We compute dedicated AA results for each function in the SCC as needed. We
  // use a lambda referencing external objects so that they live long enough to
  // be queried, but we re-use them each time.
  Optional<BasicAAResult> BAR;
  Optional<AAResults> AAR;
  auto AARGetter = [&](Function &F) -> AAResults & {
    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
    return *AAR;
  };
  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };
  return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
                         [this](CallSite CS) { return getInlineCost(CS); },
                         AARGetter, ImportedFunctionsStats);
}
647 
/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
  // If requested, print the imported-function inlining statistics gathered
  // during the run (verbose mode lists every inlined imported function).
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                InlinerFunctionImportStatsOpts::Verbose);
  return removeDeadFunctions(CG);
}
656 
/// Remove dead functions that are not included in DNR (Do Not Remove) list.
///
/// When \p AlwaysInlineOnly is true (used by the always-inliner), only
/// functions marked alwaysinline are considered for removal.  Returns true if
/// any function was deleted from the module.
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
  // Per-COMDAT liveness balance: decremented for each dead member found,
  // incremented for each member still present in the module (counted below).
  SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;

  // Detach a node from the call graph and queue it for deletion.
  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node.  These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Removing the node for callee from the call graph and delete it.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program.  Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage()) {
      if (const Comdat *C = F->getComdat()) {
        // Defer the decision until we know whether the whole group is dead.
        --ComdatEntriesAlive[C];
        DeadFunctionsInComdats.push_back(CGN);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Count up all the entities in COMDAT groups
    auto ComdatGroupReferenced = [&](const Comdat *C) {
      auto I = ComdatEntriesAlive.find(C);
      if (I != ComdatEntriesAlive.end())
        ++(I->getSecond());
    };
    for (const Function &F : CG.getModule())
      if (const Comdat *C = F.getComdat())
        ComdatGroupReferenced(C);
    for (const GlobalVariable &GV : CG.getModule().globals())
      if (const Comdat *C = GV.getComdat())
        ComdatGroupReferenced(C);
    for (const GlobalAlias &GA : CG.getModule().aliases())
      if (const Comdat *C = GA.getComdat())
        ComdatGroupReferenced(C);
    for (CallGraphNode *CGN : DeadFunctionsInComdats) {
      Function *F = CGN->getFunction();
      const Comdat *C = F->getComdat();
      // The balance is (live members) - (dead members counted above); zero
      // means every member of the group is dead.
      int NumAlive = ComdatEntriesAlive[C];
      // We can remove functions in a COMDAT group if the entire group is dead.
      assert(NumAlive >= 0);
      if (NumAlive > 0)
        continue;

      RemoveCGN(CGN);
    }
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so.  We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}
760