1 //===- Inliner.cpp - Code common to all inliners --------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file implements the mechanics required to perform inlining without
// missing any calls while keeping the call graph up to date.  The decisions
// about which calls are profitable to inline are implemented elsewhere.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/IPO/InlinerPass.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/AliasAnalysis.h"
20 #include "llvm/Analysis/AssumptionCache.h"
21 #include "llvm/Analysis/BasicAliasAnalysis.h"
22 #include "llvm/Analysis/BlockFrequencyInfo.h"
23 #include "llvm/Analysis/CallGraph.h"
24 #include "llvm/Analysis/InlineCost.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/IR/CallSite.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Transforms/Utils/Cloning.h"
36 #include "llvm/Transforms/Utils/Local.h"
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "inline"
40 
41 STATISTIC(NumInlined, "Number of functions inlined");
42 STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted,
          "Number of functions deleted because all callers were found");
44 STATISTIC(NumMergedAllocas, "Number of allocas merged together");
45 
// This weirdly named statistic tracks the number of times that, when
// attempting to inline a function A into B, we analyze the callers of B to
// see whether inlining B into those callers would be more profitable and
// should therefore block this inline step.
49 STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
50 
51 Inliner::Inliner(char &ID)
52     : CallGraphSCCPass(ID), InsertLifetime(true),
53       BFA(new BlockFrequencyAnalysis()) {}
54 
55 Inliner::Inliner(char &ID, bool InsertLifetime)
56     : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime),
57       BFA(new BlockFrequencyAnalysis()) {}
58 
59 /// For this class, we declare that we require and preserve the call graph.
60 /// If the derived class implements this method, it should
61 /// always explicitly call the implementation here.
62 void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
63   AU.addRequired<AssumptionCacheTracker>();
64   AU.addRequired<TargetLibraryInfoWrapperPass>();
65   getAAResultsAnalysisUsage(AU);
66   CallGraphSCCPass::getAnalysisUsage(AU);
67 }
68 
/// Map from array types to the inlined allocas of that type which are
/// available for reuse as further calls are inlined into the same caller.
typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy;
72 
73 /// If it is possible to inline the specified call site,
74 /// do so and update the CallGraph for this operation.
75 ///
76 /// This function also does some basic book-keeping to update the IR.  The
77 /// InlinedArrayAllocas map keeps track of any allocas that are already
78 /// available from other functions inlined into the caller.  If we are able to
79 /// inline this call site we attempt to reuse already available allocas or add
80 /// any new allocas to the set if not possible.
81 static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
82                                  InlinedArrayAllocasTy &InlinedArrayAllocas,
83                                  int InlineHistory, bool InsertLifetime) {
84   Function *Callee = CS.getCalledFunction();
85   Function *Caller = CS.getCaller();
86 
87   // We need to manually construct BasicAA directly in order to disable
88   // its use of other function analyses.
89   BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee));
90 
91   // Construct our own AA results for this function. We do this manually to
92   // work around the limitations of the legacy pass manager.
93   AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR));
94 
95   // Try to inline the function.  Get the list of static allocas that were
96   // inlined.
97   if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
98     return false;
99 
100   AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
101 
102   // Look at all of the allocas that we inlined through this call site.  If we
103   // have already inlined other allocas through other calls into this function,
104   // then we know that they have disjoint lifetimes and that we can merge them.
105   //
106   // There are many heuristics possible for merging these allocas, and the
107   // different options have different tradeoffs.  One thing that we *really*
108   // don't want to hurt is SRoA: once inlining happens, often allocas are no
109   // longer address taken and so they can be promoted.
110   //
111   // Our "solution" for that is to only merge allocas whose outermost type is an
112   // array type.  These are usually not promoted because someone is using a
113   // variable index into them.  These are also often the most important ones to
114   // merge.
115   //
116   // A better solution would be to have real memory lifetime markers in the IR
117   // and not have the inliner do any merging of allocas at all.  This would
118   // allow the backend to do proper stack slot coloring of all allocas that
119   // *actually make it to the backend*, which is really what we want.
120   //
121   // Because we don't have this information, we do this simple and useful hack.
122   //
123   SmallPtrSet<AllocaInst*, 16> UsedAllocas;
124 
125   // When processing our SCC, check to see if CS was inlined from some other
126   // call site.  For example, if we're processing "A" in this code:
127   //   A() { B() }
128   //   B() { x = alloca ... C() }
129   //   C() { y = alloca ... }
130   // Assume that C was not inlined into B initially, and so we're processing A
131   // and decide to inline B into A.  Doing this makes an alloca available for
132   // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between x and y
134   // because their scopes are not disjoint.  We could make this smarter by
135   // keeping track of the inline history for each alloca in the
136   // InlinedArrayAllocas but this isn't likely to be a significant win.
137   if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
138     return true;
139 
140   // Loop over all the allocas we have so far and see if they can be merged with
141   // a previously inlined alloca.  If not, remember that we had it.
142   for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
143        AllocaNo != e; ++AllocaNo) {
144     AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
145 
146     // Don't bother trying to merge array allocations (they will usually be
147     // canonicalized to be an allocation *of* an array), or allocations whose
148     // type is not itself an array (because we're afraid of pessimizing SRoA).
149     ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
150     if (!ATy || AI->isArrayAllocation())
151       continue;
152 
153     // Get the list of all available allocas for this array type.
154     std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
155 
    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are already being used by
    // this function.  Also, AllocasForType can of course be empty!
161     bool MergedAwayAlloca = false;
162     for (AllocaInst *AvailableAlloca : AllocasForType) {
163 
164       unsigned Align1 = AI->getAlignment(),
165                Align2 = AvailableAlloca->getAlignment();
166 
167       // The available alloca has to be in the right function, not in some other
168       // function in this SCC.
169       if (AvailableAlloca->getParent() != AI->getParent())
170         continue;
171 
172       // If the inlined function already uses this alloca then we can't reuse
173       // it.
174       if (!UsedAllocas.insert(AvailableAlloca).second)
175         continue;
176 
177       // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
178       // success!
179       DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
180                    << *AvailableAlloca << '\n');
181 
      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
184       if (auto *L = LocalAsMetadata::getIfExists(AI))
185         if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
186           for (User *U : MDV->users())
187             if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
188               DDI->moveBefore(AvailableAlloca->getNextNode());
189 
190       AI->replaceAllUsesWith(AvailableAlloca);
191 
192       if (Align1 != Align2) {
193         if (!Align1 || !Align2) {
194           const DataLayout &DL = Caller->getParent()->getDataLayout();
195           unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
196 
197           Align1 = Align1 ? Align1 : TypeAlign;
198           Align2 = Align2 ? Align2 : TypeAlign;
199         }
200 
201         if (Align1 > Align2)
202           AvailableAlloca->setAlignment(AI->getAlignment());
203       }
204 
205       AI->eraseFromParent();
206       MergedAwayAlloca = true;
207       ++NumMergedAllocas;
208       IFI.StaticAllocas[AllocaNo] = nullptr;
209       break;
210     }
211 
212     // If we already nuked the alloca, we're done with it.
213     if (MergedAwayAlloca)
214       continue;
215 
216     // If we were unable to merge away the alloca either because there are no
217     // allocas of the right type available or because we reused them all
218     // already, remember that this alloca came from an inlined function and mark
219     // it used so we don't reuse it for other allocas from this inline
220     // operation.
221     AllocasForType.push_back(AI);
222     UsedAllocas.insert(AI);
223   }
224 
225   return true;
226 }
227 
228 static void emitAnalysis(CallSite CS, const Twine &Msg) {
229   Function *Caller = CS.getCaller();
230   LLVMContext &Ctx = Caller->getContext();
231   DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
232   emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
233 }
234 
235 /// Return true if the inliner should attempt to inline at the given CallSite.
236 bool Inliner::shouldInline(CallSite CS) {
237   InlineCost IC = getInlineCost(CS);
238 
239   if (IC.isAlways()) {
240     DEBUG(dbgs() << "    Inlining: cost=always"
241           << ", Call: " << *CS.getInstruction() << "\n");
242     emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
243                          " should always be inlined (cost=always)");
244     return true;
245   }
246 
247   if (IC.isNever()) {
248     DEBUG(dbgs() << "    NOT Inlining: cost=never"
249           << ", Call: " << *CS.getInstruction() << "\n");
250     emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
251                            " should never be inlined (cost=never)"));
252     return false;
253   }
254 
255   Function *Caller = CS.getCaller();
256   if (!IC) {
257     DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
258           << ", thres=" << (IC.getCostDelta() + IC.getCost())
259           << ", Call: " << *CS.getInstruction() << "\n");
260     emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
261                            " too costly to inline (cost=") +
262                          Twine(IC.getCost()) + ", threshold=" +
263                          Twine(IC.getCostDelta() + IC.getCost()) + ")");
264     return false;
265   }
266 
267   // Try to detect the case where the current inlining candidate caller (call
268   // it B) is a static or linkonce-ODR function and is an inlining candidate
269   // elsewhere, and the current candidate callee (call it C) is large enough
270   // that inlining it into B would make B too big to inline later. In these
271   // circumstances it may be best not to inline C into B, but to inline B into
272   // its callers.
273   //
274   // This only applies to static and linkonce-ODR functions because those are
275   // expected to be available for inlining in the translation units where they
276   // are used. Thus we will always have the opportunity to make local inlining
277   // decisions. Importantly the linkonce-ODR linkage covers inline functions
278   // and templates in C++.
279   //
280   // FIXME: All of this logic should be sunk into getInlineCost. It relies on
281   // the internal implementation of the inline cost metrics rather than
282   // treating them as truly abstract units etc.
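  //
  // As a purely illustrative example: suppose inlining C into B costs 200,
  // and B is static with two callers (cost 50 each) whose remaining cost
  // slack (getCostDelta) is small enough that absorbing C would block both.
  // The blocked outer inlines total 100, which is less than 200, so we would
  // forgo inlining C here and keep B small enough to inline into its callers.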
283   if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
284     int TotalSecondaryCost = 0;
285     // The candidate cost to be imposed upon the current function.
286     int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
287     // This bool tracks what happens if we do NOT inline C into B.
288     bool callerWillBeRemoved = Caller->hasLocalLinkage();
289     // This bool tracks what happens if we DO inline C into B.
290     bool inliningPreventsSomeOuterInline = false;
291     for (User *U : Caller->users()) {
292       CallSite CS2(U);
293 
294       // If this isn't a call to Caller (it could be some other sort
295       // of reference) skip it.  Such references will prevent the caller
296       // from being removed.
297       if (!CS2 || CS2.getCalledFunction() != Caller) {
298         callerWillBeRemoved = false;
299         continue;
300       }
301 
302       InlineCost IC2 = getInlineCost(CS2);
303       ++NumCallerCallersAnalyzed;
304       if (!IC2) {
305         callerWillBeRemoved = false;
306         continue;
307       }
308       if (IC2.isAlways())
309         continue;
310 
      // See if inlining of the original callsite would erase the cost delta
      // of this callsite.  We subtract off the penalty for the call
      // instruction, which we would be deleting.
314       if (IC2.getCostDelta() <= CandidateCost) {
315         inliningPreventsSomeOuterInline = true;
316         TotalSecondaryCost += IC2.getCost();
317       }
318     }
319     // If all outer calls to Caller would get inlined, the cost for the last
320     // one is set very low by getInlineCost, in anticipation that Caller will
321     // be removed entirely.  We did not account for this above unless there
322     // is only one caller of Caller.
323     if (callerWillBeRemoved && !Caller->use_empty())
324       TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
325 
326     if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
327       DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() <<
328            " Cost = " << IC.getCost() <<
329            ", outer Cost = " << TotalSecondaryCost << '\n');
330       emitAnalysis(
331           CS, Twine("Not inlining. Cost of inlining " +
332                     CS.getCalledFunction()->getName() +
333                     " increases the cost of inlining " +
334                     CS.getCaller()->getName() + " in other contexts"));
335       return false;
336     }
337   }
338 
339   DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
340         << ", thres=" << (IC.getCostDelta() + IC.getCost())
341         << ", Call: " << *CS.getInstruction() << '\n');
342   emitAnalysis(
343       CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
344               CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
345               " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
346   return true;
347 }
348 
349 /// Return true if the specified inline history ID
350 /// indicates an inline history that includes the specified function.
351 static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
352             const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
353   while (InlineHistoryID != -1) {
354     assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
355            "Invalid inline history ID");
356     if (InlineHistory[InlineHistoryID].first == F)
357       return true;
358     InlineHistoryID = InlineHistory[InlineHistoryID].second;
359   }
360   return false;
361 }
362 
363 /// \brief Update the frequency of a block that is cloned into the caller.
364 /// This is invoked when \p OrigBB from the callee is cloned into \p NewBB in
365 /// the caller.
366 void Inliner::updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB,
367                               const BasicBlock *NewBB) {
368   if (!HasProfileData)
369     return;
370   Instruction *Call = CS.getInstruction();
371   BasicBlock *CallBB = Call->getParent();
372   BlockFrequencyInfo *CalleeBFI =
373       BFA->getBlockFrequencyInfo(CS.getCalledFunction());
374   BlockFrequencyInfo *CallerBFI =
375       BFA->getBlockFrequencyInfo(CallBB->getParent());
376   // Find the number of times OrigBB is executed per invocation of the callee
377   // and multiply by the number of times callee is executed in the caller.
378   // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq.
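  // For example (hypothetical numbers): if OrigBB has frequency 400 in a
  // callee whose entry frequency is 100 (OrigBB runs four times per call),
  // and the call site's block has frequency 50 in the caller, then NewBB is
  // assigned frequency 400 / 100 * 50 = 200.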
379   uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency();
380   uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq();
381   // Frequency of OrigBB in the callee.
382   BlockFrequency OrigBBFreq = CalleeBFI->getBlockFreq(OrigBB);
383   CallerBFI->setBlockFreq(NewBB, (double)(OrigBBFreq.getFrequency()) /
384                                      CalleeEntryFreq * CallSiteFreq);
385 }
386 
387 /// \brief Update entry count of \p Callee after it got inlined at a callsite
388 /// in block \p CallBB.
389 void Inliner::updateEntryCount(BasicBlock *CallBB, Function *Callee) {
390   if (!HasProfileData)
391     return;
  // If the callee has an original count of N, and the estimated count of the
  // callsite is M, the new callee count is set to N - M.  M is estimated from
  // the caller's entry count, its entry block frequency and the block
  // frequency of the callsite.
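  // For example (hypothetical numbers): a callee with entry count 1000 whose
  // callsite is estimated to execute 300 times has its entry count lowered to
  // 700, reflecting the executions now attributed to the inlined copy.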
396   Optional<uint64_t> CalleeCount = Callee->getEntryCount();
397   if (!CalleeCount)
398     return;
399   Optional<uint64_t> CallSiteCount = llvm::getBlockCount(CallBB, BFA.get());
400   if (!CallSiteCount)
401     return;
  // Since CallSiteCount is an estimate, it could exceed the original callee
  // count; in that case the new callee count is clamped to 0.
404   if (CallSiteCount.getValue() > CalleeCount.getValue()) {
405     Callee->setEntryCount(0);
406     DEBUG(llvm::dbgs() << "Estimated count of block " << CallBB->getName()
407                        << " is " << CallSiteCount.getValue()
408                        << " which exceeds the entry count "
409                        << CalleeCount.getValue() << " of the callee "
410                        << Callee->getName() << "\n");
411   } else
412     Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue());
413 }
414 
415 void Inliner::invalidateBFI(Function *F) {
416   if (!HasProfileData)
417     return;
418   if (F)
419     BFA->invalidateBlockFrequencyInfo(F);
420 }

void Inliner::invalidateBFI(CallGraphSCC &SCC) {
422   if (!HasProfileData)
423     return;
424   for (CallGraphNode *Node : SCC) {
425     Function *F = Node->getFunction();
426     invalidateBFI(F);
427   }
428 }

void Inliner::copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst) {
430   if (!HasProfileData)
431     return;
432   Function *F = Src->getParent();
433   BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);
434   BFI->setBlockFreq(Dst, BFI->getBlockFreq(Src).getFrequency());
435 }
436 
437 static bool hasProfileData(Module &M) {
438   // We check for the presence of MaxFunctionCount in the module.
439   // FIXME: This now only works for frontend based instrumentation.
440   return M.getMaximumFunctionCount().hasValue();
441 }
442 
443 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
444   using namespace std::placeholders;
445   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
446   HasProfileData = hasProfileData(CG.getModule());
447   ACT = &getAnalysis<AssumptionCacheTracker>();
448   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
449 
450   SmallPtrSet<Function*, 8> SCCFunctions;
451   DEBUG(dbgs() << "Inliner visiting SCC:");
452   for (CallGraphNode *Node : SCC) {
453     Function *F = Node->getFunction();
454     if (F) SCCFunctions.insert(F);
455     DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
456   }
457 
458   // Scan through and identify all call sites ahead of time so that we only
459   // inline call sites in the original functions, not call sites that result
460   // from inlining other functions.
461   SmallVector<std::pair<CallSite, int>, 16> CallSites;
462 
463   // When inlining a callee produces new call sites, we want to keep track of
464   // the fact that they were inlined from the callee.  This allows us to avoid
465   // infinite inlining in some obscure cases.  To represent this, we use an
466   // index into the InlineHistory vector.
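  // For example, inlining B at a top-level call site (history -1) pushes an
  // entry {B, -1}, and every call site exposed by that inline carries its
  // index.  Inlining C at one of those sites then pushes {C, <B's index>},
  // and so on.  InlineHistoryIncludes walks this chain back to -1 to detect
  // when a callee already appears on the path, i.e. recursive re-inlining.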
467   SmallVector<std::pair<Function*, int>, 8> InlineHistory;
468 
469   for (CallGraphNode *Node : SCC) {
470     Function *F = Node->getFunction();
471     if (!F) continue;
472 
473     for (BasicBlock &BB : *F)
474       for (Instruction &I : BB) {
475         CallSite CS(cast<Value>(&I));
476         // If this isn't a call, or it is a call to an intrinsic, it can
477         // never be inlined.
478         if (!CS || isa<IntrinsicInst>(I))
479           continue;
480 
481         // If this is a direct call to an external function, we can never inline
482         // it.  If it is an indirect call, inlining may resolve it to be a
483         // direct call, so we keep it.
484         if (Function *Callee = CS.getCalledFunction())
485           if (Callee->isDeclaration())
486             continue;
487 
488         CallSites.push_back(std::make_pair(CS, -1));
489       }
490   }
491 
492   DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
493 
  // If there are no call sites in this SCC, exit early.
495   if (CallSites.empty())
496     return false;
497 
498   // Now that we have all of the call sites, move the ones to functions in the
499   // current SCC to the end of the list.
500   unsigned FirstCallInSCC = CallSites.size();
501   for (unsigned i = 0; i < FirstCallInSCC; ++i)
502     if (Function *F = CallSites[i].first.getCalledFunction())
503       if (SCCFunctions.count(F))
504         std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
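  // In effect this is a manual partition: afterwards, indices
  // [0, FirstCallInSCC) hold calls to functions outside the SCC, while
  // [FirstCallInSCC, CallSites.size()) hold the intra-SCC calls that must be
  // processed last.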
505 
507   InlinedArrayAllocasTy InlinedArrayAllocas;
508 
509   // Now that we have all of the call sites, loop over them and inline them if
510   // it looks profitable to do so.
511   bool Changed = false;
512   bool LocalChange;
513   do {
514     LocalChange = false;
    // Keep iterating via the enclosing do/while loop because inlining
    // functions can cause indirect calls to become direct calls.
    // CallSites may be modified inside the loop, so a range-based for loop
    // cannot be used.
518     for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
519       CallSite CS = CallSites[CSi].first;
520 
521       Function *Caller = CS.getCaller();
522       Function *Callee = CS.getCalledFunction();
523 
524       // If this call site is dead and it is to a readonly function, we should
525       // just delete the call instead of trying to inline it, regardless of
526       // size.  This happens because IPSCCP propagates the result out of the
527       // call and then we're left with the dead call.
528       if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
529         DEBUG(dbgs() << "    -> Deleting dead call: "
530                      << *CS.getInstruction() << "\n");
531         // Update the call graph by deleting the edge from Callee to Caller.
532         CG[Caller]->removeCallEdgeFor(CS);
533         CS.getInstruction()->eraseFromParent();
534         ++NumCallsDeleted;
535       } else {
536         Instruction *TheCall = CS.getInstruction();
537         BasicBlock *CallSiteBlock = TheCall->getParent();
538         Instruction *CallSuccessor = &*(++BasicBlock::iterator(TheCall));
539 
540         // We can only inline direct calls to non-declarations.
541         if (!Callee || Callee->isDeclaration()) continue;
542 
        // If this call site was obtained by inlining another function, verify
        // that the inline history for the call site does not include the
        // callee itself.  If it does, inlining here would be recursively
        // inlining the same function, which would expose the same call sites
        // and cause us to inline infinitely.
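        // For example, with mutually recursive A() { B(); } and B() { A(); },
        // inlining B into A exposes a call to A; inlining that call would
        // expose a call to B again, and so on without end absent this check.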
548         int InlineHistoryID = CallSites[CSi].second;
549         if (InlineHistoryID != -1 &&
550             InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
551           continue;
552 
553         LLVMContext &CallerCtx = Caller->getContext();
554 
        // Get the DebugLoc to report; CS will be invalid after inlining.
556         DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
557 
558         // If the policy determines that we should inline this function,
559         // try to do so.
560         if (!shouldInline(CS)) {
561           emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
562                                        Twine(Callee->getName() +
563                                              " will not be inlined into " +
564                                              Caller->getName()));
565           continue;
566         }
567 
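        // When profile data is available, register a block-cloning functor so
        // that every callee block cloned during inlining has its frequency
        // scaled into the caller.  std::bind fixes 'this' and CS; the
        // original and cloned blocks arrive as the _1 and _2 arguments of
        // updateBlockFreq.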
568         BlockCloningFunctor BCF = nullptr;
569         if (HasProfileData)
570           BCF = std::bind(&Inliner::updateBlockFreq, this, CS, _1, _2);
571         InlineFunctionInfo InlineInfo(&CG, ACT, BCF);
572 
573         // Attempt to inline the function.
574         if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
575                                   InlineHistoryID, InsertLifetime)) {
576           emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
577                                        Twine(Callee->getName() +
578                                              " will not be inlined into " +
579                                              Caller->getName()));
580           continue;
581         }
582         updateEntryCount(CallSiteBlock, Callee);
583         if (!InlineInfo.CallSuccessorBlockDeleted) {
584           // The instruction following the call is part of a new basic block
585           // created during the inlining process. This does not have an entry in
586           // the BFI. We create an entry by copying the frequency of the
587           // original block containing the call.
588           copyBlockFrequency(CallSiteBlock, CallSuccessor->getParent());
589         }
590 
591         ++NumInlined;
592 
593         // Report the inline decision.
594         emitOptimizationRemark(
595             CallerCtx, DEBUG_TYPE, *Caller, DLoc,
596             Twine(Callee->getName() + " inlined into " + Caller->getName()));
597 
598         // If inlining this function gave us any new call sites, throw them
599         // onto our worklist to process.  They are useful inline candidates.
600         if (!InlineInfo.InlinedCalls.empty()) {
601           // Create a new inline history entry for this, so that we remember
602           // that these new callsites came about due to inlining Callee.
603           int NewHistoryID = InlineHistory.size();
604           InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
605 
606           for (Value *Ptr : InlineInfo.InlinedCalls)
607             CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
608         }
609       }
610 
611       // If we inlined or deleted the last possible call site to the function,
612       // delete the function body now.
613       if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
614           // TODO: Can remove if in SCC now.
615           !SCCFunctions.count(Callee) &&
616 
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, as
          // this could invalidate the CGSCC iterator.
620           CG[Callee]->getNumReferences() == 0) {
621         DEBUG(dbgs() << "    -> Deleting dead function: "
622               << Callee->getName() << "\n");
623         CallGraphNode *CalleeNode = CG[Callee];
624 
625         // Remove any call graph edges from the callee to its callees.
626         CalleeNode->removeAllCalledFunctions();
627 
        // Remove the callee's node from the call graph and delete the
        // function.
629         Function *F = CG.removeFunctionFromModule(CalleeNode);
630         invalidateBFI(F);
631         delete F;
632         ++NumDeleted;
633       }
634 
635       // Remove this call site from the list.  If possible, use
636       // swap/pop_back for efficiency, but do not use it if doing so would
637       // move a call site to a function in this SCC before the
638       // 'FirstCallInSCC' barrier.
639       if (SCC.isSingular()) {
640         CallSites[CSi] = CallSites.back();
641         CallSites.pop_back();
642       } else {
643         CallSites.erase(CallSites.begin()+CSi);
644       }
645       --CSi;
646 
647       Changed = true;
648       LocalChange = true;
649     }
650   } while (LocalChange);
651 
652   invalidateBFI(SCC);
653   return Changed;
654 }
655 
656 /// Remove now-dead linkonce functions at the end of
657 /// processing to avoid breaking the SCC traversal.
658 bool Inliner::doFinalization(CallGraph &CG) {
659   return removeDeadFunctions(CG);
660 }
661 
/// Remove dead functions.  If AlwaysInlineOnly is set, only functions marked
/// always_inline are considered for removal.
663 bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
664   SmallVector<CallGraphNode*, 16> FunctionsToRemove;
665   SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
666   SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
667 
668   auto RemoveCGN = [&](CallGraphNode *CGN) {
669     // Remove any call graph edges from the function to its callees.
670     CGN->removeAllCalledFunctions();
671 
    // Remove any edges from the external node to the function's call graph
    // node.  These edges might have been made irrelevant due to
    // optimization of the program.
675     CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
676 
    // Queue the node; the function is removed from the call graph and
    // deleted further below.
678     FunctionsToRemove.push_back(CGN);
679   };
680 
681   // Scan for all of the functions, looking for ones that should now be removed
682   // from the program.  Insert the dead ones in the FunctionsToRemove set.
683   for (const auto &I : CG) {
684     CallGraphNode *CGN = I.second.get();
685     Function *F = CGN->getFunction();
686     if (!F || F->isDeclaration())
687       continue;
688 
689     // Handle the case when this function is called and we only want to care
690     // about always-inline functions. This is a bit of a hack to share code
691     // between here and the InlineAlways pass.
692     if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
693       continue;
694 
695     // If the only remaining users of the function are dead constants, remove
696     // them.
697     F->removeDeadConstantUsers();
698 
699     if (!F->isDefTriviallyDead())
700       continue;
701 
702     // It is unsafe to drop a function with discardable linkage from a COMDAT
703     // without also dropping the other members of the COMDAT.
704     // The inliner doesn't visit non-function entities which are in COMDAT
705     // groups so it is unsafe to do so *unless* the linkage is local.
706     if (!F->hasLocalLinkage()) {
707       if (const Comdat *C = F->getComdat()) {
708         --ComdatEntriesAlive[C];
709         DeadFunctionsInComdats.push_back(CGN);
710         continue;
711       }
712     }
713 
714     RemoveCGN(CGN);
715   }
716   if (!DeadFunctionsInComdats.empty()) {
717     // Count up all the entities in COMDAT groups
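    // Each dead function above already decremented its group's count, so once
    // every COMDAT-carrying entity in the module has been counted below, a
    // group's final value is the number of members that are not known-dead
    // functions; zero means the entire group is dead and safe to drop.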
718     auto ComdatGroupReferenced = [&](const Comdat *C) {
719       auto I = ComdatEntriesAlive.find(C);
720       if (I != ComdatEntriesAlive.end())
721         ++(I->getSecond());
722     };
723     for (const Function &F : CG.getModule())
724       if (const Comdat *C = F.getComdat())
725         ComdatGroupReferenced(C);
726     for (const GlobalVariable &GV : CG.getModule().globals())
727       if (const Comdat *C = GV.getComdat())
728         ComdatGroupReferenced(C);
729     for (const GlobalAlias &GA : CG.getModule().aliases())
730       if (const Comdat *C = GA.getComdat())
731         ComdatGroupReferenced(C);
732     for (CallGraphNode *CGN : DeadFunctionsInComdats) {
733       Function *F = CGN->getFunction();
734       const Comdat *C = F->getComdat();
735       int NumAlive = ComdatEntriesAlive[C];
736       // We can remove functions in a COMDAT group if the entire group is dead.
737       assert(NumAlive >= 0);
738       if (NumAlive > 0)
739         continue;
740 
741       RemoveCGN(CGN);
742     }
743   }
744 
745   if (FunctionsToRemove.empty())
746     return false;
747 
748   // Now that we know which functions to delete, do so.  We didn't want to do
749   // this inline, because that would invalidate our CallGraph::iterator
750   // objects. :(
751   //
752   // Note that it doesn't matter that we are iterating over a non-stable order
753   // here to do this, it doesn't matter which order the functions are deleted
754   // in.
755   array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
756   FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
757                                       FunctionsToRemove.end()),
758                           FunctionsToRemove.end());
759   for (CallGraphNode *CGN : FunctionsToRemove) {
760     Function *F = CG.removeFunctionFromModule(CGN);
761     invalidateBFI(F);
762     delete F;
763     ++NumDeleted;
764   }
765   return true;
766 }
767