1 //===- ModuleInliner.cpp - Code related to module inliner -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the mechanics required to implement inlining without
10 // missing any calls in the module level. It doesn't need any infromation about
11 // SCC or call graph, which is different from the SCC inliner.  The decisions of
12 // which calls are profitable to inline are implemented elsewhere.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/IPO/ModuleInliner.h"
17 #include "llvm/ADT/ScopeExit.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/BlockFrequencyInfo.h"
24 #include "llvm/Analysis/InlineAdvisor.h"
25 #include "llvm/Analysis/InlineCost.h"
26 #include "llvm/Analysis/InlineOrder.h"
27 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
28 #include "llvm/Analysis/ProfileSummaryInfo.h"
29 #include "llvm/Analysis/ReplayInlineAdvisor.h"
30 #include "llvm/Analysis/TargetLibraryInfo.h"
31 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/InstIterator.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/IR/PassManager.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
42 #include "llvm/Transforms/Utils/Cloning.h"
43 #include <cassert>
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "module-inline"
48 
49 STATISTIC(NumInlined, "Number of functions inlined");
50 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
51 
52 static cl::opt<bool> InlineEnablePriorityOrder(
53     "module-inline-enable-priority-order", cl::Hidden, cl::init(true),
54     cl::desc("Enable the priority inline order for the module inliner"));
55 
56 /// Return true if the specified inline history ID
57 /// indicates an inline history that includes the specified function.
58 static bool inlineHistoryIncludes(
59     Function *F, int InlineHistoryID,
60     const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
61   while (InlineHistoryID != -1) {
62     assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
63            "Invalid inline history ID");
64     if (InlineHistory[InlineHistoryID].first == F)
65       return true;
66     InlineHistoryID = InlineHistory[InlineHistoryID].second;
67   }
68   return false;
69 }
70 
71 InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
72                                              FunctionAnalysisManager &FAM,
73                                              Module &M) {
74   if (OwnedAdvisor)
75     return *OwnedAdvisor;
76 
77   auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
78   if (!IAA) {
79     // It should still be possible to run the inliner as a stand-alone module
80     // pass, for test scenarios. In that case, we default to the
81     // DefaultInlineAdvisor, which doesn't need to keep state between module
82     // pass runs. It also uses just the default InlineParams. In this case, we
83     // need to use the provided FAM, which is valid for the duration of the
84     // inliner pass, and thus the lifetime of the owned advisor. The one we
85     // would get from the MAM can be invalidated as a result of the inliner's
86     // activity.
87     OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params);
88 
89     return *OwnedAdvisor;
90   }
91   assert(IAA->getAdvisor() &&
92          "Expected a present InlineAdvisorAnalysis also have an "
93          "InlineAdvisor initialized");
94   return *IAA->getAdvisor();
95 }
96 
97 static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
98   LibFunc LF;
99 
100   // Either this is a normal library function or a "vectorizable"
101   // function.  Not using the VFDatabase here because this query
102   // is related only to libraries handled via the TLI.
103   return TLI.getLibFunc(F, LF) ||
104          TLI.isKnownVectorFunctionInLibrary(F.getName());
105 }
106 
107 PreservedAnalyses ModuleInlinerPass::run(Module &M,
108                                          ModuleAnalysisManager &MAM) {
109   LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
110 
111   auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
112   if (!IAA.tryCreate(Params, Mode, {})) {
113     M.getContext().emitError(
114         "Could not setup Inlining Advisor for the requested "
115         "mode and/or options");
116     return PreservedAnalyses::all();
117   }
118 
119   bool Changed = false;
120 
121   ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
122 
123   FunctionAnalysisManager &FAM =
124       MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
125 
126   auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
127     return FAM.getResult<TargetLibraryAnalysis>(F);
128   };
129 
130   InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M);
131   Advisor.onPassEntry();
132 
133   auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
134 
135   // In the module inliner, a priority-based worklist is used for calls across
136   // the entire Module. With this module inliner, the inline order is not
137   // limited to bottom-up order. More globally scope inline order is enabled.
138   // Also, the inline deferral logic become unnecessary in this module inliner.
139   // It is possible to use other priority heuristics, e.g. profile-based
140   // heuristic.
141   //
142   // TODO: Here is a huge amount duplicate code between the module inliner and
143   // the SCC inliner, which need some refactoring.
144   std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
145   if (InlineEnablePriorityOrder)
146     Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
147   else
148     Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
149   assert(Calls != nullptr && "Expected an initialized InlineOrder");
150 
151   // Populate the initial list of calls in this module.
152   for (Function &F : M) {
153     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
154     // We want to generally process call sites top-down in order for
155     // simplifications stemming from replacing the call with the returned value
156     // after inlining to be visible to subsequent inlining decisions.
157     // FIXME: Using instructions sequence is a really bad way to do this.
158     // Instead we should do an actual RPO walk of the function body.
159     for (Instruction &I : instructions(F))
160       if (auto *CB = dyn_cast<CallBase>(&I))
161         if (Function *Callee = CB->getCalledFunction()) {
162           if (!Callee->isDeclaration())
163             Calls->push({CB, -1});
164           else if (!isa<IntrinsicInst>(I)) {
165             using namespace ore;
166             setInlineRemark(*CB, "unavailable definition");
167             ORE.emit([&]() {
168               return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
169                      << NV("Callee", Callee) << " will not be inlined into "
170                      << NV("Caller", CB->getCaller())
171                      << " because its definition is unavailable"
172                      << setIsVerbose();
173             });
174           }
175         }
176   }
177   if (Calls->empty())
178     return PreservedAnalyses::all();
179 
180   // When inlining a callee produces new call sites, we want to keep track of
181   // the fact that they were inlined from the callee.  This allows us to avoid
182   // infinite inlining in some obscure cases.  To represent this, we use an
183   // index into the InlineHistory vector.
184   SmallVector<std::pair<Function *, int>, 16> InlineHistory;
185 
186   // Track a set vector of inlined callees so that we can augment the caller
187   // with all of their edges in the call graph before pruning out the ones that
188   // got simplified away.
189   SmallSetVector<Function *, 4> InlinedCallees;
190 
191   // Track the dead functions to delete once finished with inlining calls. We
192   // defer deleting these to make it easier to handle the call graph updates.
193   SmallVector<Function *, 4> DeadFunctions;
194 
195   // Loop forward over all of the calls.
196   while (!Calls->empty()) {
197     // We expect the calls to typically be batched with sequences of calls that
198     // have the same caller, so we first set up some shared infrastructure for
199     // this caller. We also do any pruning we can at this layer on the caller
200     // alone.
201     Function &F = *Calls->front().first->getCaller();
202 
203     LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
204                       << "    Function size: " << F.getInstructionCount()
205                       << "\n");
206 
207     auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
208       return FAM.getResult<AssumptionAnalysis>(F);
209     };
210 
211     // Now process as many calls as we have within this caller in the sequence.
212     // We bail out as soon as the caller has to change so we can
213     // prepare the context of that new caller.
214     bool DidInline = false;
215     while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
216       auto P = Calls->pop();
217       CallBase *CB = P.first;
218       const int InlineHistoryID = P.second;
219       Function &Callee = *CB->getCalledFunction();
220 
221       if (InlineHistoryID != -1 &&
222           inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
223         setInlineRemark(*CB, "recursive");
224         continue;
225       }
226 
227       auto Advice = Advisor.getAdvice(*CB, /*OnlyMandatory*/ false);
228       // Check whether we want to inline this callsite.
229       if (!Advice->isInliningRecommended()) {
230         Advice->recordUnattemptedInlining();
231         continue;
232       }
233 
234       // Setup the data structure used to plumb customization into the
235       // `InlineFunction` routine.
236       InlineFunctionInfo IFI(
237           /*cg=*/nullptr, GetAssumptionCache, PSI,
238           &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
239           &FAM.getResult<BlockFrequencyAnalysis>(Callee));
240 
241       InlineResult IR =
242           InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
243       if (!IR.isSuccess()) {
244         Advice->recordUnsuccessfulInlining(IR);
245         continue;
246       }
247 
248       DidInline = true;
249       InlinedCallees.insert(&Callee);
250       ++NumInlined;
251 
252       LLVM_DEBUG(dbgs() << "    Size after inlining: "
253                         << F.getInstructionCount() << "\n");
254 
255       // Add any new callsites to defined functions to the worklist.
256       if (!IFI.InlinedCallSites.empty()) {
257         int NewHistoryID = InlineHistory.size();
258         InlineHistory.push_back({&Callee, InlineHistoryID});
259 
260         for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
261           Function *NewCallee = ICB->getCalledFunction();
262           if (!NewCallee) {
263             // Try to promote an indirect (virtual) call without waiting for
264             // the post-inline cleanup and the next DevirtSCCRepeatedPass
265             // iteration because the next iteration may not happen and we may
266             // miss inlining it.
267             if (tryPromoteCall(*ICB))
268               NewCallee = ICB->getCalledFunction();
269           }
270           if (NewCallee)
271             if (!NewCallee->isDeclaration())
272               Calls->push({ICB, NewHistoryID});
273         }
274       }
275 
276       // Merge the attributes based on the inlining.
277       AttributeFuncs::mergeAttributesForInlining(F, Callee);
278 
279       // For local functions, check whether this makes the callee trivially
280       // dead. In that case, we can drop the body of the function eagerly
281       // which may reduce the number of callers of other functions to one,
282       // changing inline cost thresholds.
283       bool CalleeWasDeleted = false;
284       if (Callee.hasLocalLinkage()) {
285         // To check this we also need to nuke any dead constant uses (perhaps
286         // made dead by this operation on other functions).
287         Callee.removeDeadConstantUsers();
288         // if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
289         if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) {
290           Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
291             return Call.first->getCaller() == &Callee;
292           });
293           // Clear the body and queue the function itself for deletion when we
294           // finish inlining.
295           // Note that after this point, it is an error to do anything other
296           // than use the callee's address or delete it.
297           Callee.dropAllReferences();
298           assert(!is_contained(DeadFunctions, &Callee) &&
299                  "Cannot put cause a function to become dead twice!");
300           DeadFunctions.push_back(&Callee);
301           CalleeWasDeleted = true;
302         }
303       }
304       if (CalleeWasDeleted)
305         Advice->recordInliningWithCalleeDeleted();
306       else
307         Advice->recordInlining();
308     }
309 
310     if (!DidInline)
311       continue;
312     Changed = true;
313 
314     InlinedCallees.clear();
315   }
316 
317   // Now that we've finished inlining all of the calls across this module,
318   // delete all of the trivially dead functions.
319   //
320   // Note that this walks a pointer set which has non-deterministic order but
321   // that is OK as all we do is delete things and add pointers to unordered
322   // sets.
323   for (Function *DeadF : DeadFunctions) {
324     // Clear out any cached analyses.
325     FAM.clear(*DeadF, DeadF->getName());
326 
327     // And delete the actual function from the module.
328     M.getFunctionList().erase(DeadF);
329 
330     ++NumDeleted;
331   }
332 
333   if (!Changed)
334     return PreservedAnalyses::all();
335 
336   return PreservedAnalyses::none();
337 }
338