1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interface between the inliner and a learned model.
10 // It delegates model evaluation to either the AOT compiled model (the
11 // 'release' mode) or a runtime-loaded model (the 'development' case).
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
16 
17 #include <limits>
18 #include <unordered_map>
19 #include <unordered_set>
20 
21 #include "llvm/ADT/SCCIterator.h"
22 #include "llvm/Analysis/CallGraph.h"
23 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
24 #include "llvm/Analysis/InlineCost.h"
25 #include "llvm/Analysis/LazyCallGraph.h"
26 #include "llvm/Analysis/MLInlineAdvisor.h"
27 #include "llvm/Analysis/MLModelRunner.h"
28 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/TargetTransformInfo.h"
31 #include "llvm/IR/InstIterator.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/PassManager.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Path.h"
36 
37 using namespace llvm;
38 
39 #ifdef LLVM_HAVE_TF_AOT
40 #include "llvm/Analysis/ReleaseModeModelRunner.h"
41 // codegen-ed file
42 #include "InlinerSizeModel.h" // NOLINT
43 #include "llvm/Analysis/InlineModelFeatureMaps.h"
44 
45 std::unique_ptr<InlineAdvisor>
46 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
47   auto AOTRunner =
48       std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
49           M.getContext(), FeatureNameMap, DecisionName);
50   return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
51 }
52 #endif
53 
54 #define DEBUG_TYPE "inline-ml"
55 
// Safety valve against runaway policies: once the module's estimated native
// size exceeds InitialIRSize * this factor, the advisor stops recommending
// any further inlining (see ForceStop handling in getAdviceImpl).
static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));
61 
// clang-format off
// Human-readable names for the model's input features, indexed the same way
// as FeatureIndex. The order is part of the model interface: InlineCost
// features first, then the non-cost features, matching the two iterator
// macros below - do not reorder.
const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
// InlineCost features - these must come first
#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on
75 
// Tensor/spec names exchanged with the model: the model's inlining decision
// output, the default (heuristic) decision recorded alongside it, and the
// reward signal (size delta) used during training.
const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";
79 
80 CallBase *getInlinableCS(Instruction &I) {
81   if (auto *CS = dyn_cast<CallBase>(&I))
82     if (Function *Callee = CS->getCalledFunction()) {
83       if (!Callee->isDeclaration()) {
84         return CS;
85       }
86     }
87   return nullptr;
88 }
89 
// Construct the advisor: snapshot per-function "call site height" levels via
// a bottom-up SCC walk, and seed the module-wide node/edge/IR-size counters
// that are delta-updated as inlining proceeds.
MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    // First pass over the SCC: compute its level as one more than the highest
    // level of any already-visited callee SCC.
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In bottom up traversal, an inlinable callee is either in the
          // same SCC, or to a function in a visited SCC. So not finding its
          // level means we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    // Second pass: all defined functions of this SCC share the same level.
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  // Seed the module-wide counters from the defined functions discovered above.
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}
138 
139 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
140   return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
141 }
142 
// Re-synchronize NodeCount/EdgeCount at the start of an inliner run by
// re-scanning the frontier of nodes recorded by the previous onPassExit.
void MLInlineAdvisor::onPassEntry() {
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  // merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  // splits
  // This means that the NodesInLastSCC is a superset (not strict) of the nodes
  // that subsequent passes would have processed
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  // they'd be adjacent to Nodes in the last SCC. So we just need to check the
  // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  // care about the nature of the Edge (call or ref).
  // Remove the previously-counted contribution, then re-add only what
  // survived (plus newly discovered adjacent nodes).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    // N survived: count it and its local call edges again.
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    // Expand the worklist across the boundary: any adjacent node we have
    // never seen before gets queued (BFS over the frontier).
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  // Edges were re-counted above; drop the stale tally from onPassExit.
  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}
180 
// Snapshot the SCC the inliner just finished, so the next onPassEntry can
// re-validate its nodes/edges against whatever function passes did meanwhile.
void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  if (!LastSCC)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes that
  // survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}
197 
198 int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
199   return FAM.getResult<FunctionPropertiesAnalysis>(F)
200       .DirectCallsToDefinedFunctions;
201 }
202 
// Update the internal state of the advisor, and force invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state - the
// number of functions and the number of static calls. We also keep track of the
// total IR size in this module, to stop misbehaving policies at a certain bloat
// factor (SizeIncreaseThreshold)
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  // Delta-update the running IR size: replace the pre-inlining sizes of
  // caller and callee with their post-inlining total. If the callee was
  // deleted, its size simply vanishes from the module.
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  // Bloat guard: permanently stop recommending inlining once the module has
  // grown past SizeIncreaseThreshold x its original size.
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
          .DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
            .DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
244 
245 int64_t MLInlineAdvisor::getModuleIRSize() const {
246   int64_t Ret = 0;
247   for (auto &F : M)
248     if (!F.isDeclaration())
249       Ret += getIRSize(F);
250   return Ret;
251 }
252 
// Produce advice for a single call site: filter out never-inline, recursive,
// force-stopped, and uninlinable cases early (returning the no-op base
// InlineAdvice), then populate the model's feature tensors and ask the model.
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  // NOTE(review): this dereference assumes CB is a direct call
  // (getCalledFunction() non-null) - presumably guaranteed by the inliner
  // before calling the advisor; confirm against callers.
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we won't want to track anymore any state changes, so
  // we just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  // Cost features are computed even for mandatory sites: if they can't be
  // computed at all, we bail out with the base advice regardless.
  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  // Count call-site arguments that are compile-time constants - a signal
  // that inlining may enable further simplification.
  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
  auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);

  // Populate the model's input tensors, one per FeatureIndex entry.
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}
345 
346 std::unique_ptr<MLInlineAdvice>
347 MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
348                                     OptimizationRemarkEmitter &ORE) {
349   return std::make_unique<MLInlineAdvice>(
350       this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
351 }
352 
353 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
354                                                                   bool Advice) {
355   // Make sure we track inlinings in all cases - mandatory or not.
356   if (Advice && !ForceStop)
357     return getMandatoryAdviceImpl(CB);
358 
359   // If this is a "never inline" case, there won't be any changes to internal
360   // state we need to track, so we can just return the base InlineAdvice, which
361   // will do nothing interesting.
362   // Same if we are forced to stop - we don't track anymore.
363   return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
364 }
365 
366 std::unique_ptr<MLInlineAdvice>
367 MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
368   return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
369 }
370 
371 void MLInlineAdvice::reportContextForRemark(
372     DiagnosticInfoOptimizationBase &OR) {
373   using namespace ore;
374   OR << NV("Callee", Callee->getName());
375   for (size_t I = 0; I < NumberOfFeatures; ++I)
376     OR << NV(FeatureNameMap[I],
377              *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
378   OR << NV("ShouldInline", isInliningRecommended());
379 }
380 
381 void MLInlineAdvice::recordInliningImpl() {
382   ORE.emit([&]() {
383     OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
384     reportContextForRemark(R);
385     return R;
386   });
387   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
388 }
389 
390 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
391   ORE.emit([&]() {
392     OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
393                          Block);
394     reportContextForRemark(R);
395     return R;
396   });
397   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
398 }
399 
400 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
401     const InlineResult &Result) {
402   ORE.emit([&]() {
403     OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
404                                DLoc, Block);
405     reportContextForRemark(R);
406     return R;
407   });
408 }
409 void MLInlineAdvice::recordUnattemptedInliningImpl() {
410   ORE.emit([&]() {
411     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
412     reportContextForRemark(R);
413     return R;
414   });
415 }
416 #endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
417