1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interface between the inliner and a learned model.
10 // It delegates model evaluation to either the AOT compiled model (the
11 // 'release' mode) or a runtime-loaded model (the 'development' case).
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/MLInlineAdvisor.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/Analysis/AssumptionCache.h"
17 #include "llvm/Analysis/CallGraph.h"
18 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
19 #include "llvm/Analysis/InlineCost.h"
20 #include "llvm/Analysis/InlineModelFeatureMaps.h"
21 #include "llvm/Analysis/LazyCallGraph.h"
22 #include "llvm/Analysis/MLModelRunner.h"
23 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
24 #include "llvm/Analysis/TargetTransformInfo.h"
25 #include "llvm/IR/InstIterator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Support/CommandLine.h"
28 
29 using namespace llvm;
30 
31 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
32 #include "llvm/Analysis/ReleaseModeModelRunner.h"
33 // codegen-ed file
34 #include "InlinerSizeModel.h" // NOLINT
35 
36 std::unique_ptr<InlineAdvisor>
37 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
38   auto AOTRunner =
39       std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
40           M.getContext(), FeatureNameMap, DecisionName);
41   return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
42 }
43 #endif
44 
#define DEBUG_TYPE "inline-ml"

// Safety valve: once CurrentIRSize exceeds InitialIRSize times this factor,
// the advisor sets ForceStop and declines all further inlining.
static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

// Feature-index -> tensor-name mapping consumed by the model runner. Order is
// fixed by the iterator macros: InlineCost features first, then the rest.
// clang-format off
const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
// InlineCost features - these must come first
#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

// Tensor names: the model's output decision, the default (heuristic) decision
// used during training, and the training reward signal.
const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";
70 
71 CallBase *getInlinableCS(Instruction &I) {
72   if (auto *CS = dyn_cast<CallBase>(&I))
73     if (Function *Callee = CS->getCalledFunction()) {
74       if (!Callee->isDeclaration()) {
75         return CS;
76       }
77     }
78   return nullptr;
79 }
80 
MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  CallGraph CGraph(M);
  // scc_begin iterates SCCs bottom-up, so callees' SCCs are visited before
  // their callers' SCCs; a recorded level always belongs to a prior SCC.
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      // NOTE: this inner 'I' (an instruction) shadows the SCC iterator above.
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In bottom up traversal, an inlinable callee is either in the
          // same SCC, or to a function in a visited SCC. So not finding its
          // level means we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    // All defined functions in this SCC share the same level: one more than
    // the deepest callee level found in any previously visited SCC.
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  // Seed the module-wide node and edge counts from the functions just indexed.
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}
129 
130 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
131   return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
132 }
133 
void MLInlineAdvisor::onPassEntry() {
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  // merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  // splits
  // This means that the NodesInLastSCC is a superset (not strict) of the nodes
  // that subsequent passes would have processed
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  // they'd be adjacent to Nodes in the last SCC. So we just need to check the
  // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  // care about the nature of the Edge (call or ref).
  // Retract last pass's node contribution; survivors are re-counted below.
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  // BFS worklist: drain the previously seen nodes, enqueueing any adjacent
  // nodes not yet in AllNodes (i.e. created since we last looked).
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      // AllNodes doubles as the 'visited' set; insert().second is true only
      // for nodes we have never seen before.
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  // Drop the stale edge tally recorded at onPassExit; the loop above just
  // re-added the up-to-date edge counts for the surviving nodes.
  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}
171 
void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // Nothing to snapshot if the pass ran without an SCC context.
  if (!LastSCC)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes that
  // survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}
188 
// Number of direct calls from \p F to functions defined in this module, as
// reported by FunctionPropertiesAnalysis. Used to maintain EdgeCount.
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return FAM.getResult<FunctionPropertiesAnalysis>(F)
      .DirectCallsToDefinedFunctions;
}
193 
// Update the internal state of the advisor, and force invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state - the
// number of functions and the number of static calls. We also keep track of the
// total IR size in this module, to stop misbehaving policies at a certain bloat
// factor (SizeIncreaseThreshold)
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  // Size delta: the (re-measured) caller plus, if it survived, the callee's
  // pre-inlining size, minus what both contributed before inlining.
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  // Trip the safety valve once the module has bloated past the threshold.
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
          .DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
            .DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
235 
236 int64_t MLInlineAdvisor::getModuleIRSize() const {
237   int64_t Ret = 0;
238   for (auto &F : M)
239     if (!F.isDeclaration())
240       Ret += getIRSize(F);
241   return Ret;
242 }
243 
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  // NOTE(review): dereferences getCalledFunction() unchecked, so this assumes
  // CB is a direct call - presumably guaranteed by the inliner; confirm.
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we won't want to track anymore any state changes, so
  // we just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  // Compute the cost features even for the mandatory case below, so the call
  // exercises the same analysis path; bail out if they can't be computed.
  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  // Count call-site arguments that are compile-time constants - a model
  // feature suggesting how much simplification inlining may unlock.
  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
  auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);

  // Populate the model's input tensors, one per feature.
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}
336 
337 std::unique_ptr<MLInlineAdvice>
338 MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
339                                     OptimizationRemarkEmitter &ORE) {
340   return std::make_unique<MLInlineAdvice>(
341       this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
342 }
343 
344 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
345                                                                   bool Advice) {
346   // Make sure we track inlinings in all cases - mandatory or not.
347   if (Advice && !ForceStop)
348     return getMandatoryAdviceImpl(CB);
349 
350   // If this is a "never inline" case, there won't be any changes to internal
351   // state we need to track, so we can just return the base InlineAdvice, which
352   // will do nothing interesting.
353   // Same if we are forced to stop - we don't track anymore.
354   return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
355 }
356 
// Produce positive mandatory advice as an MLInlineAdvice so that its
// record* callbacks keep the advisor's module-wide state up to date.
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}
361 
// Append the callee name, every model feature value, and the final decision
// to remark \p OR, so emitted remarks capture the full decision context.
void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureNameMap[I],
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}
371 
372 void MLInlineAdvice::recordInliningImpl() {
373   ORE.emit([&]() {
374     OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
375     reportContextForRemark(R);
376     return R;
377   });
378   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
379 }
380 
381 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
382   ORE.emit([&]() {
383     OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
384                          Block);
385     reportContextForRemark(R);
386     return R;
387   });
388   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
389 }
390 
391 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
392     const InlineResult &Result) {
393   ORE.emit([&]() {
394     OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
395                                DLoc, Block);
396     reportContextForRemark(R);
397     return R;
398   });
399 }
400 void MLInlineAdvice::recordUnattemptedInliningImpl() {
401   ORE.emit([&]() {
402     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
403     reportContextForRemark(R);
404     return R;
405   });
406 }
407