1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interface between the inliner and a learned model.
10 // It delegates model evaluation to either the AOT compiled model (the
11 // 'release' mode) or a runtime-loaded model (the 'development' case).
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/MLInlineAdvisor.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/Analysis/AssumptionCache.h"
17 #include "llvm/Analysis/CallGraph.h"
18 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
19 #include "llvm/Analysis/InlineCost.h"
20 #include "llvm/Analysis/InlineModelFeatureMaps.h"
21 #include "llvm/Analysis/LazyCallGraph.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/MLModelRunner.h"
24 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/IR/InstIterator.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/Support/CommandLine.h"
29 
30 using namespace llvm;
31 
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT

// Factory for the 'release' mode advisor: the policy is the AOT-compiled
// InlinerSizeModel, so no model is loaded at runtime. Only built when the
// AOT model was generated at configure time.
std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif
45 
#define DEBUG_TYPE "inline-ml"

// Safety valve: once CurrentIRSize exceeds this factor of InitialIRSize, the
// advisor sets ForceStop and refuses any further (non-mandatory) inlining.
static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

// clang-format off
// The order of entries here defines the tensor indices the model consumes;
// it must stay in sync with the iterators in InlineModelFeatureMaps.h.
const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
// InlineCost features - these must come first
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

// Tensor names used to communicate decisions/rewards with the model and the
// training tooling.
const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";
71 
72 CallBase *getInlinableCS(Instruction &I) {
73   if (auto *CS = dyn_cast<CallBase>(&I))
74     if (Function *Callee = CS->getCalledFunction()) {
75       if (!Callee->isDeclaration()) {
76         return CS;
77       }
78     }
79   return nullptr;
80 }
81 
82 MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
83                                  std::unique_ptr<MLModelRunner> Runner)
84     : InlineAdvisor(
85           M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
86       ModelRunner(std::move(Runner)),
87       CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
88       InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
89   assert(ModelRunner);
90 
91   // Extract the 'call site height' feature - the position of a call site
92   // relative to the farthest statically reachable SCC node. We don't mutate
93   // this value while inlining happens. Empirically, this feature proved
94   // critical in behavioral cloning - i.e. training a model to mimic the manual
95   // heuristic's decisions - and, thus, equally important for training for
96   // improvement.
97   CallGraph CGraph(M);
98   for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
99     const std::vector<CallGraphNode *> &CGNodes = *I;
100     unsigned Level = 0;
101     for (auto *CGNode : CGNodes) {
102       Function *F = CGNode->getFunction();
103       if (!F || F->isDeclaration())
104         continue;
105       for (auto &I : instructions(F)) {
106         if (auto *CS = getInlinableCS(I)) {
107           auto *Called = CS->getCalledFunction();
108           auto Pos = FunctionLevels.find(&CG.get(*Called));
109           // In bottom up traversal, an inlinable callee is either in the
110           // same SCC, or to a function in a visited SCC. So not finding its
111           // level means we haven't visited it yet, meaning it's in this SCC.
112           if (Pos == FunctionLevels.end())
113             continue;
114           Level = std::max(Level, Pos->second + 1);
115         }
116       }
117     }
118     for (auto *CGNode : CGNodes) {
119       Function *F = CGNode->getFunction();
120       if (F && !F->isDeclaration())
121         FunctionLevels[&CG.get(*F)] = Level;
122     }
123   }
124   for (auto KVP : FunctionLevels) {
125     AllNodes.insert(KVP.first);
126     EdgeCount += getLocalCalls(KVP.first->getFunction());
127   }
128   NodeCount = AllNodes.size();
129 }
130 
131 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
132   return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
133 }
134 
void MLInlineAdvisor::onPassEntry() {
  FPICache.clear();
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  // merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  // splits
  // This means that the NodesInLastSCC is a superset (not strict) of the nodes
  // that subsequent passes would have processed
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  // they'd be adjacent to Nodes in the last SCC. So we just need to check the
  // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  // care about the nature of the Edge (call or ref).
  // First, assume none of the last-seen nodes survived; survivors are
  // re-counted below.
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    // Survivor: re-count the node and its current local call edges.
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    // Walk the (call or ref) boundary; nodes never seen before are queued so
    // they get counted by a later iteration of this loop.
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  // Edges of the last-seen nodes were re-added above; drop the stale
  // contribution recorded in onPassExit to avoid double counting.
  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}
173 
void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // No need to keep this around - function passes will invalidate it.
  FPICache.clear();
  if (!LastSCC)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes that
  // survived.
  assert(NodesInLastSCC.empty());
  // Every node in LastSCC was already counted, so the totals must dominate.
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    // Remember each node's edge contribution; onPassEntry subtracts this
    // after re-counting survivors.
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}
192 
193 int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
194   return getCachedFPI(F).DirectCallsToDefinedFunctions;
195 }
196 
// Update the internal state of the advisor, and force invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state - the
// number of functions and the number of static calls. We also keep track of the
// total IR size in this module, to stop misbehaving policies at a certain bloat
// factor (SizeIncreaseThreshold)
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  // If the callee survived, its size is unchanged, so only the caller's size
  // needs to be re-measured.
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  // Trip the bloat safety valve (see SizeIncreaseThreshold) if the module
  // grew too much; subsequent advice requests become no-ops.
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
236 
237 int64_t MLInlineAdvisor::getModuleIRSize() const {
238   int64_t Ret = 0;
239   for (auto &F : M)
240     if (!F.isDeclaration())
241       Ret += getIRSize(F);
242   return Ret;
243 }
244 
245 FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
246   auto InsertPair =
247       FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
248   if (!InsertPair.second)
249     return InsertPair.first->second;
250   InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
251   return InsertPair.first->second;
252 }
253 
// Main advice entry point: compute all model features for call site \p CB and
// query the learned model, except in cases where no tracked state can change
// (never-inline, recursion, ForceStop, non-inlinable) where a plain
// InlineAdvice noop is returned instead.
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  // NOTE(review): this dereference assumes CB is a direct call, i.e.
  // getCalledFunction() is non-null - confirm callers guarantee this.
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we won't want to track anymore any state changes, so
  // we just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  // The cost features must be computable even in the mandatory case; if they
  // aren't, fall back to base advice with no recommendation.
  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  // Count call arguments that are compile-time constants - a proxy for how
  // much simplification inlining may unlock.
  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = getCachedFPI(Caller);
  auto &CalleeBefore = getCachedFPI(Callee);

  // Populate the model's input tensors with the pre-inlining features.
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}
346 
347 std::unique_ptr<MLInlineAdvice>
348 MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
349                                     OptimizationRemarkEmitter &ORE) {
350   return std::make_unique<MLInlineAdvice>(
351       this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
352 }
353 
354 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
355                                                                   bool Advice) {
356   // Make sure we track inlinings in all cases - mandatory or not.
357   if (Advice && !ForceStop)
358     return getMandatoryAdviceImpl(CB);
359 
360   // If this is a "never inline" case, there won't be any changes to internal
361   // state we need to track, so we can just return the base InlineAdvice, which
362   // will do nothing interesting.
363   // Same if we are forced to stop - we don't track anymore.
364   return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
365 }
366 
// Fetch (via FAM, cached there) the LoopInfo for \p F; used when finalizing
// the incremental function-property updates after an inlining.
const LoopInfo &MLInlineAdvisor::getLoopInfo(Function &F) const {
  return FAM.getResult<LoopAnalysis>(F);
}
370 
// Produce a tracked, always-inline advice. Overridable by subclasses (e.g.
// the development-mode advisor) to also log the decision.
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}
375 
MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
                               OptimizationRemarkEmitter &ORE,
                               bool Recommendation)
    : InlineAdvice(Advisor, CB, ORE, Recommendation),
      // When the advisor is forced to stop it no longer tracks state, so
      // skip computing the size/edge snapshots.
      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
      CallerAndCalleeEdges(Advisor->isForcedToStop()
                               ? 0
                               : (Advisor->getLocalCalls(*Caller) +
                                  Advisor->getLocalCalls(*Callee))),
      // Snapshot the caller's properties so they can be restored if the
      // inlining attempt is unsuccessful.
      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
  // Only engage the incremental property updater when we actually expect the
  // caller to change.
  if (Recommendation)
    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
}
390 
391 void MLInlineAdvice::reportContextForRemark(
392     DiagnosticInfoOptimizationBase &OR) {
393   using namespace ore;
394   OR << NV("Callee", Callee->getName());
395   for (size_t I = 0; I < NumberOfFeatures; ++I)
396     OR << NV(FeatureMap[I].name(),
397              *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
398   OR << NV("ShouldInline", isInliningRecommended());
399 }
400 
// Finalize the incremental FunctionPropertiesUpdater started at construction.
// Precondition: FPU is engaged (i.e. inlining was recommended).
void MLInlineAdvice::updateCachedCallerFPI() {
  FPU->finish(getAdvisor()->getLoopInfo(*Caller));
}
404 
405 void MLInlineAdvice::recordInliningImpl() {
406   updateCachedCallerFPI();
407   ORE.emit([&]() {
408     OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
409     reportContextForRemark(R);
410     return R;
411   });
412   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
413 }
414 
415 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
416   updateCachedCallerFPI();
417   ORE.emit([&]() {
418     OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
419                          Block);
420     reportContextForRemark(R);
421     return R;
422   });
423   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
424 }
425 
426 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
427     const InlineResult &Result) {
428   getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
429   ORE.emit([&]() {
430     OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
431                                DLoc, Block);
432     reportContextForRemark(R);
433     return R;
434   });
435 }
436 void MLInlineAdvice::recordUnattemptedInliningImpl() {
437   assert(!FPU);
438   ORE.emit([&]() {
439     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
440     reportContextForRemark(R);
441     return R;
442   });
443 }
444