//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
#include "llvm/Analysis/ReleaseModeModelRunner.h"
// The codegen-ed header for the AOT-compiled model.
35 #include "InlinerSizeModel.h" // NOLINT
36 
37 std::unique_ptr<InlineAdvisor>
38 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
39   auto AOTRunner =
40       std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
41           M.getContext(), FeatureMap, DecisionName);
42   return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
43 }
44 #endif
45 
46 #define DEBUG_TYPE "inline-ml"
47 
48 static cl::opt<float> SizeIncreaseThreshold(
49     "ml-advisor-size-increase-threshold", cl::Hidden,
50     cl::desc("Maximum factor by which expected native size may increase before "
51              "blocking any further inlining."),
52     cl::init(2.0));
53 
54 // clang-format off
55 const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1}),
// InlineCost features - these must come first
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1}),
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on
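// Each POPULATE_NAMES expansion above yields one scalar (shape {1}) int64
// feature; for illustration, a single entry expands to roughly:
//   TensorSpec::createSpec<int64_t>("callee_basic_block_count", {1}),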

const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";

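// Return \p I as a call site whose callee is statically known and defined in
// this module (hence a candidate for inlining), or nullptr otherwise.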
static CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction())
      if (!Callee->isDeclaration())
        return CS;
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Compute the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. This value is
  // computed once and is not updated as inlining proceeds. Empirically, this
  // feature proved critical for behavioral cloning - i.e. training a model to
  // mimic the manual heuristic's decisions - and, thus, equally important when
  // training for improvement.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In a bottom-up traversal, an inlinable callee is either in the
          // same SCC, or in an already-visited SCC. So not finding its level
          // means we haven't visited it yet, i.e. it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
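    // Every defined function in this SCC gets the same level: one above the
    // deepest level of any already-visited callee.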
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
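  // Seed the module-wide node and edge counts from the initial call graph.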
  for (const auto &KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

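// The level is fixed at construction time; functions without a node in the
// lazy call graph default to level 0.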
unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  if (auto *N = CG.lookup(F))
    return FunctionLevels.at(N);
  return 0;
}

void MLInlineAdvisor::onPassEntry() {
  if (ForceStop)
    return;
  FPICache.clear();
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The CGSCC pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  //   merged SCC;
  // - if a pass leads to splitting the SCC, then we continue with one of the
  //   splits.
  // This means that NodesInLastSCC is a (non-strict) superset of the nodes
  // that subsequent passes would have processed. In addition, if new nodes
  // were created by a pass (e.g. CoroSplit), they'd be adjacent to nodes in
  // the last SCC. So we just need to check the boundary of NodesInLastSCC for
  // nodes we haven't seen yet. We don't care about the nature of the edge
  // (call or ref).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}

void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // No need to keep this around - function passes will invalidate it.
  FPICache.clear();
  if (!LastSCC || ForceStop)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes
  // that survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}

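// 'Local' calls are direct calls to functions defined in this module - these
// are the edges tracked by the module-wide EdgeCount feature.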
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return getCachedFPI(F).DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force-invalidate the caller's
// feature analysis. Currently, we maintain minimal (and very simple) global
// state - the number of functions and the number of static calls. We also keep
// track of the total IR size in this module, to stop misbehaving policies at a
// certain bloat factor (SizeIncreaseThreshold).
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
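  // Delta-update the module's total IR size: swap out the pre-inlining sizes
  // of caller and callee for the caller's new size (plus the callee's, if it
  // survived).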
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}

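// Lazily compute and cache the function properties. The cache is cleared at
// pass entry/exit; a caller's entry is updated or restored by the
// MLInlineAdvice bookkeeping below.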
FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
  auto InsertPair =
      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
  if (!InsertPair.second)
    return InsertPair.first->second;
  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
  return InsertPair.first->second;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we no longer want to track any state changes, so we
  // just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

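  // Gather the per-call-site cost features; if they can't be computed, the
  // call site isn't a viable inlining candidate, so return the base (noop)
  // advice.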
  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures)
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  int64_t NrCtantParams = 0;
  for (const Value *Arg : CB.args())
    NrCtantParams += isa<Constant>(Arg);

  auto &CallerBefore = getCachedFPI(Caller);
  auto &CalleeBefore = getCachedFPI(Callee);

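  // Copy the gathered features into the model runner's input tensors.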
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features.
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}

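// The model emits a single int64 decision tensor; any nonzero value means
// 'inline'.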
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we don't track anymore.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

const LoopInfo &MLInlineAdvisor::getLoopInfo(Function &F) const {
  return FAM.getResult<LoopAnalysis>(F);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}

MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
                               OptimizationRemarkEmitter &ORE,
                               bool Recommendation)
    : InlineAdvice(Advisor, CB, ORE, Recommendation),
      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
      CallerAndCalleeEdges(Advisor->isForcedToStop()
                               ? 0
                               : (Advisor->getLocalCalls(*Caller) +
                                  Advisor->getLocalCalls(*Callee))),
      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
  if (Recommendation)
    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
}

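// Attach the callee name, all current feature values, and the decision to the
// remark, making each emitted remark self-describing.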
void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureMap[I].name(),
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

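// Flush the pending FunctionPropertiesUpdater so the cached caller FPI
// reflects the post-inlining function.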
void MLInlineAdvice::updateCachedCallerFPI() {
  FPU->finish(getAdvisor()->getLoopInfo(*Caller));
}

void MLInlineAdvice::recordInliningImpl() {
  updateCachedCallerFPI();
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  updateCachedCallerFPI();
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  assert(!FPU);
  ORE.emit([&]() {
441     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}