1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interface between the inliner and a learned model.
10 // It delegates model evaluation to either the AOT compiled model (the
11 // 'release' mode) or a runtime-loaded model (the 'development' case).
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/MLInlineAdvisor.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/Analysis/AssumptionCache.h"
17 #include "llvm/Analysis/CallGraph.h"
18 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
19 #include "llvm/Analysis/InlineCost.h"
20 #include "llvm/Analysis/InlineModelFeatureMaps.h"
21 #include "llvm/Analysis/LazyCallGraph.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/MLModelRunner.h"
24 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/InstIterator.h"
28 #include "llvm/IR/PassManager.h"
29 #include "llvm/Support/CommandLine.h"
30 
31 using namespace llvm;
32 
33 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
34 #include "llvm/Analysis/ReleaseModeModelRunner.h"
35 // codegen-ed file
36 #include "InlinerSizeModel.h" // NOLINT
37 
38 std::unique_ptr<InlineAdvisor>
39 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
40   auto AOTRunner =
41       std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
42           M.getContext(), FeatureMap, DecisionName);
43   return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
44 }
45 #endif
46 
47 #define DEBUG_TYPE "inline-ml"
48 
49 static cl::opt<float> SizeIncreaseThreshold(
50     "ml-advisor-size-increase-threshold", cl::Hidden,
51     cl::desc("Maximum factor by which expected native size may increase before "
52              "blocking any further inlining."),
53     cl::init(2.0));
54 
55 static cl::opt<bool> KeepFPICache(
56     "ml-advisor-keep-fpi-cache", cl::Hidden,
57     cl::desc(
58         "For test - keep the ML Inline advisor's FunctionPropertiesInfo cache"),
59     cl::init(false));
60 
61 // clang-format off
62 const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
63 #define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
64 // InlineCost features - these must come first
65   INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
66 #undef POPULATE_NAMES
67 
68 // Non-cost features
69 #define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
70   INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
71 #undef POPULATE_NAMES
72 };
73 // clang-format on
74 
75 const char *const llvm::DecisionName = "inlining_decision";
76 const char *const llvm::DefaultDecisionName = "inlining_default";
77 const char *const llvm::RewardName = "delta_size";
78 
79 CallBase *getInlinableCS(Instruction &I) {
80   if (auto *CS = dyn_cast<CallBase>(&I))
81     if (Function *Callee = CS->getCalledFunction()) {
82       if (!Callee->isDeclaration()) {
83         return CS;
84       }
85     }
86   return nullptr;
87 }
88 
89 MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
90                                  std::unique_ptr<MLModelRunner> Runner)
91     : InlineAdvisor(
92           M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
93       ModelRunner(std::move(Runner)),
94       CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
95       InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
96   assert(ModelRunner);
97 
98   // Extract the 'call site height' feature - the position of a call site
99   // relative to the farthest statically reachable SCC node. We don't mutate
100   // this value while inlining happens. Empirically, this feature proved
101   // critical in behavioral cloning - i.e. training a model to mimic the manual
102   // heuristic's decisions - and, thus, equally important for training for
103   // improvement.
104   CallGraph CGraph(M);
105   for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
106     const std::vector<CallGraphNode *> &CGNodes = *I;
107     unsigned Level = 0;
108     for (auto *CGNode : CGNodes) {
109       Function *F = CGNode->getFunction();
110       if (!F || F->isDeclaration())
111         continue;
112       for (auto &I : instructions(F)) {
113         if (auto *CS = getInlinableCS(I)) {
114           auto *Called = CS->getCalledFunction();
115           auto Pos = FunctionLevels.find(&CG.get(*Called));
116           // In bottom up traversal, an inlinable callee is either in the
117           // same SCC, or to a function in a visited SCC. So not finding its
118           // level means we haven't visited it yet, meaning it's in this SCC.
119           if (Pos == FunctionLevels.end())
120             continue;
121           Level = std::max(Level, Pos->second + 1);
122         }
123       }
124     }
125     for (auto *CGNode : CGNodes) {
126       Function *F = CGNode->getFunction();
127       if (F && !F->isDeclaration())
128         FunctionLevels[&CG.get(*F)] = Level;
129     }
130   }
131   for (auto KVP : FunctionLevels) {
132     AllNodes.insert(KVP.first);
133     EdgeCount += getLocalCalls(KVP.first->getFunction());
134   }
135   NodeCount = AllNodes.size();
136 }
137 
138 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
139   return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
140 }
141 
142 void MLInlineAdvisor::onPassEntry() {
143   if (ForceStop)
144     return;
145   FPICache.clear();
146   // Function passes executed between InlinerPass runs may have changed the
147   // module-wide features.
148   // The cgscc pass manager rules are such that:
149   // - if a pass leads to merging SCCs, then the pipeline is restarted on the
150   // merged SCC
151   // - if a pass leads to splitting the SCC, then we continue with one of the
152   // splits
153   // This means that the NodesInLastSCC is a superset (not strict) of the nodes
154   // that subsequent passes would have processed
155   // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
156   // they'd be adjacent to Nodes in the last SCC. So we just need to check the
157   // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
158   // care about the nature of the Edge (call or ref).
159   NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
160   while (!NodesInLastSCC.empty()) {
161     const auto *N = NodesInLastSCC.front();
162     NodesInLastSCC.pop_front();
163     // The Function wrapped by N could have been deleted since we last saw it.
164     if (N->isDead()) {
165       assert(!N->getFunction().isDeclaration());
166       continue;
167     }
168     ++NodeCount;
169     EdgeCount += getLocalCalls(N->getFunction());
170     for (const auto &E : *(*N)) {
171       const auto *AdjNode = &E.getNode();
172       assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
173       auto I = AllNodes.insert(AdjNode);
174       if (I.second)
175         NodesInLastSCC.push_back(AdjNode);
176     }
177   }
178 
179   EdgeCount -= EdgesOfLastSeenNodes;
180   EdgesOfLastSeenNodes = 0;
181 }
182 
183 void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
184   // No need to keep this around - function passes will invalidate it.
185   if (!KeepFPICache)
186     FPICache.clear();
187   if (!LastSCC || ForceStop)
188     return;
189   // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
190   // we update the node count and edge count from the subset of these nodes that
191   // survived.
192   assert(NodesInLastSCC.empty());
193   assert(NodeCount >= LastSCC->size());
194   EdgesOfLastSeenNodes = 0;
195   for (const auto &N : *LastSCC) {
196     assert(!N.isDead());
197     EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
198     NodesInLastSCC.push_back(&N);
199   }
200   assert(EdgeCount >= EdgesOfLastSeenNodes);
201 }
202 
203 int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
204   return getCachedFPI(F).DirectCallsToDefinedFunctions;
205 }
206 
207 // Update the internal state of the advisor, and force invalidate feature
208 // analysis. Currently, we maintain minimal (and very simple) global state - the
209 // number of functions and the number of static calls. We also keep track of the
210 // total IR size in this module, to stop misbehaving policies at a certain bloat
211 // factor (SizeIncreaseThreshold)
212 void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
213                                            bool CalleeWasDeleted) {
214   assert(!ForceStop);
215   Function *Caller = Advice.getCaller();
216   Function *Callee = Advice.getCallee();
217   // The caller features aren't valid anymore.
218   {
219     PreservedAnalyses PA = PreservedAnalyses::none();
220     FAM.invalidate(*Caller, PA);
221   }
222   Advice.updateCachedCallerFPI(FAM);
223   int64_t IRSizeAfter =
224       getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
225   CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
226   if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
227     ForceStop = true;
228 
229   // We can delta-update module-wide features. We know the inlining only changed
230   // the caller, and maybe the callee (by deleting the latter).
231   // Nodes are simple to update.
232   // For edges, we 'forget' the edges that the caller and callee used to have
233   // before inlining, and add back what they currently have together.
234   int64_t NewCallerAndCalleeEdges =
235       getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
236 
237   if (CalleeWasDeleted)
238     --NodeCount;
239   else
240     NewCallerAndCalleeEdges +=
241         getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
242   EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
243   assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
244 }
245 
246 int64_t MLInlineAdvisor::getModuleIRSize() const {
247   int64_t Ret = 0;
248   for (auto &F : M)
249     if (!F.isDeclaration())
250       Ret += getIRSize(F);
251   return Ret;
252 }
253 
254 FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
255   auto InsertPair =
256       FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
257   if (!InsertPair.second)
258     return InsertPair.first->second;
259   InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
260   return InsertPair.first->second;
261 }
262 
263 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
264   auto &Caller = *CB.getCaller();
265   auto &Callee = *CB.getCalledFunction();
266 
267   auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
268     return FAM.getResult<AssumptionAnalysis>(F);
269   };
270   auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
271   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
272 
273   auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
274   // If this is a "never inline" case, there won't be any changes to internal
275   // state we need to track, so we can just return the base InlineAdvice, which
276   // will do nothing interesting.
277   // Same thing if this is a recursive case.
278   if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
279       &Caller == &Callee)
280     return getMandatoryAdvice(CB, false);
281 
282   bool Mandatory =
283       MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;
284 
285   // If we need to stop, we won't want to track anymore any state changes, so
286   // we just return the base InlineAdvice, which acts as a noop.
287   if (ForceStop) {
288     ORE.emit([&] {
289       return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
290              << "Won't attempt inlining because module size grew too much.";
291     });
292     return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
293   }
294 
295   int CostEstimate = 0;
296   if (!Mandatory) {
297     auto IsCallSiteInlinable =
298         llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
299     if (!IsCallSiteInlinable) {
300       // We can't inline this for correctness reasons, so return the base
301       // InlineAdvice, as we don't care about tracking any state changes (which
302       // won't happen).
303       return std::make_unique<InlineAdvice>(this, CB, ORE, false);
304     }
305     CostEstimate = *IsCallSiteInlinable;
306   }
307 
308   const auto CostFeatures =
309       llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
310   if (!CostFeatures) {
311     return std::make_unique<InlineAdvice>(this, CB, ORE, false);
312   }
313 
314   if (Mandatory)
315     return getMandatoryAdvice(CB, true);
316 
317   auto NrCtantParams = 0;
318   for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
319     NrCtantParams += (isa<Constant>(*I));
320   }
321 
322   auto &CallerBefore = getCachedFPI(Caller);
323   auto &CalleeBefore = getCachedFPI(Callee);
324 
325   *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
326       CalleeBefore.BasicBlockCount;
327   *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
328       getInitialFunctionLevel(Caller);
329   *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
330   *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
331   *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
332   *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
333       CallerBefore.Uses;
334   *ModelRunner->getTensor<int64_t>(
335       FeatureIndex::CallerConditionallyExecutedBlocks) =
336       CallerBefore.BlocksReachedFromConditionalInstruction;
337   *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
338       CallerBefore.BasicBlockCount;
339   *ModelRunner->getTensor<int64_t>(
340       FeatureIndex::CalleeConditionallyExecutedBlocks) =
341       CalleeBefore.BlocksReachedFromConditionalInstruction;
342   *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
343       CalleeBefore.Uses;
344   *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;
345 
346   // Add the cost features
347   for (size_t I = 0;
348        I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
349     *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
350         static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
351   }
352 
353   return getAdviceFromModel(CB, ORE);
354 }
355 
356 std::unique_ptr<MLInlineAdvice>
357 MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
358                                     OptimizationRemarkEmitter &ORE) {
359   return std::make_unique<MLInlineAdvice>(
360       this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
361 }
362 
363 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
364                                                                   bool Advice) {
365   // Make sure we track inlinings in all cases - mandatory or not.
366   if (Advice && !ForceStop)
367     return getMandatoryAdviceImpl(CB);
368 
369   // If this is a "never inline" case, there won't be any changes to internal
370   // state we need to track, so we can just return the base InlineAdvice, which
371   // will do nothing interesting.
372   // Same if we are forced to stop - we don't track anymore.
373   return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
374 }
375 
376 std::unique_ptr<MLInlineAdvice>
377 MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
378   return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
379 }
380 
381 void MLInlineAdvisor::print(raw_ostream &OS) const {
382   OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
383      << "\n";
384   OS << "[MLInlineAdvisor] FPI:\n";
385   for (auto I : FPICache) {
386     OS << I.getFirst()->getName() << ":\n";
387     I.getSecond().print(OS);
388     OS << "\n";
389   }
390   OS << "\n";
391 }
392 
393 MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
394                                OptimizationRemarkEmitter &ORE,
395                                bool Recommendation)
396     : InlineAdvice(Advisor, CB, ORE, Recommendation),
397       CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
398       CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
399       CallerAndCalleeEdges(Advisor->isForcedToStop()
400                                ? 0
401                                : (Advisor->getLocalCalls(*Caller) +
402                                   Advisor->getLocalCalls(*Callee))),
403       PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
404   if (Recommendation)
405     FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
406 }
407 
408 void MLInlineAdvice::reportContextForRemark(
409     DiagnosticInfoOptimizationBase &OR) {
410   using namespace ore;
411   OR << NV("Callee", Callee->getName());
412   for (size_t I = 0; I < NumberOfFeatures; ++I)
413     OR << NV(FeatureMap[I].name(),
414              *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
415   OR << NV("ShouldInline", isInliningRecommended());
416 }
417 
418 void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const {
419   FPU->finish(FAM);
420 }
421 
422 void MLInlineAdvice::recordInliningImpl() {
423   ORE.emit([&]() {
424     OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
425     reportContextForRemark(R);
426     return R;
427   });
428   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
429 }
430 
431 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
432   ORE.emit([&]() {
433     OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
434                          Block);
435     reportContextForRemark(R);
436     return R;
437   });
438   getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
439 }
440 
441 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
442     const InlineResult &Result) {
443   getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
444   ORE.emit([&]() {
445     OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
446                                DLoc, Block);
447     reportContextForRemark(R);
448     return R;
449   });
450 }
451 void MLInlineAdvice::recordUnattemptedInliningImpl() {
452   assert(!FPU);
453   ORE.emit([&]() {
454     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
455     reportContextForRemark(R);
456     return R;
457   });
458 }
459