//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT-compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT

std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif

#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

static cl::opt<bool> KeepFPICache(
    "ml-advisor-keep-fpi-cache", cl::Hidden,
    cl::desc("For testing: keep the ML inline advisor's FunctionPropertiesInfo "
             "cache"),
    cl::init(false));

// clang-format off
const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
// InlineCost features - these must come first
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

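// Names of the model's decision output, of the default (heuristic) decision,
// and of the reward signal used when training for size.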
const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";

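// Return I as a call site if it directly calls a function defined in this
// module (i.e. one that could be inlined); otherwise return nullptr.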
CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction())
      if (!Callee->isDeclaration())
        return CS;
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In a bottom-up traversal, an inlinable callee is either in the
          // same SCC, or in an already-visited SCC. So not finding its level
          // means we haven't visited it yet, i.e. it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  for (const auto &KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

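// The level of F's node, as computed at construction time; functions that are
// not in the call graph are at level 0.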
unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

void MLInlineAdvisor::onPassEntry() {
  if (ForceStop)
    return;
  FPICache.clear();
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  //   merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  //   splits
  // This means that NodesInLastSCC is a (not necessarily strict) superset of
  // the nodes that subsequent passes would have processed. In addition, if
  // new nodes were created by a pass (e.g. CoroSplit), they'd be adjacent to
  // nodes in the last SCC. So we just need to check the boundary of the nodes
  // in NodesInLastSCC for nodes we haven't seen. We don't care about the
  // nature of the edge (call or ref).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}

void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // No need to keep this around - function passes will invalidate it.
  if (!KeepFPICache)
    FPICache.clear();
  if (!LastSCC || ForceStop)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes
  // that survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}

int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return getCachedFPI(F).DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force-invalidate feature
// analyses. Currently, we maintain minimal (and very simple) global state -
// the number of functions and the number of static calls. We also keep track
// of the total IR size in this module, to stop misbehaving policies at a
// certain bloat factor (SizeIncreaseThreshold).
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();
  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::none();
    FAM.invalidate(*Caller, PA);
  }
  Advice.updateCachedCallerFPI(FAM.getResult<LoopAnalysis>(*Caller));
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only
  // changed the caller, and maybe the callee (by deleting the latter). Nodes
  // are simple to update. For edges, we 'forget' the edges that the caller
  // and callee used to have before inlining, and add back what they currently
  // have together.
  int64_t NewCallerAndCalleeEdges =
      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

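// Total IR size of all function definitions in the module; declarations
// contribute nothing.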
int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}

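// Return the cached FunctionPropertiesInfo for F, computing and caching it on
// a miss. The cache is cleared in onPassEntry (and in onPassExit, unless kept
// for tests), so entries reflect the most recent state the advisor observed.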
FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
  auto InsertPair =
      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
  if (!InsertPair.second)
    return InsertPair.first->second;
  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
  return InsertPair.first->second;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we no longer want to track any state changes, so we
  // just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  int64_t NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = getCachedFPI(Caller);
  auto &CalleeBefore = getCachedFPI(Callee);

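  // Populate the model's input tensors with this call site's features. Each
  // feature is a scalar int64 tensor (see FeatureMap above).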
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features.
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}

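// Evaluate the model and interpret its int64 output as the boolean inlining
// recommendation.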
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we don't track any further changes.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}

void MLInlineAdvisor::print(raw_ostream &OS) const {
  OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
     << "\n";
  OS << "[MLInlineAdvisor] FPI:\n";
  for (const auto &I : FPICache) {
    OS << I.getFirst()->getName() << ":\n";
    I.getSecond().print(OS);
    OS << "\n";
  }
  OS << "\n";
}

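// Capture the caller's and callee's IR sizes and local call counts at advice
// creation time, so onSuccessfulInlining can later delta-update the module
// state. If the advisor was forced to stop, no further tracking happens, and
// the snapshots are skipped.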
MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
                               OptimizationRemarkEmitter &ORE,
                               bool Recommendation)
    : InlineAdvice(Advisor, CB, ORE, Recommendation),
      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
      CallerAndCalleeEdges(Advisor->isForcedToStop()
                               ? 0
                               : (Advisor->getLocalCalls(*Caller) +
                                  Advisor->getLocalCalls(*Callee))),
      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
  if (Recommendation)
    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
}

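// Attach the callee name, every model feature value, and the decision to the
// remark, so emitted remarks carry the full model context.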
void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureMap[I].name(),
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

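// Called by the advisor after a successful inlining, once the caller's
// analyses have been re-run: the updater (FPU) finalizes the cached caller
// properties using the fresh LoopInfo.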
void MLInlineAdvice::updateCachedCallerFPI(const LoopInfo &LI) const {
  FPU->finish(LI);
}

void MLInlineAdvice::recordInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted=*/false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted=*/true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
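  // Inlining was attempted but failed; restore the caller's cached properties
  // to their pre-inlining snapshot.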
  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  assert(!FPU);
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}