//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT

std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif

#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

// clang-format off
const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
// InlineCost features - these must come first
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";

CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction()) {
      if (!Callee->isDeclaration()) {
        return CS;
      }
    }
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
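  // Illustration (hypothetical module, not taken from real input): for a
  // bottom-up call chain A -> B -> C, each function in its own SCC, the
  // traversal below assigns C level 0, B level 1 (its callee C is at level
  // 0), and A level 2, so call sites in A report a call site height of 2.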
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In bottom-up traversal, an inlinable callee is either in the
          // same SCC, or in a previously visited SCC. So not finding its
          // level means we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

void MLInlineAdvisor::onPassEntry() {
  FPICache.clear();
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  // merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  // splits
  // This means that NodesInLastSCC is a (not necessarily strict) superset of
  // the nodes that subsequent passes would have processed.
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  // they'd be adjacent to Nodes in the last SCC. So we just need to check the
  // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  // care about the nature of the Edge (call or ref).
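  // For example (hypothetical scenario): if the SCC recorded in onPassExit
  // was {F, G}, a later pass deleted G, and coroutine splitting introduced a
  // new function H adjacent to F, then the walk below skips G (it is dead),
  // re-counts F, and discovers H through F's edges, keeping NodeCount and
  // EdgeCount consistent.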
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}

void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // No need to keep this around - function passes will invalidate it.
  FPICache.clear();
  if (!LastSCC)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry, we
  // update the node count and edge count from the subset of these nodes that
  // survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}

int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return getCachedFPI(F).DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state -
// the number of functions and the number of static calls. We also keep track
// of the total IR size in this module, to stop misbehaving policies at a
// certain bloat factor (SizeIncreaseThreshold).
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only
  // changed the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
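  // Made-up numbers for illustration: if, before inlining, the caller had 3
  // local calls and the callee 2 (CallerAndCalleeEdges == 5), and afterwards
  // the caller has 4 while the surviving callee still has 2, EdgeCount is
  // adjusted by (4 + 2) - 5 = +1.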
  int64_t NewCallerAndCalleeEdges =
      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}

FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
  auto InsertPair =
      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
  if (!InsertPair.second)
    return InsertPair.first->second;
  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
  return InsertPair.first->second;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we don't want to track any further state changes, so
  // we just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = getCachedFPI(Caller);
  auto &CalleeBefore = getCachedFPI(Callee);

  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we don't track anymore.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

const LoopInfo &MLInlineAdvisor::getLoopInfo(Function &F) const {
  return FAM.getResult<LoopAnalysis>(F);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}

MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
                               OptimizationRemarkEmitter &ORE,
                               bool Recommendation)
    : InlineAdvice(Advisor, CB, ORE, Recommendation),
      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
      CallerAndCalleeEdges(Advisor->isForcedToStop()
                               ? 0
                               : (Advisor->getLocalCalls(*Caller) +
                                  Advisor->getLocalCalls(*Callee))),
      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
  if (Recommendation)
    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
}

void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureMap[I].name(),
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

void MLInlineAdvice::updateCachedCallerFPI() {
  FPU->finish(getAdvisor()->getLoopInfo(*Caller));
}

void MLInlineAdvice::recordInliningImpl() {
  updateCachedCallerFPI();
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  updateCachedCallerFPI();
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  assert(!FPU);
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningNotAttempted", DLoc,
                               Block);
    reportContextForRemark(R);
    return R;
  });
}