1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the interface between the inliner and a learned model. 10 // It delegates model evaluation to either the AOT compiled model (the 11 // 'release' mode) or a runtime-loaded model (the 'development' case). 12 // 13 //===----------------------------------------------------------------------===// 14 #include "llvm/Analysis/MLInlineAdvisor.h" 15 #include "llvm/ADT/SCCIterator.h" 16 #include "llvm/Analysis/CallGraph.h" 17 #include "llvm/Analysis/FunctionPropertiesAnalysis.h" 18 #include "llvm/Analysis/InlineCost.h" 19 #include "llvm/Analysis/InlineModelFeatureMaps.h" 20 #include "llvm/Analysis/LazyCallGraph.h" 21 #include "llvm/Analysis/MLModelRunner.h" 22 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 23 #include "llvm/Analysis/ReleaseModeModelRunner.h" 24 #include "llvm/Analysis/TargetLibraryInfo.h" 25 #include "llvm/Analysis/TargetTransformInfo.h" 26 #include "llvm/Config/config.h" 27 #include "llvm/IR/InstIterator.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/PassManager.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Path.h" 32 33 #include <limits> 34 #include <unordered_map> 35 #include <unordered_set> 36 37 using namespace llvm; 38 39 #ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL 40 #define LLVM_HAVE_TF_AOT 41 #endif 42 43 #if defined(LLVM_HAVE_TF_AOT) 44 // codegen-ed file 45 #include "InlinerSizeModel.h" // NOLINT 46 47 std::unique_ptr<InlineAdvisor> 48 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { 49 auto AOTRunner = 50 std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>( 51 M.getContext(), FeatureNameMap, DecisionName); 52 return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner)); 53 } 54 #endif 55 56 #define DEBUG_TYPE "inline-ml" 57 58 #if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) 59 60 static cl::opt<float> SizeIncreaseThreshold( 61 "ml-advisor-size-increase-threshold", cl::Hidden, 62 cl::desc("Maximum factor by which expected native size may increase before " 63 "blocking any further inlining."), 64 cl::init(2.0)); 65 66 // clang-format off 67 const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{ 68 // InlineCost features - these must come first 69 #define POPULATE_NAMES(INDEX_NAME, NAME) NAME, 70 INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES) 71 #undef POPULATE_NAMES 72 73 // Non-cost features 74 #define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, 75 INLINE_FEATURE_ITERATOR(POPULATE_NAMES) 76 #undef POPULATE_NAMES 77 }; 78 // clang-format on 79 80 const char *const llvm::DecisionName = "inlining_decision"; 81 const char *const llvm::DefaultDecisionName = "inlining_default"; 82 const char *const llvm::RewardName = "delta_size"; 83 84 CallBase *getInlinableCS(Instruction &I) { 85 if (auto *CS = dyn_cast<CallBase>(&I)) 86 if (Function *Callee = CS->getCalledFunction()) { 87 if (!Callee->isDeclaration()) { 88 return CS; 89 } 90 } 91 return nullptr; 92 } 93 94 MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, 95 std::unique_ptr<MLModelRunner> Runner) 96 : InlineAdvisor( 97 M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), 98 ModelRunner(std::move(Runner)), 99 CG(MAM.getResult<LazyCallGraphAnalysis>(M)), 100 InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { 101 assert(ModelRunner); 102 103 // Extract the 'call site height' feature - the position of a call site 104 // relative to the farthest statically reachable SCC node. We don't mutate 105 // this value while inlining happens. Empirically, this feature proved 106 // critical in behavioral cloning - i.e. training a model to mimic the manual 107 // heuristic's decisions - and, thus, equally important for training for 108 // improvement. 109 CallGraph CGraph(M); 110 for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) { 111 const std::vector<CallGraphNode *> &CGNodes = *I; 112 unsigned Level = 0; 113 for (auto *CGNode : CGNodes) { 114 Function *F = CGNode->getFunction(); 115 if (!F || F->isDeclaration()) 116 continue; 117 for (auto &I : instructions(F)) { 118 if (auto *CS = getInlinableCS(I)) { 119 auto *Called = CS->getCalledFunction(); 120 auto Pos = FunctionLevels.find(&CG.get(*Called)); 121 // In bottom up traversal, an inlinable callee is either in the 122 // same SCC, or to a function in a visited SCC. So not finding its 123 // level means we haven't visited it yet, meaning it's in this SCC. 124 if (Pos == FunctionLevels.end()) 125 continue; 126 Level = std::max(Level, Pos->second + 1); 127 } 128 } 129 } 130 for (auto *CGNode : CGNodes) { 131 Function *F = CGNode->getFunction(); 132 if (F && !F->isDeclaration()) 133 FunctionLevels[&CG.get(*F)] = Level; 134 } 135 } 136 for (auto KVP : FunctionLevels) { 137 AllNodes.insert(KVP.first); 138 EdgeCount += getLocalCalls(KVP.first->getFunction()); 139 } 140 NodeCount = AllNodes.size(); 141 } 142 143 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const { 144 return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0; 145 } 146 147 void MLInlineAdvisor::onPassEntry() { 148 // Function passes executed between InlinerPass runs may have changed the 149 // module-wide features. 150 // The cgscc pass manager rules are such that: 151 // - if a pass leads to merging SCCs, then the pipeline is restarted on the 152 // merged SCC 153 // - if a pass leads to splitting the SCC, then we continue with one of the 154 // splits 155 // This means that the NodesInLastSCC is a superset (not strict) of the nodes 156 // that subsequent passes would have processed 157 // - in addition, if new Nodes were created by a pass (e.g. CoroSplit), 158 // they'd be adjacent to Nodes in the last SCC. So we just need to check the 159 // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't 160 // care about the nature of the Edge (call or ref). 161 NodeCount -= static_cast<int64_t>(NodesInLastSCC.size()); 162 while (!NodesInLastSCC.empty()) { 163 const auto *N = NodesInLastSCC.front(); 164 NodesInLastSCC.pop_front(); 165 // The Function wrapped by N could have been deleted since we last saw it. 166 if (N->isDead()) { 167 assert(!N->getFunction().isDeclaration()); 168 continue; 169 } 170 ++NodeCount; 171 EdgeCount += getLocalCalls(N->getFunction()); 172 for (const auto &E : *(*N)) { 173 const auto *AdjNode = &E.getNode(); 174 assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration()); 175 auto I = AllNodes.insert(AdjNode); 176 if (I.second) 177 NodesInLastSCC.push_back(AdjNode); 178 } 179 } 180 181 EdgeCount -= EdgesOfLastSeenNodes; 182 EdgesOfLastSeenNodes = 0; 183 } 184 185 void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) { 186 if (!LastSCC) 187 return; 188 // Keep track of the nodes and edges we last saw. Then, in onPassEntry, 189 // we update the node count and edge count from the subset of these nodes that 190 // survived. 191 assert(NodesInLastSCC.empty()); 192 assert(NodeCount >= LastSCC->size()); 193 EdgesOfLastSeenNodes = 0; 194 for (const auto &N : *LastSCC) { 195 assert(!N.isDead()); 196 EdgesOfLastSeenNodes += getLocalCalls(N.getFunction()); 197 NodesInLastSCC.push_back(&N); 198 } 199 assert(EdgeCount >= EdgesOfLastSeenNodes); 200 } 201 202 int64_t MLInlineAdvisor::getLocalCalls(Function &F) { 203 return FAM.getResult<FunctionPropertiesAnalysis>(F) 204 .DirectCallsToDefinedFunctions; 205 } 206 207 // Update the internal state of the advisor, and force invalidate feature 208 // analysis. Currently, we maintain minimal (and very simple) global state - the 209 // number of functions and the number of static calls. We also keep track of the 210 // total IR size in this module, to stop misbehaving policies at a certain bloat 211 // factor (SizeIncreaseThreshold) 212 void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, 213 bool CalleeWasDeleted) { 214 assert(!ForceStop); 215 Function *Caller = Advice.getCaller(); 216 Function *Callee = Advice.getCallee(); 217 218 // The caller features aren't valid anymore. 219 { 220 PreservedAnalyses PA = PreservedAnalyses::all(); 221 PA.abandon<FunctionPropertiesAnalysis>(); 222 FAM.invalidate(*Caller, PA); 223 } 224 int64_t IRSizeAfter = 225 getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); 226 CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); 227 if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize) 228 ForceStop = true; 229 230 // We can delta-update module-wide features. We know the inlining only changed 231 // the caller, and maybe the callee (by deleting the latter). 232 // Nodes are simple to update. 233 // For edges, we 'forget' the edges that the caller and callee used to have 234 // before inlining, and add back what they currently have together. 235 int64_t NewCallerAndCalleeEdges = 236 FAM.getResult<FunctionPropertiesAnalysis>(*Caller) 237 .DirectCallsToDefinedFunctions; 238 239 if (CalleeWasDeleted) 240 --NodeCount; 241 else 242 NewCallerAndCalleeEdges += 243 FAM.getResult<FunctionPropertiesAnalysis>(*Callee) 244 .DirectCallsToDefinedFunctions; 245 EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); 246 assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); 247 } 248 249 int64_t MLInlineAdvisor::getModuleIRSize() const { 250 int64_t Ret = 0; 251 for (auto &F : M) 252 if (!F.isDeclaration()) 253 Ret += getIRSize(F); 254 return Ret; 255 } 256 257 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { 258 auto &Caller = *CB.getCaller(); 259 auto &Callee = *CB.getCalledFunction(); 260 261 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { 262 return FAM.getResult<AssumptionAnalysis>(F); 263 }; 264 auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); 265 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); 266 267 auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); 268 // If this is a "never inline" case, there won't be any changes to internal 269 // state we need to track, so we can just return the base InlineAdvice, which 270 // will do nothing interesting. 271 // Same thing if this is a recursive case. 272 if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never || 273 &Caller == &Callee) 274 return getMandatoryAdvice(CB, false); 275 276 bool Mandatory = 277 MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always; 278 279 // If we need to stop, we won't want to track anymore any state changes, so 280 // we just return the base InlineAdvice, which acts as a noop. 281 if (ForceStop) { 282 ORE.emit([&] { 283 return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB) 284 << "Won't attempt inlining because module size grew too much."; 285 }); 286 return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory); 287 } 288 289 int CostEstimate = 0; 290 if (!Mandatory) { 291 auto IsCallSiteInlinable = 292 llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache); 293 if (!IsCallSiteInlinable) { 294 // We can't inline this for correctness reasons, so return the base 295 // InlineAdvice, as we don't care about tracking any state changes (which 296 // won't happen). 297 return std::make_unique<InlineAdvice>(this, CB, ORE, false); 298 } 299 CostEstimate = *IsCallSiteInlinable; 300 } 301 302 const auto CostFeatures = 303 llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache); 304 if (!CostFeatures) { 305 return std::make_unique<InlineAdvice>(this, CB, ORE, false); 306 } 307 308 if (Mandatory) 309 return getMandatoryAdvice(CB, true); 310 311 auto NrCtantParams = 0; 312 for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { 313 NrCtantParams += (isa<Constant>(*I)); 314 } 315 316 auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller); 317 auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee); 318 319 *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) = 320 CalleeBefore.BasicBlockCount; 321 *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) = 322 getInitialFunctionLevel(Caller); 323 *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount; 324 *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams; 325 *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount; 326 *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) = 327 CallerBefore.Uses; 328 *ModelRunner->getTensor<int64_t>( 329 FeatureIndex::CallerConditionallyExecutedBlocks) = 330 CallerBefore.BlocksReachedFromConditionalInstruction; 331 *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) = 332 CallerBefore.BasicBlockCount; 333 *ModelRunner->getTensor<int64_t>( 334 FeatureIndex::CalleeConditionallyExecutedBlocks) = 335 CalleeBefore.BlocksReachedFromConditionalInstruction; 336 *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) = 337 CalleeBefore.Uses; 338 *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate; 339 340 // Add the cost features 341 for (size_t I = 0; 342 I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) { 343 *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature( 344 static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I); 345 } 346 347 return getAdviceFromModel(CB, ORE); 348 } 349 350 std::unique_ptr<MLInlineAdvice> 351 MLInlineAdvisor::getAdviceFromModel(CallBase &CB, 352 OptimizationRemarkEmitter &ORE) { 353 return std::make_unique<MLInlineAdvice>( 354 this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>())); 355 } 356 357 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, 358 bool Advice) { 359 // Make sure we track inlinings in all cases - mandatory or not. 360 if (Advice && !ForceStop) 361 return getMandatoryAdviceImpl(CB); 362 363 // If this is a "never inline" case, there won't be any changes to internal 364 // state we need to track, so we can just return the base InlineAdvice, which 365 // will do nothing interesting. 366 // Same if we are forced to stop - we don't track anymore. 367 return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice); 368 } 369 370 std::unique_ptr<MLInlineAdvice> 371 MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { 372 return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true); 373 } 374 375 void MLInlineAdvice::reportContextForRemark( 376 DiagnosticInfoOptimizationBase &OR) { 377 using namespace ore; 378 OR << NV("Callee", Callee->getName()); 379 for (size_t I = 0; I < NumberOfFeatures; ++I) 380 OR << NV(FeatureNameMap[I], 381 *getAdvisor()->getModelRunner().getTensor<int64_t>(I)); 382 OR << NV("ShouldInline", isInliningRecommended()); 383 } 384 385 void MLInlineAdvice::recordInliningImpl() { 386 ORE.emit([&]() { 387 OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block); 388 reportContextForRemark(R); 389 return R; 390 }); 391 getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false); 392 } 393 394 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() { 395 ORE.emit([&]() { 396 OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc, 397 Block); 398 reportContextForRemark(R); 399 return R; 400 }); 401 getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true); 402 } 403 404 void MLInlineAdvice::recordUnsuccessfulInliningImpl( 405 const InlineResult &Result) { 406 ORE.emit([&]() { 407 OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful", 408 DLoc, Block); 409 reportContextForRemark(R); 410 return R; 411 }); 412 } 413 void MLInlineAdvice::recordUnattemptedInliningImpl() { 414 ORE.emit([&]() { 415 OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block); 416 reportContextForRemark(R); 417 return R; 418 }); 419 } 420 #endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) 421