//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)

#include <limits>
#include <unordered_map>
#include <unordered_set>

#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"

using namespace llvm;

#ifdef LLVM_HAVE_TF_AOT
#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
#include "llvm/Analysis/InlineModelFeatureMaps.h"

/// Factory for the 'release' mode advisor: wraps the AOT-compiled
/// InlinerSizeModel in a ReleaseModeModelRunner and hands it to the
/// MLInlineAdvisor.
std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureNameMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif

#define DEBUG_TYPE "inline-ml"

// Once CurrentIRSize exceeds SizeIncreaseThreshold * InitialIRSize, the
// advisor sets ForceStop and refuses all further (non-mandatory) inlining.
static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

// Feature names, indexed to match FeatureIndex. The ordering contract below
// (cost features first) must agree with inlineCostFeatureToMlFeature.
// clang-format off
const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
// InlineCost features - these must come first
#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";

/// Return \p I as a call site if it is a direct call to a function that has a
/// definition in this module; nullptr otherwise (indirect calls and calls to
/// declarations are not inlinable here).
CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction()) {
      if (!Callee->isDeclaration()) {
        return CS;
      }
    }
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  //
  // A throwaway CallGraph is built just to drive the bottom-up SCC walk;
  // levels are keyed by the LazyCallGraph nodes (CG), which persist.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In bottom up traversal, an inlinable callee is either in the
          // same SCC, or to a function in a visited SCC. So not finding its
          // level means we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    // All functions in one SCC share the same level.
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  // Seed the module-wide node and edge counts from the defined functions
  // discovered above; these are delta-updated as inlining proceeds.
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

/// The precomputed 'call site height' of F's node, or 0 if F is not in the
/// lazy call graph.
unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

void MLInlineAdvisor::onPassEntry() {
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  // merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  // splits
  // This means that the NodesInLastSCC is a superset (not strict) of the nodes
  // that subsequent passes would have processed
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  // they'd be adjacent to Nodes in the last SCC. So we just need to check the
  // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  // care about the nature of the Edge (call or ref).
  //
  // Remove last SCC's contribution, then re-add whatever survived (plus any
  // newly discovered adjacent nodes) by draining the NodesInLastSCC worklist.
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      // First time we see this node: queue it so its own edges get counted.
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  // EdgesOfLastSeenNodes was added in (by the loop above, for survivors) and
  // recorded at onPassExit; subtract the stale snapshot.
  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}

void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  if (!LastSCC)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes that
  // survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}

/// Number of direct calls from F to functions defined in this module, as
/// computed by FunctionPropertiesAnalysis.
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return FAM.getResult<FunctionPropertiesAnalysis>(F)
      .DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state - the
// number of functions and the number of static calls. We also keep track of the
// total IR size in this module, to stop misbehaving policies at a certain bloat
// factor (SizeIncreaseThreshold)
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  // Invalidate only FunctionPropertiesAnalysis for the caller: start from
  // PreservedAnalyses::all() and abandon just that one analysis.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  // If the callee was deleted, its size no longer contributes; either way the
  // pre-inlining sizes of caller+callee are replaced by the new caller size.
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
          .DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
            .DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

/// Sum of getIRSize over every function defined in this module; used as the
/// baseline for the bloat-factor check.
int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  // NOTE(review): assumes CB is a direct call (getCalledFunction() non-null) -
  // presumably guaranteed by the inliner only querying direct call sites.
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we won't want to track anymore any state changes, so
  // we just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  // Cost features are computed even for mandatory sites (below) so that the
  // same inlinability screen applies before the early mandatory return.
  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  // Count the call arguments that are constants.
  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
  auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);

  // Populate the model's input tensors, one per feature.
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}

/// Run the model on the already-populated feature tensors and wrap its
/// boolean decision in an MLInlineAdvice.
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we don't track anymore.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

/// A state-tracking (MLInlineAdvice) "always inline" advice.
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}

/// Append to the remark the callee name, every feature value currently in the
/// model runner's tensors, and the decision - for offline training/debugging.
void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureNameMap[I],
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

void MLInlineAdvice::recordInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
}

// No advisor state update here: the attempted inlining did not happen, so the
// module-wide counts are unchanged.
void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}
void MLInlineAdvice::recordUnattemptedInliningImpl() {
  ORE.emit([&]() {
    // NOTE(review): "IniningNotAttempted" looks like a typo for
    // "InliningNotAttempted". Left unchanged here because remark names are
    // matched by tests/tooling - confirm before renaming.
    OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}
#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)