1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the interface between the inliner and a learned model. 10 // It delegates model evaluation to either the AOT compiled model (the 11 // 'release' mode) or a runtime-loaded model (the 'development' case). 12 // 13 //===----------------------------------------------------------------------===// 14 #include "llvm/Analysis/MLInlineAdvisor.h" 15 #include "llvm/ADT/SCCIterator.h" 16 #include "llvm/Analysis/AssumptionCache.h" 17 #include "llvm/Analysis/CallGraph.h" 18 #include "llvm/Analysis/FunctionPropertiesAnalysis.h" 19 #include "llvm/Analysis/InlineCost.h" 20 #include "llvm/Analysis/InlineModelFeatureMaps.h" 21 #include "llvm/Analysis/LazyCallGraph.h" 22 #include "llvm/Analysis/MLModelRunner.h" 23 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 24 #include "llvm/Analysis/TargetTransformInfo.h" 25 #include "llvm/IR/InstIterator.h" 26 #include "llvm/IR/PassManager.h" 27 #include "llvm/Support/CommandLine.h" 28 29 using namespace llvm; 30 31 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL) 32 #include "llvm/Analysis/ReleaseModeModelRunner.h" 33 // codegen-ed file 34 #include "InlinerSizeModel.h" // NOLINT 35 36 std::unique_ptr<InlineAdvisor> 37 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { 38 auto AOTRunner = 39 std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>( 40 M.getContext(), FeatureNameMap, DecisionName); 41 return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner)); 42 } 43 #endif 44 45 #define DEBUG_TYPE "inline-ml" 46 47 static cl::opt<float> SizeIncreaseThreshold( 48 "ml-advisor-size-increase-threshold", cl::Hidden, 49 cl::desc("Maximum factor by which expected native size may increase before " 50 "blocking any further inlining."), 51 cl::init(2.0)); 52 53 // clang-format off 54 const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{ 55 // InlineCost features - these must come first 56 #define POPULATE_NAMES(INDEX_NAME, NAME) NAME, 57 INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES) 58 #undef POPULATE_NAMES 59 60 // Non-cost features 61 #define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, 62 INLINE_FEATURE_ITERATOR(POPULATE_NAMES) 63 #undef POPULATE_NAMES 64 }; 65 // clang-format on 66 67 const char *const llvm::DecisionName = "inlining_decision"; 68 const char *const llvm::DefaultDecisionName = "inlining_default"; 69 const char *const llvm::RewardName = "delta_size"; 70 71 CallBase *getInlinableCS(Instruction &I) { 72 if (auto *CS = dyn_cast<CallBase>(&I)) 73 if (Function *Callee = CS->getCalledFunction()) { 74 if (!Callee->isDeclaration()) { 75 return CS; 76 } 77 } 78 return nullptr; 79 } 80 81 MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, 82 std::unique_ptr<MLModelRunner> Runner) 83 : InlineAdvisor( 84 M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), 85 ModelRunner(std::move(Runner)), 86 CG(MAM.getResult<LazyCallGraphAnalysis>(M)), 87 InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { 88 assert(ModelRunner); 89 90 // Extract the 'call site height' feature - the position of a call site 91 // relative to the farthest statically reachable SCC node. We don't mutate 92 // this value while inlining happens. Empirically, this feature proved 93 // critical in behavioral cloning - i.e. training a model to mimic the manual 94 // heuristic's decisions - and, thus, equally important for training for 95 // improvement. 96 CallGraph CGraph(M); 97 for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) { 98 const std::vector<CallGraphNode *> &CGNodes = *I; 99 unsigned Level = 0; 100 for (auto *CGNode : CGNodes) { 101 Function *F = CGNode->getFunction(); 102 if (!F || F->isDeclaration()) 103 continue; 104 for (auto &I : instructions(F)) { 105 if (auto *CS = getInlinableCS(I)) { 106 auto *Called = CS->getCalledFunction(); 107 auto Pos = FunctionLevels.find(&CG.get(*Called)); 108 // In bottom up traversal, an inlinable callee is either in the 109 // same SCC, or to a function in a visited SCC. So not finding its 110 // level means we haven't visited it yet, meaning it's in this SCC. 111 if (Pos == FunctionLevels.end()) 112 continue; 113 Level = std::max(Level, Pos->second + 1); 114 } 115 } 116 } 117 for (auto *CGNode : CGNodes) { 118 Function *F = CGNode->getFunction(); 119 if (F && !F->isDeclaration()) 120 FunctionLevels[&CG.get(*F)] = Level; 121 } 122 } 123 for (auto KVP : FunctionLevels) { 124 AllNodes.insert(KVP.first); 125 EdgeCount += getLocalCalls(KVP.first->getFunction()); 126 } 127 NodeCount = AllNodes.size(); 128 } 129 130 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const { 131 return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0; 132 } 133 134 void MLInlineAdvisor::onPassEntry() { 135 // Function passes executed between InlinerPass runs may have changed the 136 // module-wide features. 137 // The cgscc pass manager rules are such that: 138 // - if a pass leads to merging SCCs, then the pipeline is restarted on the 139 // merged SCC 140 // - if a pass leads to splitting the SCC, then we continue with one of the 141 // splits 142 // This means that the NodesInLastSCC is a superset (not strict) of the nodes 143 // that subsequent passes would have processed 144 // - in addition, if new Nodes were created by a pass (e.g. CoroSplit), 145 // they'd be adjacent to Nodes in the last SCC. So we just need to check the 146 // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't 147 // care about the nature of the Edge (call or ref). 148 NodeCount -= static_cast<int64_t>(NodesInLastSCC.size()); 149 while (!NodesInLastSCC.empty()) { 150 const auto *N = NodesInLastSCC.front(); 151 NodesInLastSCC.pop_front(); 152 // The Function wrapped by N could have been deleted since we last saw it. 153 if (N->isDead()) { 154 assert(!N->getFunction().isDeclaration()); 155 continue; 156 } 157 ++NodeCount; 158 EdgeCount += getLocalCalls(N->getFunction()); 159 for (const auto &E : *(*N)) { 160 const auto *AdjNode = &E.getNode(); 161 assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration()); 162 auto I = AllNodes.insert(AdjNode); 163 if (I.second) 164 NodesInLastSCC.push_back(AdjNode); 165 } 166 } 167 168 EdgeCount -= EdgesOfLastSeenNodes; 169 EdgesOfLastSeenNodes = 0; 170 } 171 172 void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) { 173 if (!LastSCC) 174 return; 175 // Keep track of the nodes and edges we last saw. Then, in onPassEntry, 176 // we update the node count and edge count from the subset of these nodes that 177 // survived. 178 assert(NodesInLastSCC.empty()); 179 assert(NodeCount >= LastSCC->size()); 180 EdgesOfLastSeenNodes = 0; 181 for (const auto &N : *LastSCC) { 182 assert(!N.isDead()); 183 EdgesOfLastSeenNodes += getLocalCalls(N.getFunction()); 184 NodesInLastSCC.push_back(&N); 185 } 186 assert(EdgeCount >= EdgesOfLastSeenNodes); 187 } 188 189 int64_t MLInlineAdvisor::getLocalCalls(Function &F) { 190 return FAM.getResult<FunctionPropertiesAnalysis>(F) 191 .DirectCallsToDefinedFunctions; 192 } 193 194 // Update the internal state of the advisor, and force invalidate feature 195 // analysis. Currently, we maintain minimal (and very simple) global state - the 196 // number of functions and the number of static calls. We also keep track of the 197 // total IR size in this module, to stop misbehaving policies at a certain bloat 198 // factor (SizeIncreaseThreshold) 199 void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, 200 bool CalleeWasDeleted) { 201 assert(!ForceStop); 202 Function *Caller = Advice.getCaller(); 203 Function *Callee = Advice.getCallee(); 204 205 // The caller features aren't valid anymore. 206 { 207 PreservedAnalyses PA = PreservedAnalyses::all(); 208 PA.abandon<FunctionPropertiesAnalysis>(); 209 FAM.invalidate(*Caller, PA); 210 } 211 int64_t IRSizeAfter = 212 getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); 213 CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); 214 if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize) 215 ForceStop = true; 216 217 // We can delta-update module-wide features. We know the inlining only changed 218 // the caller, and maybe the callee (by deleting the latter). 219 // Nodes are simple to update. 220 // For edges, we 'forget' the edges that the caller and callee used to have 221 // before inlining, and add back what they currently have together. 222 int64_t NewCallerAndCalleeEdges = 223 FAM.getResult<FunctionPropertiesAnalysis>(*Caller) 224 .DirectCallsToDefinedFunctions; 225 226 if (CalleeWasDeleted) 227 --NodeCount; 228 else 229 NewCallerAndCalleeEdges += 230 FAM.getResult<FunctionPropertiesAnalysis>(*Callee) 231 .DirectCallsToDefinedFunctions; 232 EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); 233 assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); 234 } 235 236 int64_t MLInlineAdvisor::getModuleIRSize() const { 237 int64_t Ret = 0; 238 for (auto &F : M) 239 if (!F.isDeclaration()) 240 Ret += getIRSize(F); 241 return Ret; 242 } 243 244 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { 245 auto &Caller = *CB.getCaller(); 246 auto &Callee = *CB.getCalledFunction(); 247 248 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { 249 return FAM.getResult<AssumptionAnalysis>(F); 250 }; 251 auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); 252 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); 253 254 auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); 255 // If this is a "never inline" case, there won't be any changes to internal 256 // state we need to track, so we can just return the base InlineAdvice, which 257 // will do nothing interesting. 258 // Same thing if this is a recursive case. 259 if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never || 260 &Caller == &Callee) 261 return getMandatoryAdvice(CB, false); 262 263 bool Mandatory = 264 MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always; 265 266 // If we need to stop, we won't want to track anymore any state changes, so 267 // we just return the base InlineAdvice, which acts as a noop. 268 if (ForceStop) { 269 ORE.emit([&] { 270 return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB) 271 << "Won't attempt inlining because module size grew too much."; 272 }); 273 return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory); 274 } 275 276 int CostEstimate = 0; 277 if (!Mandatory) { 278 auto IsCallSiteInlinable = 279 llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache); 280 if (!IsCallSiteInlinable) { 281 // We can't inline this for correctness reasons, so return the base 282 // InlineAdvice, as we don't care about tracking any state changes (which 283 // won't happen). 284 return std::make_unique<InlineAdvice>(this, CB, ORE, false); 285 } 286 CostEstimate = *IsCallSiteInlinable; 287 } 288 289 const auto CostFeatures = 290 llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache); 291 if (!CostFeatures) { 292 return std::make_unique<InlineAdvice>(this, CB, ORE, false); 293 } 294 295 if (Mandatory) 296 return getMandatoryAdvice(CB, true); 297 298 auto NrCtantParams = 0; 299 for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { 300 NrCtantParams += (isa<Constant>(*I)); 301 } 302 303 auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller); 304 auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee); 305 306 *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) = 307 CalleeBefore.BasicBlockCount; 308 *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) = 309 getInitialFunctionLevel(Caller); 310 *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount; 311 *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams; 312 *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount; 313 *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) = 314 CallerBefore.Uses; 315 *ModelRunner->getTensor<int64_t>( 316 FeatureIndex::CallerConditionallyExecutedBlocks) = 317 CallerBefore.BlocksReachedFromConditionalInstruction; 318 *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) = 319 CallerBefore.BasicBlockCount; 320 *ModelRunner->getTensor<int64_t>( 321 FeatureIndex::CalleeConditionallyExecutedBlocks) = 322 CalleeBefore.BlocksReachedFromConditionalInstruction; 323 *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) = 324 CalleeBefore.Uses; 325 *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate; 326 327 // Add the cost features 328 for (size_t I = 0; 329 I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) { 330 *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature( 331 static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I); 332 } 333 334 return getAdviceFromModel(CB, ORE); 335 } 336 337 std::unique_ptr<MLInlineAdvice> 338 MLInlineAdvisor::getAdviceFromModel(CallBase &CB, 339 OptimizationRemarkEmitter &ORE) { 340 return std::make_unique<MLInlineAdvice>( 341 this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>())); 342 } 343 344 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, 345 bool Advice) { 346 // Make sure we track inlinings in all cases - mandatory or not. 347 if (Advice && !ForceStop) 348 return getMandatoryAdviceImpl(CB); 349 350 // If this is a "never inline" case, there won't be any changes to internal 351 // state we need to track, so we can just return the base InlineAdvice, which 352 // will do nothing interesting. 353 // Same if we are forced to stop - we don't track anymore. 354 return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice); 355 } 356 357 std::unique_ptr<MLInlineAdvice> 358 MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { 359 return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true); 360 } 361 362 void MLInlineAdvice::reportContextForRemark( 363 DiagnosticInfoOptimizationBase &OR) { 364 using namespace ore; 365 OR << NV("Callee", Callee->getName()); 366 for (size_t I = 0; I < NumberOfFeatures; ++I) 367 OR << NV(FeatureNameMap[I], 368 *getAdvisor()->getModelRunner().getTensor<int64_t>(I)); 369 OR << NV("ShouldInline", isInliningRecommended()); 370 } 371 372 void MLInlineAdvice::recordInliningImpl() { 373 ORE.emit([&]() { 374 OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block); 375 reportContextForRemark(R); 376 return R; 377 }); 378 getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false); 379 } 380 381 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() { 382 ORE.emit([&]() { 383 OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc, 384 Block); 385 reportContextForRemark(R); 386 return R; 387 }); 388 getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true); 389 } 390 391 void MLInlineAdvice::recordUnsuccessfulInliningImpl( 392 const InlineResult &Result) { 393 ORE.emit([&]() { 394 OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful", 395 DLoc, Block); 396 reportContextForRemark(R); 397 return R; 398 }); 399 } 400 void MLInlineAdvice::recordUnattemptedInliningImpl() { 401 ORE.emit([&]() { 402 OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block); 403 reportContextForRemark(R); 404 return R; 405 }); 406 } 407