//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT

std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureNameMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif

#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

// clang-format off
const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
// InlineCost features - these must come first
#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";

CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction())
      if (!Callee->isDeclaration())
        return CS;
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
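  // Illustrative example (hypothetical functions, not from this code): given
  //   void g() {}
  //   void f() { g(); }
  //   void h() { f(); }
  // and no cycles, the bottom-up SCC walk below assigns g level 0, f level 1
  // (g's level + 1), and h level 2; a call site inside h then reports a call
  // site height of 2.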
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In a bottom-up traversal, an inlinable callee is either in the
          // same SCC as the caller, or in an already-visited SCC. So not
          // finding its level means we haven't visited it yet, meaning it's
          // in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

void MLInlineAdvisor::onPassEntry() {
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  // merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  // splits
  // This means that NodesInLastSCC is a (not necessarily strict) superset of
  // the nodes that subsequent passes would have processed.
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  // they'd be adjacent to Nodes in the last SCC. So we just need to check the
  // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  // care about the nature of the Edge (call or ref).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}

void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  if (!LastSCC)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
  // we update the node count and edge count from the subset of these nodes
  // that survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}
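// Count F's direct calls to defined functions. This is the per-function
// contribution to the module-wide edge count that the advisor delta-updates
// as inlining proceeds.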
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return FAM.getResult<FunctionPropertiesAnalysis>(F)
      .DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force-invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state -
// the number of functions and the number of static calls. We also keep track
// of the total IR size in this module, to stop misbehaving policies at a
// certain bloat factor (SizeIncreaseThreshold).
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only
  // changed the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
          .DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
            .DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}
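// Compute the features for this call site and ask the model for a decision.
// "Never inline" cases, recursive calls, and anything after ForceStop bypass
// the model and yield the base (no-op) InlineAdvice; mandatory ("always
// inline") cases are routed to getMandatoryAdvice instead.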
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we no longer want to track any state changes, so we
  // just return the base InlineAdvice, which acts as a no-op.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  // Count the constant arguments at this call site.
  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
  auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);

  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures);
       ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}
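// Run the model over the tensors populated in getAdviceImpl. The model's
// scalar output (DecisionName, "inlining_decision") is interpreted as a
// boolean: nonzero recommends inlining.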
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // The same applies if we are forced to stop: we no longer track state.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}

void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureNameMap[I],
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

void MLInlineAdvice::recordInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}