1 //===- Inliner.cpp - Code common to all inliners --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the mechanics required to implement inlining without 10 // missing any calls and updating the call graph. The decisions of which calls 11 // are profitable to inline are implemented elsewhere. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/Inliner.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/None.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/PriorityWorklist.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/ScopeExit.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Analysis/AssumptionCache.h" 28 #include "llvm/Analysis/BasicAliasAnalysis.h" 29 #include "llvm/Analysis/BlockFrequencyInfo.h" 30 #include "llvm/Analysis/CGSCCPassManager.h" 31 #include "llvm/Analysis/CallGraph.h" 32 #include "llvm/Analysis/GlobalsModRef.h" 33 #include "llvm/Analysis/InlineAdvisor.h" 34 #include "llvm/Analysis/InlineCost.h" 35 #include "llvm/Analysis/InlineOrder.h" 36 #include "llvm/Analysis/LazyCallGraph.h" 37 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 38 #include "llvm/Analysis/ProfileSummaryInfo.h" 39 #include "llvm/Analysis/ReplayInlineAdvisor.h" 40 #include "llvm/Analysis/TargetLibraryInfo.h" 41 #include "llvm/Analysis/TargetTransformInfo.h" 42 #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" 43 #include "llvm/IR/Attributes.h" 44 #include "llvm/IR/BasicBlock.h" 45 #include "llvm/IR/DataLayout.h" 46 #include "llvm/IR/DebugLoc.h" 47 #include "llvm/IR/DerivedTypes.h" 48 #include "llvm/IR/DiagnosticInfo.h" 49 #include "llvm/IR/Function.h" 50 #include "llvm/IR/InstIterator.h" 51 #include "llvm/IR/Instruction.h" 52 #include "llvm/IR/Instructions.h" 53 #include "llvm/IR/IntrinsicInst.h" 54 #include "llvm/IR/Metadata.h" 55 #include "llvm/IR/Module.h" 56 #include "llvm/IR/PassManager.h" 57 #include "llvm/IR/User.h" 58 #include "llvm/IR/Value.h" 59 #include "llvm/Pass.h" 60 #include "llvm/Support/Casting.h" 61 #include "llvm/Support/CommandLine.h" 62 #include "llvm/Support/Debug.h" 63 #include "llvm/Support/raw_ostream.h" 64 #include "llvm/Transforms/Utils/CallPromotionUtils.h" 65 #include "llvm/Transforms/Utils/Cloning.h" 66 #include "llvm/Transforms/Utils/Local.h" 67 #include "llvm/Transforms/Utils/ModuleUtils.h" 68 #include <algorithm> 69 #include <cassert> 70 #include <functional> 71 #include <sstream> 72 #include <tuple> 73 #include <utility> 74 #include <vector> 75 76 using namespace llvm; 77 78 #define DEBUG_TYPE "inline" 79 80 STATISTIC(NumInlined, "Number of functions inlined"); 81 STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); 82 STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); 83 STATISTIC(NumMergedAllocas, "Number of allocas merged together"); 84 85 /// Flag to disable manual alloca merging. 86 /// 87 /// Merging of allocas was originally done as a stack-size saving technique 88 /// prior to LLVM's code generator having support for stack coloring based on 89 /// lifetime markers. It is now in the process of being removed. To experiment 90 /// with disabling it and relying fully on lifetime marker based stack 91 /// coloring, you can pass this flag to LLVM. 92 static cl::opt<bool> 93 DisableInlinedAllocaMerging("disable-inlined-alloca-merging", 94 cl::init(false), cl::Hidden); 95 96 /// A flag for test, so we can print the content of the advisor when running it 97 /// as part of the default (e.g. -O3) pipeline. 98 static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing", 99 cl::init(false), cl::Hidden); 100 101 extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats; 102 103 static cl::opt<std::string> CGSCCInlineReplayFile( 104 "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"), 105 cl::desc( 106 "Optimization remarks file containing inline remarks to be replayed " 107 "by cgscc inlining."), 108 cl::Hidden); 109 110 static cl::opt<ReplayInlinerSettings::Scope> CGSCCInlineReplayScope( 111 "cgscc-inline-replay-scope", 112 cl::init(ReplayInlinerSettings::Scope::Function), 113 cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", 114 "Replay on functions that have remarks associated " 115 "with them (default)"), 116 clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", 117 "Replay on the entire module")), 118 cl::desc("Whether inline replay should be applied to the entire " 119 "Module or just the Functions (default) that are present as " 120 "callers in remarks during cgscc inlining."), 121 cl::Hidden); 122 123 static cl::opt<ReplayInlinerSettings::Fallback> CGSCCInlineReplayFallback( 124 "cgscc-inline-replay-fallback", 125 cl::init(ReplayInlinerSettings::Fallback::Original), 126 cl::values( 127 clEnumValN( 128 ReplayInlinerSettings::Fallback::Original, "Original", 129 "All decisions not in replay send to original advisor (default)"), 130 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, 131 "AlwaysInline", "All decisions not in replay are inlined"), 132 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", 133 "All decisions not in replay are not inlined")), 134 cl::desc( 135 "How cgscc inline replay treats sites that don't come from the replay. " 136 "Original: defers to original advisor, AlwaysInline: inline all sites " 137 "not in replay, NeverInline: inline no sites not in replay"), 138 cl::Hidden); 139 140 static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat( 141 "cgscc-inline-replay-format", 142 cl::init(CallSiteFormat::Format::LineColumnDiscriminator), 143 cl::values( 144 clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), 145 clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", 146 "<Line Number>:<Column Number>"), 147 clEnumValN(CallSiteFormat::Format::LineDiscriminator, 148 "LineDiscriminator", "<Line Number>.<Discriminator>"), 149 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, 150 "LineColumnDiscriminator", 151 "<Line Number>:<Column Number>.<Discriminator> (default)")), 152 cl::desc("How cgscc inline replay file is formatted"), cl::Hidden); 153 154 static cl::opt<bool> InlineEnablePriorityOrder( 155 "inline-enable-priority-order", cl::Hidden, cl::init(false), 156 cl::desc("Enable the priority inline order for the inliner")); 157 158 LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} 159 160 LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) 161 : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} 162 163 /// For this class, we declare that we require and preserve the call graph. 164 /// If the derived class implements this method, it should 165 /// always explicitly call the implementation here. 166 void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { 167 AU.addRequired<AssumptionCacheTracker>(); 168 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 169 AU.addRequired<TargetLibraryInfoWrapperPass>(); 170 getAAResultsAnalysisUsage(AU); 171 CallGraphSCCPass::getAnalysisUsage(AU); 172 } 173 174 using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>; 175 176 /// Look at all of the allocas that we inlined through this call site. If we 177 /// have already inlined other allocas through other calls into this function, 178 /// then we know that they have disjoint lifetimes and that we can merge them. 179 /// 180 /// There are many heuristics possible for merging these allocas, and the 181 /// different options have different tradeoffs. One thing that we *really* 182 /// don't want to hurt is SRoA: once inlining happens, often allocas are no 183 /// longer address taken and so they can be promoted. 184 /// 185 /// Our "solution" for that is to only merge allocas whose outermost type is an 186 /// array type. These are usually not promoted because someone is using a 187 /// variable index into them. These are also often the most important ones to 188 /// merge. 189 /// 190 /// A better solution would be to have real memory lifetime markers in the IR 191 /// and not have the inliner do any merging of allocas at all. This would 192 /// allow the backend to do proper stack slot coloring of all allocas that 193 /// *actually make it to the backend*, which is really what we want. 194 /// 195 /// Because we don't have this information, we do this simple and useful hack. 196 static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, 197 InlinedArrayAllocasTy &InlinedArrayAllocas, 198 int InlineHistory) { 199 SmallPtrSet<AllocaInst *, 16> UsedAllocas; 200 201 // When processing our SCC, check to see if the call site was inlined from 202 // some other call site. For example, if we're processing "A" in this code: 203 // A() { B() } 204 // B() { x = alloca ... C() } 205 // C() { y = alloca ... } 206 // Assume that C was not inlined into B initially, and so we're processing A 207 // and decide to inline B into A. Doing this makes an alloca available for 208 // reuse and makes a callsite (C) available for inlining. When we process 209 // the C call site we don't want to do any alloca merging between X and Y 210 // because their scopes are not disjoint. We could make this smarter by 211 // keeping track of the inline history for each alloca in the 212 // InlinedArrayAllocas but this isn't likely to be a significant win. 213 if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. 214 return; 215 216 // Loop over all the allocas we have so far and see if they can be merged with 217 // a previously inlined alloca. If not, remember that we had it. 218 for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E; 219 ++AllocaNo) { 220 AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; 221 222 // Don't bother trying to merge array allocations (they will usually be 223 // canonicalized to be an allocation *of* an array), or allocations whose 224 // type is not itself an array (because we're afraid of pessimizing SRoA). 225 ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); 226 if (!ATy || AI->isArrayAllocation()) 227 continue; 228 229 // Get the list of all available allocas for this array type. 230 std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy]; 231 232 // Loop over the allocas in AllocasForType to see if we can reuse one. Note 233 // that we have to be careful not to reuse the same "available" alloca for 234 // multiple different allocas that we just inlined, we use the 'UsedAllocas' 235 // set to keep track of which "available" allocas are being used by this 236 // function. Also, AllocasForType can be empty of course! 237 bool MergedAwayAlloca = false; 238 for (AllocaInst *AvailableAlloca : AllocasForType) { 239 Align Align1 = AI->getAlign(); 240 Align Align2 = AvailableAlloca->getAlign(); 241 242 // The available alloca has to be in the right function, not in some other 243 // function in this SCC. 244 if (AvailableAlloca->getParent() != AI->getParent()) 245 continue; 246 247 // If the inlined function already uses this alloca then we can't reuse 248 // it. 249 if (!UsedAllocas.insert(AvailableAlloca).second) 250 continue; 251 252 // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare 253 // success! 254 LLVM_DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI 255 << "\n\t\tINTO: " << *AvailableAlloca << '\n'); 256 257 // Move affected dbg.declare calls immediately after the new alloca to 258 // avoid the situation when a dbg.declare precedes its alloca. 259 if (auto *L = LocalAsMetadata::getIfExists(AI)) 260 if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) 261 for (User *U : MDV->users()) 262 if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) 263 DDI->moveBefore(AvailableAlloca->getNextNode()); 264 265 AI->replaceAllUsesWith(AvailableAlloca); 266 267 if (Align1 > Align2) 268 AvailableAlloca->setAlignment(AI->getAlign()); 269 270 AI->eraseFromParent(); 271 MergedAwayAlloca = true; 272 ++NumMergedAllocas; 273 IFI.StaticAllocas[AllocaNo] = nullptr; 274 break; 275 } 276 277 // If we already nuked the alloca, we're done with it. 278 if (MergedAwayAlloca) 279 continue; 280 281 // If we were unable to merge away the alloca either because there are no 282 // allocas of the right type available or because we reused them all 283 // already, remember that this alloca came from an inlined function and mark 284 // it used so we don't reuse it for other allocas from this inline 285 // operation. 286 AllocasForType.push_back(AI); 287 UsedAllocas.insert(AI); 288 } 289 } 290 291 /// If it is possible to inline the specified call site, 292 /// do so and update the CallGraph for this operation. 293 /// 294 /// This function also does some basic book-keeping to update the IR. The 295 /// InlinedArrayAllocas map keeps track of any allocas that are already 296 /// available from other functions inlined into the caller. If we are able to 297 /// inline this call site we attempt to reuse already available allocas or add 298 /// any new allocas to the set if not possible. 299 static InlineResult inlineCallIfPossible( 300 CallBase &CB, InlineFunctionInfo &IFI, 301 InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, 302 bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter, 303 ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { 304 Function *Callee = CB.getCalledFunction(); 305 Function *Caller = CB.getCaller(); 306 307 AAResults &AAR = AARGetter(*Callee); 308 309 // Try to inline the function. Get the list of static allocas that were 310 // inlined. 311 InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime); 312 if (!IR.isSuccess()) 313 return IR; 314 315 if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) 316 ImportedFunctionsStats.recordInline(*Caller, *Callee); 317 318 AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); 319 320 if (!DisableInlinedAllocaMerging) 321 mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory); 322 323 return IR; // success 324 } 325 326 /// Return true if the specified inline history ID 327 /// indicates an inline history that includes the specified function. 328 static bool inlineHistoryIncludes( 329 Function *F, int InlineHistoryID, 330 const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) { 331 while (InlineHistoryID != -1) { 332 assert(unsigned(InlineHistoryID) < InlineHistory.size() && 333 "Invalid inline history ID"); 334 if (InlineHistory[InlineHistoryID].first == F) 335 return true; 336 InlineHistoryID = InlineHistory[InlineHistoryID].second; 337 } 338 return false; 339 } 340 341 bool LegacyInlinerBase::doInitialization(CallGraph &CG) { 342 if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) 343 ImportedFunctionsStats.setModuleInfo(CG.getModule()); 344 return false; // No changes to CallGraph. 345 } 346 347 bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { 348 if (skipSCC(SCC)) 349 return false; 350 return inlineCalls(SCC); 351 } 352 353 static bool 354 inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, 355 std::function<AssumptionCache &(Function &)> GetAssumptionCache, 356 ProfileSummaryInfo *PSI, 357 std::function<const TargetLibraryInfo &(Function &)> GetTLI, 358 bool InsertLifetime, 359 function_ref<InlineCost(CallBase &CB)> GetInlineCost, 360 function_ref<AAResults &(Function &)> AARGetter, 361 ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { 362 SmallPtrSet<Function *, 8> SCCFunctions; 363 LLVM_DEBUG(dbgs() << "Inliner visiting SCC:"); 364 for (CallGraphNode *Node : SCC) { 365 Function *F = Node->getFunction(); 366 if (F) 367 SCCFunctions.insert(F); 368 LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); 369 } 370 371 // Scan through and identify all call sites ahead of time so that we only 372 // inline call sites in the original functions, not call sites that result 373 // from inlining other functions. 374 SmallVector<std::pair<CallBase *, int>, 16> CallSites; 375 376 // When inlining a callee produces new call sites, we want to keep track of 377 // the fact that they were inlined from the callee. This allows us to avoid 378 // infinite inlining in some obscure cases. To represent this, we use an 379 // index into the InlineHistory vector. 380 SmallVector<std::pair<Function *, int>, 8> InlineHistory; 381 382 for (CallGraphNode *Node : SCC) { 383 Function *F = Node->getFunction(); 384 if (!F || F->isDeclaration()) 385 continue; 386 387 OptimizationRemarkEmitter ORE(F); 388 for (BasicBlock &BB : *F) 389 for (Instruction &I : BB) { 390 auto *CB = dyn_cast<CallBase>(&I); 391 // If this isn't a call, or it is a call to an intrinsic, it can 392 // never be inlined. 393 if (!CB || isa<IntrinsicInst>(I)) 394 continue; 395 396 // If this is a direct call to an external function, we can never inline 397 // it. If it is an indirect call, inlining may resolve it to be a 398 // direct call, so we keep it. 399 if (Function *Callee = CB->getCalledFunction()) 400 if (Callee->isDeclaration()) { 401 using namespace ore; 402 403 setInlineRemark(*CB, "unavailable definition"); 404 ORE.emit([&]() { 405 return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) 406 << NV("Callee", Callee) << " will not be inlined into " 407 << NV("Caller", CB->getCaller()) 408 << " because its definition is unavailable" 409 << setIsVerbose(); 410 }); 411 continue; 412 } 413 414 CallSites.push_back(std::make_pair(CB, -1)); 415 } 416 } 417 418 LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); 419 420 // If there are no calls in this function, exit early. 421 if (CallSites.empty()) 422 return false; 423 424 // Now that we have all of the call sites, move the ones to functions in the 425 // current SCC to the end of the list. 426 unsigned FirstCallInSCC = CallSites.size(); 427 for (unsigned I = 0; I < FirstCallInSCC; ++I) 428 if (Function *F = CallSites[I].first->getCalledFunction()) 429 if (SCCFunctions.count(F)) 430 std::swap(CallSites[I--], CallSites[--FirstCallInSCC]); 431 432 InlinedArrayAllocasTy InlinedArrayAllocas; 433 InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI); 434 435 // Now that we have all of the call sites, loop over them and inline them if 436 // it looks profitable to do so. 437 bool Changed = false; 438 bool LocalChange; 439 do { 440 LocalChange = false; 441 // Iterate over the outer loop because inlining functions can cause indirect 442 // calls to become direct calls. 443 // CallSites may be modified inside so ranged for loop can not be used. 444 for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { 445 auto &P = CallSites[CSi]; 446 CallBase &CB = *P.first; 447 const int InlineHistoryID = P.second; 448 449 Function *Caller = CB.getCaller(); 450 Function *Callee = CB.getCalledFunction(); 451 452 // We can only inline direct calls to non-declarations. 453 if (!Callee || Callee->isDeclaration()) 454 continue; 455 456 bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller)); 457 458 if (!IsTriviallyDead) { 459 // If this call site was obtained by inlining another function, verify 460 // that the include path for the function did not include the callee 461 // itself. If so, we'd be recursively inlining the same function, 462 // which would provide the same callsites, which would cause us to 463 // infinitely inline. 464 if (InlineHistoryID != -1 && 465 inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { 466 setInlineRemark(CB, "recursive"); 467 continue; 468 } 469 } 470 471 // FIXME for new PM: because of the old PM we currently generate ORE and 472 // in turn BFI on demand. With the new PM, the ORE dependency should 473 // just become a regular analysis dependency. 474 OptimizationRemarkEmitter ORE(Caller); 475 476 auto OIC = shouldInline(CB, GetInlineCost, ORE); 477 // If the policy determines that we should inline this function, 478 // delete the call instead. 479 if (!OIC) 480 continue; 481 482 // If this call site is dead and it is to a readonly function, we should 483 // just delete the call instead of trying to inline it, regardless of 484 // size. This happens because IPSCCP propagates the result out of the 485 // call and then we're left with the dead call. 486 if (IsTriviallyDead) { 487 LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n"); 488 // Update the call graph by deleting the edge from Callee to Caller. 489 setInlineRemark(CB, "trivially dead"); 490 CG[Caller]->removeCallEdgeFor(CB); 491 CB.eraseFromParent(); 492 ++NumCallsDeleted; 493 } else { 494 // Get DebugLoc to report. CB will be invalid after Inliner. 495 DebugLoc DLoc = CB.getDebugLoc(); 496 BasicBlock *Block = CB.getParent(); 497 498 // Attempt to inline the function. 499 using namespace ore; 500 501 InlineResult IR = inlineCallIfPossible( 502 CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID, 503 InsertLifetime, AARGetter, ImportedFunctionsStats); 504 if (!IR.isSuccess()) { 505 setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " + 506 inlineCostStr(*OIC)); 507 ORE.emit([&]() { 508 return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, 509 Block) 510 << NV("Callee", Callee) << " will not be inlined into " 511 << NV("Caller", Caller) << ": " 512 << NV("Reason", IR.getFailureReason()); 513 }); 514 continue; 515 } 516 ++NumInlined; 517 518 emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC); 519 520 // If inlining this function gave us any new call sites, throw them 521 // onto our worklist to process. They are useful inline candidates. 522 if (!InlineInfo.InlinedCalls.empty()) { 523 // Create a new inline history entry for this, so that we remember 524 // that these new callsites came about due to inlining Callee. 525 int NewHistoryID = InlineHistory.size(); 526 InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); 527 528 #ifndef NDEBUG 529 // Make sure no dupplicates in the inline candidates. This could 530 // happen when a callsite is simpilfied to reusing the return value 531 // of another callsite during function cloning, thus the other 532 // callsite will be reconsidered here. 533 DenseSet<CallBase *> DbgCallSites; 534 for (auto &II : CallSites) 535 DbgCallSites.insert(II.first); 536 #endif 537 538 for (Value *Ptr : InlineInfo.InlinedCalls) { 539 #ifndef NDEBUG 540 assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0); 541 #endif 542 CallSites.push_back( 543 std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID)); 544 } 545 } 546 } 547 548 // If we inlined or deleted the last possible call site to the function, 549 // delete the function body now. 550 if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && 551 // TODO: Can remove if in SCC now. 552 !SCCFunctions.count(Callee) && 553 // The function may be apparently dead, but if there are indirect 554 // callgraph references to the node, we cannot delete it yet, this 555 // could invalidate the CGSCC iterator. 556 CG[Callee]->getNumReferences() == 0) { 557 LLVM_DEBUG(dbgs() << " -> Deleting dead function: " 558 << Callee->getName() << "\n"); 559 CallGraphNode *CalleeNode = CG[Callee]; 560 561 // Remove any call graph edges from the callee to its callees. 562 CalleeNode->removeAllCalledFunctions(); 563 564 // Removing the node for callee from the call graph and delete it. 565 delete CG.removeFunctionFromModule(CalleeNode); 566 ++NumDeleted; 567 } 568 569 // Remove this call site from the list. If possible, use 570 // swap/pop_back for efficiency, but do not use it if doing so would 571 // move a call site to a function in this SCC before the 572 // 'FirstCallInSCC' barrier. 573 if (SCC.isSingular()) { 574 CallSites[CSi] = CallSites.back(); 575 CallSites.pop_back(); 576 } else { 577 CallSites.erase(CallSites.begin() + CSi); 578 } 579 --CSi; 580 581 Changed = true; 582 LocalChange = true; 583 } 584 } while (LocalChange); 585 586 return Changed; 587 } 588 589 bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { 590 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 591 ACT = &getAnalysis<AssumptionCacheTracker>(); 592 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 593 GetTLI = [&](Function &F) -> const TargetLibraryInfo & { 594 return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 595 }; 596 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { 597 return ACT->getAssumptionCache(F); 598 }; 599 return inlineCallsImpl( 600 SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, 601 [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this), 602 ImportedFunctionsStats); 603 } 604 605 /// Remove now-dead linkonce functions at the end of 606 /// processing to avoid breaking the SCC traversal. 607 bool LegacyInlinerBase::doFinalization(CallGraph &CG) { 608 if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) 609 ImportedFunctionsStats.dump(InlinerFunctionImportStats == 610 InlinerFunctionImportStatsOpts::Verbose); 611 return removeDeadFunctions(CG); 612 } 613 614 /// Remove dead functions that are not included in DNR (Do Not Remove) list. 615 bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, 616 bool AlwaysInlineOnly) { 617 SmallVector<CallGraphNode *, 16> FunctionsToRemove; 618 SmallVector<Function *, 16> DeadFunctionsInComdats; 619 620 auto RemoveCGN = [&](CallGraphNode *CGN) { 621 // Remove any call graph edges from the function to its callees. 622 CGN->removeAllCalledFunctions(); 623 624 // Remove any edges from the external node to the function's call graph 625 // node. These edges might have been made irrelegant due to 626 // optimization of the program. 627 CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); 628 629 // Removing the node for callee from the call graph and delete it. 630 FunctionsToRemove.push_back(CGN); 631 }; 632 633 // Scan for all of the functions, looking for ones that should now be removed 634 // from the program. Insert the dead ones in the FunctionsToRemove set. 635 for (const auto &I : CG) { 636 CallGraphNode *CGN = I.second.get(); 637 Function *F = CGN->getFunction(); 638 if (!F || F->isDeclaration()) 639 continue; 640 641 // Handle the case when this function is called and we only want to care 642 // about always-inline functions. This is a bit of a hack to share code 643 // between here and the InlineAlways pass. 644 if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)) 645 continue; 646 647 // If the only remaining users of the function are dead constants, remove 648 // them. 649 F->removeDeadConstantUsers(); 650 651 if (!F->isDefTriviallyDead()) 652 continue; 653 654 // It is unsafe to drop a function with discardable linkage from a COMDAT 655 // without also dropping the other members of the COMDAT. 656 // The inliner doesn't visit non-function entities which are in COMDAT 657 // groups so it is unsafe to do so *unless* the linkage is local. 658 if (!F->hasLocalLinkage()) { 659 if (F->hasComdat()) { 660 DeadFunctionsInComdats.push_back(F); 661 continue; 662 } 663 } 664 665 RemoveCGN(CGN); 666 } 667 if (!DeadFunctionsInComdats.empty()) { 668 // Filter out the functions whose comdats remain alive. 669 filterDeadComdatFunctions(DeadFunctionsInComdats); 670 // Remove the rest. 671 for (Function *F : DeadFunctionsInComdats) 672 RemoveCGN(CG[F]); 673 } 674 675 if (FunctionsToRemove.empty()) 676 return false; 677 678 // Now that we know which functions to delete, do so. We didn't want to do 679 // this inline, because that would invalidate our CallGraph::iterator 680 // objects. :( 681 // 682 // Note that it doesn't matter that we are iterating over a non-stable order 683 // here to do this, it doesn't matter which order the functions are deleted 684 // in. 685 array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); 686 FunctionsToRemove.erase( 687 std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), 688 FunctionsToRemove.end()); 689 for (CallGraphNode *CGN : FunctionsToRemove) { 690 delete CG.removeFunctionFromModule(CGN); 691 ++NumDeleted; 692 } 693 return true; 694 } 695 696 InlineAdvisor & 697 InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, 698 FunctionAnalysisManager &FAM, Module &M) { 699 if (OwnedAdvisor) 700 return *OwnedAdvisor; 701 702 auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M); 703 if (!IAA) { 704 // It should still be possible to run the inliner as a stand-alone SCC pass, 705 // for test scenarios. In that case, we default to the 706 // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass 707 // runs. It also uses just the default InlineParams. 708 // In this case, we need to use the provided FAM, which is valid for the 709 // duration of the inliner pass, and thus the lifetime of the owned advisor. 710 // The one we would get from the MAM can be invalidated as a result of the 711 // inliner's activity. 712 OwnedAdvisor = 713 std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams()); 714 715 if (!CGSCCInlineReplayFile.empty()) 716 OwnedAdvisor = getReplayInlineAdvisor( 717 M, FAM, M.getContext(), std::move(OwnedAdvisor), 718 ReplayInlinerSettings{CGSCCInlineReplayFile, 719 CGSCCInlineReplayScope, 720 CGSCCInlineReplayFallback, 721 {CGSCCInlineReplayFormat}}, 722 /*EmitRemarks=*/true); 723 724 return *OwnedAdvisor; 725 } 726 assert(IAA->getAdvisor() && 727 "Expected a present InlineAdvisorAnalysis also have an " 728 "InlineAdvisor initialized"); 729 return *IAA->getAdvisor(); 730 } 731 732 PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, 733 CGSCCAnalysisManager &AM, LazyCallGraph &CG, 734 CGSCCUpdateResult &UR) { 735 const auto &MAMProxy = 736 AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG); 737 bool Changed = false; 738 739 assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); 740 Module &M = *InitialC.begin()->getFunction().getParent(); 741 ProfileSummaryInfo *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(M); 742 743 FunctionAnalysisManager &FAM = 744 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG) 745 .getManager(); 746 747 InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M); 748 Advisor.onPassEntry(); 749 750 auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); }); 751 752 // We use a single common worklist for calls across the entire SCC. We 753 // process these in-order and append new calls introduced during inlining to 754 // the end. The PriorityInlineOrder is optional here, in which the smaller 755 // callee would have a higher priority to inline. 756 // 757 // Note that this particular order of processing is actually critical to 758 // avoid very bad behaviors. Consider *highly connected* call graphs where 759 // each function contains a small amount of code and a couple of calls to 760 // other functions. Because the LLVM inliner is fundamentally a bottom-up 761 // inliner, it can handle gracefully the fact that these all appear to be 762 // reasonable inlining candidates as it will flatten things until they become 763 // too big to inline, and then move on and flatten another batch. 764 // 765 // However, when processing call edges *within* an SCC we cannot rely on this 766 // bottom-up behavior. As a consequence, with heavily connected *SCCs* of 767 // functions we can end up incrementally inlining N calls into each of 768 // N functions because each incremental inlining decision looks good and we 769 // don't have a topological ordering to prevent explosions. 770 // 771 // To compensate for this, we don't process transitive edges made immediate 772 // by inlining until we've done one pass of inlining across the entire SCC. 773 // Large, highly connected SCCs still lead to some amount of code bloat in 774 // this model, but it is uniformly spread across all the functions in the SCC 775 // and eventually they all become too large to inline, rather than 776 // incrementally maknig a single function grow in a super linear fashion. 777 std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls; 778 if (InlineEnablePriorityOrder) 779 Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>(); 780 else 781 Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>(); 782 assert(Calls != nullptr && "Expected an initialized InlineOrder"); 783 784 // Populate the initial list of calls in this SCC. 785 for (auto &N : InitialC) { 786 auto &ORE = 787 FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction()); 788 // We want to generally process call sites top-down in order for 789 // simplifications stemming from replacing the call with the returned value 790 // after inlining to be visible to subsequent inlining decisions. 791 // FIXME: Using instructions sequence is a really bad way to do this. 792 // Instead we should do an actual RPO walk of the function body. 793 for (Instruction &I : instructions(N.getFunction())) 794 if (auto *CB = dyn_cast<CallBase>(&I)) 795 if (Function *Callee = CB->getCalledFunction()) { 796 if (!Callee->isDeclaration()) 797 Calls->push({CB, -1}); 798 else if (!isa<IntrinsicInst>(I)) { 799 using namespace ore; 800 setInlineRemark(*CB, "unavailable definition"); 801 ORE.emit([&]() { 802 return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) 803 << NV("Callee", Callee) << " will not be inlined into " 804 << NV("Caller", CB->getCaller()) 805 << " because its definition is unavailable" 806 << setIsVerbose(); 807 }); 808 } 809 } 810 } 811 if (Calls->empty()) 812 return PreservedAnalyses::all(); 813 814 // Capture updatable variable for the current SCC. 815 auto *C = &InitialC; 816 817 // When inlining a callee produces new call sites, we want to keep track of 818 // the fact that they were inlined from the callee. This allows us to avoid 819 // infinite inlining in some obscure cases. To represent this, we use an 820 // index into the InlineHistory vector. 821 SmallVector<std::pair<Function *, int>, 16> InlineHistory; 822 823 // Track a set vector of inlined callees so that we can augment the caller 824 // with all of their edges in the call graph before pruning out the ones that 825 // got simplified away. 826 SmallSetVector<Function *, 4> InlinedCallees; 827 828 // Track the dead functions to delete once finished with inlining calls. We 829 // defer deleting these to make it easier to handle the call graph updates. 830 SmallVector<Function *, 4> DeadFunctions; 831 832 // Track potentially dead non-local functions with comdats to see if they can 833 // be deleted as a batch after inlining. 834 SmallVector<Function *, 4> DeadFunctionsInComdats; 835 836 // Loop forward over all of the calls. 837 while (!Calls->empty()) { 838 // We expect the calls to typically be batched with sequences of calls that 839 // have the same caller, so we first set up some shared infrastructure for 840 // this caller. We also do any pruning we can at this layer on the caller 841 // alone. 842 Function &F = *Calls->front().first->getCaller(); 843 LazyCallGraph::Node &N = *CG.lookup(F); 844 if (CG.lookupSCC(N) != C) { 845 Calls->pop(); 846 continue; 847 } 848 849 LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n" 850 << " Function size: " << F.getInstructionCount() 851 << "\n"); 852 853 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { 854 return FAM.getResult<AssumptionAnalysis>(F); 855 }; 856 857 // Now process as many calls as we have within this caller in the sequence. 858 // We bail out as soon as the caller has to change so we can update the 859 // call graph and prepare the context of that new caller. 860 bool DidInline = false; 861 while (!Calls->empty() && Calls->front().first->getCaller() == &F) { 862 auto P = Calls->pop(); 863 CallBase *CB = P.first; 864 const int InlineHistoryID = P.second; 865 Function &Callee = *CB->getCalledFunction(); 866 867 if (InlineHistoryID != -1 && 868 inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { 869 LLVM_DEBUG(dbgs() << "Skipping inlining due to history: " 870 << F.getName() << " -> " << Callee.getName() << "\n"); 871 setInlineRemark(*CB, "recursive"); 872 continue; 873 } 874 875 // Check if this inlining may repeat breaking an SCC apart that has 876 // already been split once before. In that case, inlining here may 877 // trigger infinite inlining, much like is prevented within the inliner 878 // itself by the InlineHistory above, but spread across CGSCC iterations 879 // and thus hidden from the full inline history. 880 if (CG.lookupSCC(*CG.lookup(Callee)) == C && 881 UR.InlinedInternalEdges.count({&N, C})) { 882 LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " 883 "previously split out of this SCC by inlining: " 884 << F.getName() << " -> " << Callee.getName() << "\n"); 885 setInlineRemark(*CB, "recursive SCC split"); 886 continue; 887 } 888 889 std::unique_ptr<InlineAdvice> Advice = 890 Advisor.getAdvice(*CB, OnlyMandatory); 891 892 // Check whether we want to inline this callsite. 893 if (!Advice) 894 continue; 895 896 if (!Advice->isInliningRecommended()) { 897 Advice->recordUnattemptedInlining(); 898 continue; 899 } 900 901 // Setup the data structure used to plumb customization into the 902 // `InlineFunction` routine. 903 InlineFunctionInfo IFI( 904 /*cg=*/nullptr, GetAssumptionCache, PSI, 905 &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())), 906 &FAM.getResult<BlockFrequencyAnalysis>(Callee)); 907 908 InlineResult IR = 909 InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller())); 910 if (!IR.isSuccess()) { 911 Advice->recordUnsuccessfulInlining(IR); 912 continue; 913 } 914 915 DidInline = true; 916 InlinedCallees.insert(&Callee); 917 ++NumInlined; 918 919 LLVM_DEBUG(dbgs() << " Size after inlining: " 920 << F.getInstructionCount() << "\n"); 921 922 // Add any new callsites to defined functions to the worklist. 923 if (!IFI.InlinedCallSites.empty()) { 924 int NewHistoryID = InlineHistory.size(); 925 InlineHistory.push_back({&Callee, InlineHistoryID}); 926 927 for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { 928 Function *NewCallee = ICB->getCalledFunction(); 929 assert(!(NewCallee && NewCallee->isIntrinsic()) && 930 "Intrinsic calls should not be tracked."); 931 if (!NewCallee) { 932 // Try to promote an indirect (virtual) call without waiting for 933 // the post-inline cleanup and the next DevirtSCCRepeatedPass 934 // iteration because the next iteration may not happen and we may 935 // miss inlining it. 936 if (tryPromoteCall(*ICB)) 937 NewCallee = ICB->getCalledFunction(); 938 } 939 if (NewCallee) 940 if (!NewCallee->isDeclaration()) 941 Calls->push({ICB, NewHistoryID}); 942 } 943 } 944 945 // Merge the attributes based on the inlining. 946 AttributeFuncs::mergeAttributesForInlining(F, Callee); 947 948 // For local functions or discardable functions without comdats, check 949 // whether this makes the callee trivially dead. In that case, we can drop 950 // the body of the function eagerly which may reduce the number of callers 951 // of other functions to one, changing inline cost thresholds. Non-local 952 // discardable functions with comdats are checked later on. 953 bool CalleeWasDeleted = false; 954 if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() && 955 !CG.isLibFunction(Callee)) { 956 if (Callee.hasLocalLinkage() || !Callee.hasComdat()) { 957 Calls->erase_if([&](const std::pair<CallBase *, int> &Call) { 958 return Call.first->getCaller() == &Callee; 959 }); 960 // Clear the body and queue the function itself for deletion when we 961 // finish inlining and call graph updates. 962 // Note that after this point, it is an error to do anything other 963 // than use the callee's address or delete it. 964 Callee.dropAllReferences(); 965 assert(!is_contained(DeadFunctions, &Callee) && 966 "Cannot put cause a function to become dead twice!"); 967 DeadFunctions.push_back(&Callee); 968 CalleeWasDeleted = true; 969 } else { 970 DeadFunctionsInComdats.push_back(&Callee); 971 } 972 } 973 if (CalleeWasDeleted) 974 Advice->recordInliningWithCalleeDeleted(); 975 else 976 Advice->recordInlining(); 977 } 978 979 if (!DidInline) 980 continue; 981 Changed = true; 982 983 // At this point, since we have made changes we have at least removed 984 // a call instruction. However, in the process we do some incremental 985 // simplification of the surrounding code. This simplification can 986 // essentially do all of the same things as a function pass and we can 987 // re-use the exact same logic for updating the call graph to reflect the 988 // change. 989 990 // Inside the update, we also update the FunctionAnalysisManager in the 991 // proxy for this particular SCC. We do this as the SCC may have changed and 992 // as we're going to mutate this particular function we want to make sure 993 // the proxy is in place to forward any invalidation events. 994 LazyCallGraph::SCC *OldC = C; 995 C = &updateCGAndAnalysisManagerForCGSCCPass(CG, *C, N, AM, UR, FAM); 996 LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n"); 997 998 // If this causes an SCC to split apart into multiple smaller SCCs, there 999 // is a subtle risk we need to prepare for. Other transformations may 1000 // expose an "infinite inlining" opportunity later, and because of the SCC 1001 // mutation, we will revisit this function and potentially re-inline. If we 1002 // do, and that re-inlining also has the potentially to mutate the SCC 1003 // structure, the infinite inlining problem can manifest through infinite 1004 // SCC splits and merges. To avoid this, we capture the originating caller 1005 // node and the SCC containing the call edge. This is a slight over 1006 // approximation of the possible inlining decisions that must be avoided, 1007 // but is relatively efficient to store. We use C != OldC to know when 1008 // a new SCC is generated and the original SCC may be generated via merge 1009 // in later iterations. 1010 // 1011 // It is also possible that even if no new SCC is generated 1012 // (i.e., C == OldC), the original SCC could be split and then merged 1013 // into the same one as itself. and the original SCC will be added into 1014 // UR.CWorklist again, we want to catch such cases too. 1015 // 1016 // FIXME: This seems like a very heavyweight way of retaining the inline 1017 // history, we should look for a more efficient way of tracking it. 1018 if ((C != OldC || UR.CWorklist.count(OldC)) && 1019 llvm::any_of(InlinedCallees, [&](Function *Callee) { 1020 return CG.lookupSCC(*CG.lookup(*Callee)) == OldC; 1021 })) { 1022 LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, " 1023 "retaining this to avoid infinite inlining.\n"); 1024 UR.InlinedInternalEdges.insert({&N, OldC}); 1025 } 1026 InlinedCallees.clear(); 1027 1028 // Invalidate analyses for this function now so that we don't have to 1029 // invalidate analyses for all functions in this SCC later. 1030 FAM.invalidate(F, PreservedAnalyses::none()); 1031 } 1032 1033 // We must ensure that we only delete functions with comdats if every function 1034 // in the comdat is going to be deleted. 1035 if (!DeadFunctionsInComdats.empty()) { 1036 filterDeadComdatFunctions(DeadFunctionsInComdats); 1037 for (auto *Callee : DeadFunctionsInComdats) 1038 Callee->dropAllReferences(); 1039 DeadFunctions.append(DeadFunctionsInComdats); 1040 } 1041 1042 // Now that we've finished inlining all of the calls across this SCC, delete 1043 // all of the trivially dead functions, updating the call graph and the CGSCC 1044 // pass manager in the process. 1045 // 1046 // Note that this walks a pointer set which has non-deterministic order but 1047 // that is OK as all we do is delete things and add pointers to unordered 1048 // sets. 1049 for (Function *DeadF : DeadFunctions) { 1050 // Get the necessary information out of the call graph and nuke the 1051 // function there. Also, clear out any cached analyses. 1052 auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); 1053 FAM.clear(*DeadF, DeadF->getName()); 1054 AM.clear(DeadC, DeadC.getName()); 1055 auto &DeadRC = DeadC.getOuterRefSCC(); 1056 CG.removeDeadFunction(*DeadF); 1057 1058 // Mark the relevant parts of the call graph as invalid so we don't visit 1059 // them. 1060 UR.InvalidatedSCCs.insert(&DeadC); 1061 UR.InvalidatedRefSCCs.insert(&DeadRC); 1062 1063 // If the updated SCC was the one containing the deleted function, clear it. 1064 if (&DeadC == UR.UpdatedC) 1065 UR.UpdatedC = nullptr; 1066 1067 // And delete the actual function from the module. 1068 M.getFunctionList().erase(DeadF); 1069 1070 ++NumDeleted; 1071 } 1072 1073 if (!Changed) 1074 return PreservedAnalyses::all(); 1075 1076 PreservedAnalyses PA; 1077 // Even if we change the IR, we update the core CGSCC data structures and so 1078 // can preserve the proxy to the function analysis manager. 1079 PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); 1080 // We have already invalidated all analyses on modified functions. 1081 PA.preserveSet<AllAnalysesOn<Function>>(); 1082 return PA; 1083 } 1084 1085 ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, 1086 bool MandatoryFirst, 1087 InliningAdvisorMode Mode, 1088 unsigned MaxDevirtIterations) 1089 : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) { 1090 // Run the inliner first. The theory is that we are walking bottom-up and so 1091 // the callees have already been fully optimized, and we want to inline them 1092 // into the callers so that our optimizations can reflect that. 1093 // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO 1094 // because it makes profile annotation in the backend inaccurate. 1095 if (MandatoryFirst) 1096 PM.addPass(InlinerPass(/*OnlyMandatory*/ true)); 1097 PM.addPass(InlinerPass()); 1098 } 1099 1100 PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, 1101 ModuleAnalysisManager &MAM) { 1102 auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M); 1103 if (!IAA.tryCreate(Params, Mode, 1104 {CGSCCInlineReplayFile, 1105 CGSCCInlineReplayScope, 1106 CGSCCInlineReplayFallback, 1107 {CGSCCInlineReplayFormat}})) { 1108 M.getContext().emitError( 1109 "Could not setup Inlining Advisor for the requested " 1110 "mode and/or options"); 1111 return PreservedAnalyses::all(); 1112 } 1113 1114 // We wrap the CGSCC pipeline in a devirtualization repeater. This will try 1115 // to detect when we devirtualize indirect calls and iterate the SCC passes 1116 // in that case to try and catch knock-on inlining or function attrs 1117 // opportunities. Then we add it to the module pipeline by walking the SCCs 1118 // in postorder (or bottom-up). 1119 // If MaxDevirtIterations is 0, we just don't use the devirtualization 1120 // wrapper. 1121 if (MaxDevirtIterations == 0) 1122 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM))); 1123 else 1124 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( 1125 createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations))); 1126 1127 MPM.addPass(std::move(AfterCGMPM)); 1128 MPM.run(M, MAM); 1129 1130 // Discard the InlineAdvisor, a subsequent inlining session should construct 1131 // its own. 1132 auto PA = PreservedAnalyses::all(); 1133 if (!KeepAdvisorForPrinting) 1134 PA.abandon<InlineAdvisorAnalysis>(); 1135 return PA; 1136 } 1137 1138 void InlinerPass::printPipeline( 1139 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1140 static_cast<PassInfoMixin<InlinerPass> *>(this)->printPipeline( 1141 OS, MapClassName2PassName); 1142 if (OnlyMandatory) 1143 OS << "<only-mandatory>"; 1144 } 1145 1146 void ModuleInlinerWrapperPass::printPipeline( 1147 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1148 // Print some info about passes added to the wrapper. This is however 1149 // incomplete as InlineAdvisorAnalysis part isn't included (which also depends 1150 // on Params and Mode). 1151 if (!MPM.isEmpty()) { 1152 MPM.printPipeline(OS, MapClassName2PassName); 1153 OS << ","; 1154 } 1155 OS << "cgscc("; 1156 if (MaxDevirtIterations != 0) 1157 OS << "devirt<" << MaxDevirtIterations << ">("; 1158 PM.printPipeline(OS, MapClassName2PassName); 1159 if (MaxDevirtIterations != 0) 1160 OS << ")"; 1161 OS << ")"; 1162 } 1163