//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to implement inlining without
// missing any calls and updating the call graph. The decisions of which calls
// are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <sstream>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "inline"

STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// This weirdly named statistic tracks the number of times that, when
// attempting to inline a function A into B, we analyze the callers of B in
// order to see if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");

/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
/// prior to LLVM's code generator having support for stack coloring based on
/// lifetime markers. It is now in the process of being removed. To experiment
/// with disabling it and relying fully on lifetime marker based stack
/// coloring, you can pass this flag to LLVM.
static cl::opt<bool>
    DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                cl::init(false), cl::Hidden);

// An integer used to limit the cost of inline deferral. The default negative
// number tells shouldBeDeferred to only take the secondary cost into account.
static cl::opt<int>
    InlineDeferralScale("inline-deferral-scale",
                        cl::desc("Scale to limit the cost of inline deferral"),
                        cl::init(-1), cl::Hidden);

namespace {

enum class InlinerFunctionImportStatsOpts {
  No = 0,
  Basic = 1,
  Verbose = 2,
};

} // end anonymous namespace

static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
    "inliner-function-import-stats",
    cl::init(InlinerFunctionImportStatsOpts::No),
    cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
                          "basic statistics"),
               clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
                          "printing of statistics for each inlined function")),
    cl::Hidden, cl::desc("Enable inliner stats for imported functions"));

/// Flag to add inline messages as callsite attributes 'inline-remark'.
static cl::opt<bool>
    InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
                          cl::Hidden,
                          cl::desc("Enable adding inline-remark attribute to"
                                   " callsites processed by inliner but decided"
                                   " to be not inlined"));

LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}

LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}

/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  getAAResultsAnalysisUsage(AU);
  CallGraphSCCPass::getAnalysisUsage(AU);
}

using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;

/// Look at all of the allocas that we inlined through this call site. If we
/// have already inlined other allocas through other calls into this function,
/// then we know that they have disjoint lifetimes and that we can merge them.
///
/// There are many heuristics possible for merging these allocas, and the
/// different options have different tradeoffs. One thing that we *really*
/// don't want to hurt is SRoA: once inlining happens, often allocas are no
/// longer address taken and so they can be promoted.
///
/// Our "solution" for that is to only merge allocas whose outermost type is an
/// array type. These are usually not promoted because someone is using a
/// variable index into them. These are also often the most important ones to
/// merge.
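///
/// For example, two inlined [8 x i32] allocas coming from call sites with
/// disjoint lifetimes can share a single stack slot, while scalar allocas are
/// left untouched so that SRoA can still promote them.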
///
/// A better solution would be to have real memory lifetime markers in the IR
/// and not have the inliner do any merging of allocas at all. This would
/// allow the backend to do proper stack slot coloring of all allocas that
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI,
                                     InlinedArrayAllocasTy &InlinedArrayAllocas,
                                     int InlineHistory) {
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if the call site was inlined from
  // some other call site. For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A. Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining. When we process
  // the C call site we don't want to do any alloca merging between x and y
  // because their scopes are not disjoint. We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas, but this isn't likely to be a significant win.
  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca. If not, remember that we had it.
  for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E;
       ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function. Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {
      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it: RAUW AI into AvailableAlloca and declare
      // success!
      LLVM_DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                        << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
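      // Such dbg.declare users are found through the alloca's metadata
      // wrappers (LocalAsMetadata / MetadataAsValue), when those exist.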
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment()));
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}

/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static InlineResult inlineCallIfPossible(
    CallBase &CB, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CB.getCalledFunction();
  Function *Caller = CB.getCaller();

  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function. Get the list of static allocas that were
  // inlined.
  InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime);
  if (!IR.isSuccess())
    return IR;

  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return IR; // success
}

/// Return true if inlining of CB can block the caller from being inlined
/// when inlining the caller has been determined to be more beneficial. \p IC
/// is the estimated inline cost associated with callsite \p CB.
/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
/// caller if \p CB is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost,
                 function_ref<InlineCost(CallBase &CB)> GetInlineCost) {
  // For now we only handle local or inline functions.
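  // (Other linkages are not guaranteed to be visible for inlining in every
  // translation unit that calls them; see the detailed rationale below.)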
  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
    return false;
  // If the cost of inlining CB is non-positive, it is not going to prevent the
  // caller from being inlined into its callers and hence we don't need to
  // defer.
  if (IC.getCost() <= 0)
    return false;
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  //
  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
  // the internal implementation of the inline cost metrics rather than
  // treating them as truly abstract units etc.
  TotalSecondaryCost = 0;
  // The candidate cost to be imposed upon the current function.
  int CandidateCost = IC.getCost() - 1;
  // If the caller has local linkage and can be inlined to all its callers, we
  // can apply a huge negative bonus to TotalSecondaryCost.
  bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
  // This bool tracks what happens if we DO inline C into B.
  bool InliningPreventsSomeOuterInline = false;
  unsigned NumCallerUsers = 0;
  for (User *U : Caller->users()) {
    CallBase *CS2 = dyn_cast<CallBase>(U);

    // If this isn't a call to Caller (it could be some other sort
    // of reference) skip it. Such references will prevent the caller
    // from being removed.
    if (!CS2 || CS2->getCalledFunction() != Caller) {
      ApplyLastCallBonus = false;
      continue;
    }

    InlineCost IC2 = GetInlineCost(*CS2);
    ++NumCallerCallersAnalyzed;
    if (!IC2) {
      ApplyLastCallBonus = false;
      continue;
    }
    if (IC2.isAlways())
      continue;

    // See if inlining of the original callsite would erase the cost delta of
    // this callsite. We subtract off the penalty for the call instruction,
    // which we would be deleting.
    if (IC2.getCostDelta() <= CandidateCost) {
      InliningPreventsSomeOuterInline = true;
      TotalSecondaryCost += IC2.getCost();
      NumCallerUsers++;
    }
  }

  if (!InliningPreventsSomeOuterInline)
    return false;

  // If all outer calls to Caller would get inlined, the cost for the last
  // one is set very low by getInlineCost, in anticipation that Caller will
  // be removed entirely. We did not account for this above unless there
  // is only one caller of Caller.
  if (ApplyLastCallBonus)
    TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;

  // If InlineDeferralScale is negative, then ignore the cost of primary
  // inlining -- IC.getCost() multiplied by the number of callers to Caller.
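  // For illustration: with -inline-deferral-scale=2, a primary cost of 50, and
  // one affected caller, deferral is chosen only if
  // TotalSecondaryCost + 50 * 1 < 50 * 2.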
  if (InlineDeferralScale < 0)
    return TotalSecondaryCost < IC.getCost();

  int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers;
  int Allowance = IC.getCost() * InlineDeferralScale;
  return TotalCost < Allowance;
}

static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
                                            const ore::NV &Arg) {
  return R << Arg.Val;
}

template <class RemarkT>
RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
  using namespace ore;
  if (IC.isAlways()) {
    R << "(cost=always)";
  } else if (IC.isNever()) {
    R << "(cost=never)";
  } else {
    R << "(cost=" << ore::NV("Cost", IC.getCost())
      << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
  }
  if (const char *Reason = IC.getReason())
    R << ": " << ore::NV("Reason", Reason);
  return R;
}

static std::string inlineCostStr(const InlineCost &IC) {
  std::stringstream Remark;
  Remark << IC;
  return Remark.str();
}

static void setInlineRemark(CallBase &CB, StringRef Message) {
  if (!InlineRemarkAttribute)
    return;

  Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message);
  CB.addAttribute(AttributeList::FunctionIndex, Attr);
}

/// Return the cost only if the inliner should attempt to inline at the given
/// CallSite. If we return the cost, we will emit an optimization remark later
/// using that cost, so we won't do so from this function. Return None if
/// inlining should not be attempted.
static Optional<InlineCost>
shouldInline(CallBase &CB, function_ref<InlineCost(CallBase &CB)> GetInlineCost,
             OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  InlineCost IC = GetInlineCost(CB);
  Instruction *Call = &CB;
  Function *Callee = CB.getCalledFunction();
  Function *Caller = CB.getCaller();

  if (IC.isAlways()) {
    LLVM_DEBUG(dbgs() << "    Inlining " << inlineCostStr(IC)
                      << ", Call: " << CB << "\n");
    return IC;
  }

  if (!IC) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining " << inlineCostStr(IC)
                      << ", Call: " << CB << "\n");
    if (IC.isNever()) {
      ORE.emit([&]() {
        return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
               << NV("Callee", Callee) << " not inlined into "
               << NV("Caller", Caller)
               << " because it should never be inlined " << IC;
      });
    } else {
      ORE.emit([&]() {
        return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
               << NV("Callee", Callee) << " not inlined into "
               << NV("Caller", Caller) << " because too costly to inline "
               << IC;
      });
    }
    setInlineRemark(CB, inlineCostStr(IC));
    return None;
  }

  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining: " << CB
                      << " Cost = " << IC.getCost()
                      << ", outer Cost = " << TotalSecondaryCost << '\n');
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
                                      Call)
             << "Not inlining. Cost of inlining " << NV("Callee", Callee)
             << " increases the cost of inlining " << NV("Caller", Caller)
             << " in other contexts";
    });
    setInlineRemark(CB, "deferred");
    // Defer this call site: not inlining it here keeps the more profitable
    // opportunity of inlining the caller into its own callers available.
    return None;
  }

  LLVM_DEBUG(dbgs() << "    Inlining " << inlineCostStr(IC) << ", Call: " << CB
                    << '\n');
  return IC;
}

/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool inlineHistoryIncludes(
    Function *F, int InlineHistoryID,
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
  while (InlineHistoryID != -1) {
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
           "Invalid inline history ID");
    if (InlineHistory[InlineHistoryID].first == F)
      return true;
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
  }
  return false;
}

bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.setModuleInfo(CG.getModule());
  return false; // No changes to CallGraph.
}

bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return inlineCalls(SCC);
}

static void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc,
                            const BasicBlock *Block, const Function &Callee,
                            const Function &Caller, const InlineCost &IC) {
  ORE.emit([&]() {
    bool AlwaysInline = IC.isAlways();
    StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
    return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
           << ore::NV("Callee", &Callee) << " inlined into "
           << ore::NV("Caller", &Caller) << " with " << IC;
  });
}

static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI,
                std::function<const TargetLibraryInfo &(Function &)> GetTLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallBase &CB)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  SmallPtrSet<Function *, 8> SCCFunctions;
  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallBase *, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F || F->isDeclaration())
      continue;

    OptimizationRemarkEmitter ORE(F);
    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        auto *CB = dyn_cast<CallBase>(&I);
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CB || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never
        // inline it. If it is an indirect call, inlining may resolve it to be
        // a direct call, so we keep it.
        if (Function *Callee = CB->getCalledFunction())
          if (Callee->isDeclaration()) {
            using namespace ore;

            setInlineRemark(*CB, "unavailable definition");
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CB->getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
            continue;
          }

        CallSites.push_back(std::make_pair(CB, -1));
      }
  }

  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this function, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned I = 0; I < FirstCallInSCC; ++I)
    if (Function *F = CallSites[I].first->getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[I--], CallSites[--FirstCallInSCC]);

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause
    // indirect calls to become direct calls.
    // CallSites may be modified inside, so a ranged for loop cannot be used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      auto &P = CallSites[CSi];
      CallBase &CB = *P.first;
      const int InlineHistoryID = P.second;

      Function *Caller = CB.getCaller();
      Function *Callee = CB.getCalledFunction();

      // We can only inline direct calls to non-declarations.
      if (!Callee || Callee->isDeclaration())
        continue;

      bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller));

      if (!IsTriviallyDead) {
        // If this call site was obtained by inlining another function, verify
        // that the inline path for the function did not include the callee
        // itself. If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        if (InlineHistoryID != -1 &&
            inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
          setInlineRemark(CB, "recursive");
          continue;
        }
      }

      // FIXME for new PM: because of the old PM we currently generate ORE and
      // in turn BFI on demand. With the new PM, the ORE dependency should
      // just become a regular analysis dependency.
      OptimizationRemarkEmitter ORE(Caller);

      auto OIC = shouldInline(CB, GetInlineCost, ORE);
      // If the policy determines that we should not attempt to inline this
      // call site, move on to the next one.
      if (!OIC)
        continue;

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size. This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (IsTriviallyDead) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead call: " << CB << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        setInlineRemark(CB, "trivially dead");
        CG[Caller]->removeCallEdgeFor(CB);
        CB.eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // Get the DebugLoc to report: CB will be invalid after inlining.
        DebugLoc DLoc = CB.getDebugLoc();
        BasicBlock *Block = CB.getParent();

        // Attempt to inline the function.
        using namespace ore;

        InlineResult IR = inlineCallIfPossible(
            CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
            InsertLifetime, AARGetter, ImportedFunctionsStats);
        if (!IR.isSuccess()) {
          setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " +
                                  inlineCostStr(*OIC));
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                            Block)
                   << NV("Callee", Callee) << " will not be inlined into "
                   << NV("Caller", Caller) << ": "
                   << NV("Reason", IR.getFailureReason());
          });
          continue;
        }
        ++NumInlined;

        emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process. They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

#ifndef NDEBUG
          // Make sure there are no duplicates in the inline candidates. This
          // could happen when a callsite is simplified to reuse the return
          // value of another callsite during function cloning, in which case
          // the other callsite would be reconsidered here.
          DenseSet<CallBase *> DbgCallSites;
          for (auto &II : CallSites)
            DbgCallSites.insert(II.first);
#endif

          for (Value *Ptr : InlineInfo.InlinedCalls) {
#ifndef NDEBUG
            assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0);
#endif
            CallSites.push_back(
                std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID));
          }
        }
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet; this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead function: "
                          << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list. If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}

bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
    return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  };
  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };
  return inlineCallsImpl(
      SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
      [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this),
      ImportedFunctionsStats);
}

/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                InlinerFunctionImportStatsOpts::Verbose);
  return removeDeadFunctions(CG);
}

/// Remove dead functions that are not included in the DNR (Do Not Remove)
/// list.
bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
                                            bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<Function *, 16> DeadFunctionsInComdats;

  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node. These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Queue the node for removal from the call graph and deletion.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program. Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage()) {
      if (F->hasComdat()) {
        DeadFunctionsInComdats.push_back(F);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Filter out the functions whose comdats remain alive.
    filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
    // Remove the rest.
    for (Function *F : DeadFunctionsInComdats)
      RemoveCGN(CG[F]);
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so. We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here, since it doesn't matter which order the functions are deleted in.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}

InlinerPass::~InlinerPass() {
  if (ImportedFunctionsStats) {
    assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No);
    ImportedFunctionsStats->dump(InlinerFunctionImportStats ==
                                 InlinerFunctionImportStatsOpts::Verbose);
  }
}

PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  if (!ImportedFunctionsStats &&
      InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
    ImportedFunctionsStats =
        std::make_unique<ImportedFunctionsInliningStatistics>();
    ImportedFunctionsStats->setModuleInfo(M);
  }

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a superlinear fashion.
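  //
  // Each worklist entry pairs a call site with an index into the InlineHistory
  // vector below; -1 marks a call that existed before any inlining in this
  // SCC.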
  SmallVector<std::pair<CallBase *, int>, 16> Calls;

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using the instruction sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto *CB = dyn_cast<CallBase>(&I))
        if (Function *Callee = CB->getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls.push_back({CB, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            setInlineRemark(*CB, "unavailable definition");
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CB->getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int I = 0; I < (int)Calls.size(); ++I) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls[I].first->getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasOptNone()) {
      setInlineRemark(*Calls[I].first, "optnone attribute");
      continue;
    }

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure the
    // proxy is in place to forward any invalidation events.
    // We can, however, use the manager we get here to look up results for
    // functions other than this node, because those functions aren't going to
    // be mutated by this pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };
    auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
      return FAM.getResult<TargetLibraryAnalysis>(F);
    };

    auto GetInlineCost = [&](CallBase &CB) {
      Function &Callee = *CB.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      bool RemarksEnabled =
          Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
              DEBUG_TYPE);
      return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           GetTLI, PSI, RemarksEnabled ? &ORE : nullptr);
    };

    // Now process as many calls as we have within this caller in the sequence.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) {
      auto &P = Calls[I];
      CallBase *CB = P.first;
      const int InlineHistoryID = P.second;
      Function &Callee = *CB->getCalledFunction();

      if (InlineHistoryID != -1 &&
          inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
        setInlineRemark(*CB, "recursive");
        continue;
      }

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName() << "\n");
        setInlineRemark(*CB, "recursive SCC split");
        continue;
      }

      auto OIC = shouldInline(*CB, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC)
        continue;
      auto DoInline = [&]() -> InlineResult {
        // Set up the data structure used to plumb customization into the
        // `InlineFunction` routine.
        InlineFunctionInfo IFI(
            /*cg=*/nullptr, &GetAssumptionCache, PSI,
            &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
            &FAM.getResult<BlockFrequencyAnalysis>(Callee));

        InlineResult IR = InlineFunction(*CB, IFI);
        if (!IR.isSuccess())
          return IR;

        DidInline = true;
        InlinedCallees.insert(&Callee);
        ++NumInlined;

        // Add any new callsites to defined functions to the worklist.
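        // Each new entry carries NewHistoryID so that the recursion check at
        // the top of this loop can walk the chain of inlines that produced it.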
        if (!IFI.InlinedCallSites.empty()) {
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back({&Callee, InlineHistoryID});

          for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
            Function *NewCallee = ICB->getCalledFunction();
            if (!NewCallee) {
              // Try to promote an indirect (virtual) call without waiting for
              // the post-inline cleanup and the next DevirtSCCRepeatedPass
              // iteration because the next iteration may not happen and we may
              // miss inlining it.
              if (tryPromoteCall(*ICB))
                NewCallee = ICB->getCalledFunction();
            }
            if (NewCallee)
              if (!NewCallee->isDeclaration())
                Calls.push_back({ICB, NewHistoryID});
          }
        }

        if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
          ImportedFunctionsStats->recordInline(F, Callee);

        // Merge the attributes based on the inlining.
        AttributeFuncs::mergeAttributesForInlining(F, Callee);

        // For local functions, check whether this makes the callee trivially
        // dead. In that case, we can drop the body of the function eagerly
        // which may reduce the number of callers of other functions to one,
        // changing inline cost thresholds.
        if (Callee.hasLocalLinkage()) {
          // To check this we also need to nuke any dead constant uses (perhaps
          // made dead by this operation on other functions).
          Callee.removeDeadConstantUsers();
          if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
            Calls.erase(
                std::remove_if(Calls.begin() + I + 1, Calls.end(),
                               [&](const std::pair<CallBase *, int> &Call) {
                                 return Call.first->getCaller() == &Callee;
                               }),
                Calls.end());
            // Clear the body and queue the function itself for deletion when
            // we finish inlining and call graph updates.
            // Note that after this point, it is an error to do anything other
            // than use the callee's address or delete it.
            Callee.dropAllReferences();
            assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                   "Cannot cause a function to become dead twice!");
            DeadFunctions.push_back(&Callee);
          }
        }
        return IR;
      };
      // Capture the context of CB before inlining, as a successful inlining
      // may change that context, and we want to report success or failure in
      // the original context.
      auto DLoc = CB->getDebugLoc();
      auto *Block = CB->getParent();

      auto Outcome = DoInline();
      if (!Outcome.isSuccess()) {
        using namespace ore;
        setInlineRemark(*CB, std::string(Outcome.getFailureReason()) + "; " +
                                 inlineCostStr(*OIC));
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F) << ": "
                 << NV("Reason", Outcome.getFailureReason());
        });
        continue;
      }

      emitInlinedInto(ORE, DLoc, Block, Callee, F, *OIC);
    }

    // Back the call index up by one to put us in a good position to go around
    // the outer loop.
    --I;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These are
    // by definition trivial edges as we always have *some* transitive ref edge
    // chain.
    // While in some cases these edges are direct calls inside the callee, they
    // have to be modeled in the inliner as reference edges as there may be a
    // reference edge anywhere along the chain from the current caller to the
    // callee that causes the whole thing to appear like a (transitive)
    // reference edge that will require promotion to a call edge below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If we
    // do, and that re-inlining also has the potential to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight
    // overapproximation of the possible inlining decisions that must be
    // avoided, but is relatively efficient to store. We use C != OldC to know
    // when a new SCC is generated and the original SCC may be generated via
    // merge in later iterations.
    //
    // It is also possible that even if no new SCC is generated
    // (i.e., C == OldC), the original SCC could be split and then merged
    // into the same one as itself, and then the original SCC will be added
    // into UR.CWorklist again; we want to catch such cases too.
    //
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history; we should look for a more efficient way of tracking it.
    if ((C != OldC || UR.CWorklist.count(OldC)) &&
        llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
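    // Clearing both the function-level and the SCC-level cached results keeps
    // the analysis managers from holding dangling pointers to the deleted
    // function.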
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG).getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
    ++NumDeleted;
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}