1 //===- Inliner.cpp - Code common to all inliners --------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the mechanics required to implement inlining without 11 // missing any calls and updating the call graph. The decisions of which calls 12 // are profitable to inline are implemented elsewhere. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "inline" 17 #include "llvm/Module.h" 18 #include "llvm/Instructions.h" 19 #include "llvm/IntrinsicInst.h" 20 #include "llvm/Analysis/CallGraph.h" 21 #include "llvm/Analysis/InlineCost.h" 22 #include "llvm/Target/TargetData.h" 23 #include "llvm/Transforms/IPO/InlinerPass.h" 24 #include "llvm/Transforms/Utils/Cloning.h" 25 #include "llvm/Transforms/Utils/Local.h" 26 #include "llvm/Support/CallSite.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/Statistic.h" 32 #include <set> 33 using namespace llvm; 34 35 STATISTIC(NumInlined, "Number of functions inlined"); 36 STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); 37 STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); 38 STATISTIC(NumMergedAllocas, "Number of allocas merged together"); 39 40 static cl::opt<int> 41 InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, 42 cl::desc("Control the amount of inlining to perform (default = 225)")); 43 44 static cl::opt<int> 45 HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), 46 cl::desc("Threshold for inlining functions with inline hint")); 47 48 // Threshold to use when optsize is specified (and there is no -inline-limit). 49 const int OptSizeThreshold = 75; 50 51 Inliner::Inliner(char &ID) 52 : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} 53 54 Inliner::Inliner(char &ID, int Threshold) 55 : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? 56 InlineLimit : Threshold) {} 57 58 /// getAnalysisUsage - For this class, we declare that we require and preserve 59 /// the call graph. If the derived class implements this method, it should 60 /// always explicitly call the implementation here. 61 void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { 62 CallGraphSCCPass::getAnalysisUsage(Info); 63 } 64 65 66 typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> > 67 InlinedArrayAllocasTy; 68 69 /// InlineCallIfPossible - If it is possible to inline the specified call site, 70 /// do so and update the CallGraph for this operation. 71 /// 72 /// This function also does some basic book-keeping to update the IR. The 73 /// InlinedArrayAllocas map keeps track of any allocas that are already 74 /// available from other functions inlined into the caller. If we are able to 75 /// inline this call site we attempt to reuse already available allocas or add 76 /// any new allocas to the set if not possible. 77 static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, 78 InlinedArrayAllocasTy &InlinedArrayAllocas, 79 int InlineHistory) { 80 Function *Callee = CS.getCalledFunction(); 81 Function *Caller = CS.getCaller(); 82 83 // Try to inline the function. Get the list of static allocas that were 84 // inlined. 85 if (!InlineFunction(CS, IFI)) 86 return false; 87 88 // If the inlined function had a higher stack protection level than the 89 // calling function, then bump up the caller's stack protection level. 90 if (Callee->hasFnAttr(Attribute::StackProtectReq)) 91 Caller->addFnAttr(Attribute::StackProtectReq); 92 else if (Callee->hasFnAttr(Attribute::StackProtect) && 93 !Caller->hasFnAttr(Attribute::StackProtectReq)) 94 Caller->addFnAttr(Attribute::StackProtect); 95 96 // Look at all of the allocas that we inlined through this call site. If we 97 // have already inlined other allocas through other calls into this function, 98 // then we know that they have disjoint lifetimes and that we can merge them. 99 // 100 // There are many heuristics possible for merging these allocas, and the 101 // different options have different tradeoffs. One thing that we *really* 102 // don't want to hurt is SRoA: once inlining happens, often allocas are no 103 // longer address taken and so they can be promoted. 104 // 105 // Our "solution" for that is to only merge allocas whose outermost type is an 106 // array type. These are usually not promoted because someone is using a 107 // variable index into them. These are also often the most important ones to 108 // merge. 109 // 110 // A better solution would be to have real memory lifetime markers in the IR 111 // and not have the inliner do any merging of allocas at all. This would 112 // allow the backend to do proper stack slot coloring of all allocas that 113 // *actually make it to the backend*, which is really what we want. 114 // 115 // Because we don't have this information, we do this simple and useful hack. 116 // 117 SmallPtrSet<AllocaInst*, 16> UsedAllocas; 118 119 // When processing our SCC, check to see if CS was inlined from some other 120 // call site. For example, if we're processing "A" in this code: 121 // A() { B() } 122 // B() { x = alloca ... C() } 123 // C() { y = alloca ... } 124 // Assume that C was not inlined into B initially, and so we're processing A 125 // and decide to inline B into A. Doing this makes an alloca available for 126 // reuse and makes a callsite (C) available for inlining. When we process 127 // the C call site we don't want to do any alloca merging between X and Y 128 // because their scopes are not disjoint. We could make this smarter by 129 // keeping track of the inline history for each alloca in the 130 // InlinedArrayAllocas but this isn't likely to be a significant win. 131 if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. 132 return true; 133 134 // Loop over all the allocas we have so far and see if they can be merged with 135 // a previously inlined alloca. If not, remember that we had it. 136 for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); 137 AllocaNo != e; ++AllocaNo) { 138 AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; 139 140 // Don't bother trying to merge array allocations (they will usually be 141 // canonicalized to be an allocation *of* an array), or allocations whose 142 // type is not itself an array (because we're afraid of pessimizing SRoA). 143 const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); 144 if (ATy == 0 || AI->isArrayAllocation()) 145 continue; 146 147 // Get the list of all available allocas for this array type. 148 std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy]; 149 150 // Loop over the allocas in AllocasForType to see if we can reuse one. Note 151 // that we have to be careful not to reuse the same "available" alloca for 152 // multiple different allocas that we just inlined, we use the 'UsedAllocas' 153 // set to keep track of which "available" allocas are being used by this 154 // function. Also, AllocasForType can be empty of course! 155 bool MergedAwayAlloca = false; 156 for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { 157 AllocaInst *AvailableAlloca = AllocasForType[i]; 158 159 // The available alloca has to be in the right function, not in some other 160 // function in this SCC. 161 if (AvailableAlloca->getParent() != AI->getParent()) 162 continue; 163 164 // If the inlined function already uses this alloca then we can't reuse 165 // it. 166 if (!UsedAllocas.insert(AvailableAlloca)) 167 continue; 168 169 // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare 170 // success! 171 DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " 172 << *AvailableAlloca << '\n'); 173 174 AI->replaceAllUsesWith(AvailableAlloca); 175 AI->eraseFromParent(); 176 MergedAwayAlloca = true; 177 ++NumMergedAllocas; 178 IFI.StaticAllocas[AllocaNo] = 0; 179 break; 180 } 181 182 // If we already nuked the alloca, we're done with it. 183 if (MergedAwayAlloca) 184 continue; 185 186 // If we were unable to merge away the alloca either because there are no 187 // allocas of the right type available or because we reused them all 188 // already, remember that this alloca came from an inlined function and mark 189 // it used so we don't reuse it for other allocas from this inline 190 // operation. 191 AllocasForType.push_back(AI); 192 UsedAllocas.insert(AI); 193 } 194 195 return true; 196 } 197 198 unsigned Inliner::getInlineThreshold(CallSite CS) const { 199 int thres = InlineThreshold; 200 201 // Listen to optsize when -inline-limit is not given. 202 Function *Caller = CS.getCaller(); 203 if (Caller && !Caller->isDeclaration() && 204 Caller->hasFnAttr(Attribute::OptimizeForSize) && 205 InlineLimit.getNumOccurrences() == 0) 206 thres = OptSizeThreshold; 207 208 // Listen to inlinehint when it would increase the threshold. 209 Function *Callee = CS.getCalledFunction(); 210 if (HintThreshold > thres && Callee && !Callee->isDeclaration() && 211 Callee->hasFnAttr(Attribute::InlineHint)) 212 thres = HintThreshold; 213 214 return thres; 215 } 216 217 /// shouldInline - Return true if the inliner should attempt to inline 218 /// at the given CallSite. 219 bool Inliner::shouldInline(CallSite CS) { 220 InlineCost IC = getInlineCost(CS); 221 222 if (IC.isAlways()) { 223 DEBUG(dbgs() << " Inlining: cost=always" 224 << ", Call: " << *CS.getInstruction() << "\n"); 225 return true; 226 } 227 228 if (IC.isNever()) { 229 DEBUG(dbgs() << " NOT Inlining: cost=never" 230 << ", Call: " << *CS.getInstruction() << "\n"); 231 return false; 232 } 233 234 int Cost = IC.getValue(); 235 Function *Caller = CS.getCaller(); 236 int CurrentThreshold = getInlineThreshold(CS); 237 float FudgeFactor = getInlineFudgeFactor(CS); 238 int AdjThreshold = (int)(CurrentThreshold * FudgeFactor); 239 if (Cost >= AdjThreshold) { 240 DEBUG(dbgs() << " NOT Inlining: cost=" << Cost 241 << ", thres=" << AdjThreshold 242 << ", Call: " << *CS.getInstruction() << "\n"); 243 return false; 244 } 245 246 // Try to detect the case where the current inlining candidate caller 247 // (call it B) is a static function and is an inlining candidate elsewhere, 248 // and the current candidate callee (call it C) is large enough that 249 // inlining it into B would make B too big to inline later. In these 250 // circumstances it may be best not to inline C into B, but to inline B 251 // into its callers. 252 if (Caller->hasLocalLinkage()) { 253 int TotalSecondaryCost = 0; 254 bool outerCallsFound = false; 255 // This bool tracks what happens if we do NOT inline C into B. 256 bool callerWillBeRemoved = true; 257 // This bool tracks what happens if we DO inline C into B. 258 bool inliningPreventsSomeOuterInline = false; 259 for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); 260 I != E; ++I) { 261 CallSite CS2(*I); 262 263 // If this isn't a call to Caller (it could be some other sort 264 // of reference) skip it. Such references will prevent the caller 265 // from being removed. 266 if (!CS2 || CS2.getCalledFunction() != Caller) { 267 callerWillBeRemoved = false; 268 continue; 269 } 270 271 InlineCost IC2 = getInlineCost(CS2); 272 if (IC2.isNever()) 273 callerWillBeRemoved = false; 274 if (IC2.isAlways() || IC2.isNever()) 275 continue; 276 277 outerCallsFound = true; 278 int Cost2 = IC2.getValue(); 279 int CurrentThreshold2 = getInlineThreshold(CS2); 280 float FudgeFactor2 = getInlineFudgeFactor(CS2); 281 282 if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) 283 callerWillBeRemoved = false; 284 285 // See if we have this case. We subtract off the penalty 286 // for the call instruction, which we would be deleting. 287 if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && 288 Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= 289 (int)(CurrentThreshold2 * FudgeFactor2)) { 290 inliningPreventsSomeOuterInline = true; 291 TotalSecondaryCost += Cost2; 292 } 293 } 294 // If all outer calls to Caller would get inlined, the cost for the last 295 // one is set very low by getInlineCost, in anticipation that Caller will 296 // be removed entirely. We did not account for this above unless there 297 // is only one caller of Caller. 298 if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) 299 TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; 300 301 if (outerCallsFound && inliningPreventsSomeOuterInline && 302 TotalSecondaryCost < Cost) { 303 DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << 304 " Cost = " << Cost << 305 ", outer Cost = " << TotalSecondaryCost << '\n'); 306 return false; 307 } 308 } 309 310 DEBUG(dbgs() << " Inlining: cost=" << Cost 311 << ", thres=" << AdjThreshold 312 << ", Call: " << *CS.getInstruction() << '\n'); 313 return true; 314 } 315 316 /// InlineHistoryIncludes - Return true if the specified inline history ID 317 /// indicates an inline history that includes the specified function. 318 static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, 319 const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { 320 while (InlineHistoryID != -1) { 321 assert(unsigned(InlineHistoryID) < InlineHistory.size() && 322 "Invalid inline history ID"); 323 if (InlineHistory[InlineHistoryID].first == F) 324 return true; 325 InlineHistoryID = InlineHistory[InlineHistoryID].second; 326 } 327 return false; 328 } 329 330 331 bool Inliner::runOnSCC(CallGraphSCC &SCC) { 332 CallGraph &CG = getAnalysis<CallGraph>(); 333 const TargetData *TD = getAnalysisIfAvailable<TargetData>(); 334 335 SmallPtrSet<Function*, 8> SCCFunctions; 336 DEBUG(dbgs() << "Inliner visiting SCC:"); 337 for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { 338 Function *F = (*I)->getFunction(); 339 if (F) SCCFunctions.insert(F); 340 DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); 341 } 342 343 // Scan through and identify all call sites ahead of time so that we only 344 // inline call sites in the original functions, not call sites that result 345 // from inlining other functions. 346 SmallVector<std::pair<CallSite, int>, 16> CallSites; 347 348 // When inlining a callee produces new call sites, we want to keep track of 349 // the fact that they were inlined from the callee. This allows us to avoid 350 // infinite inlining in some obscure cases. To represent this, we use an 351 // index into the InlineHistory vector. 352 SmallVector<std::pair<Function*, int>, 8> InlineHistory; 353 354 for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { 355 Function *F = (*I)->getFunction(); 356 if (!F) continue; 357 358 for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) 359 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { 360 CallSite CS(cast<Value>(I)); 361 // If this isn't a call, or it is a call to an intrinsic, it can 362 // never be inlined. 363 if (!CS || isa<IntrinsicInst>(I)) 364 continue; 365 366 // If this is a direct call to an external function, we can never inline 367 // it. If it is an indirect call, inlining may resolve it to be a 368 // direct call, so we keep it. 369 if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) 370 continue; 371 372 CallSites.push_back(std::make_pair(CS, -1)); 373 } 374 } 375 376 DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); 377 378 // If there are no calls in this function, exit early. 379 if (CallSites.empty()) 380 return false; 381 382 // Now that we have all of the call sites, move the ones to functions in the 383 // current SCC to the end of the list. 384 unsigned FirstCallInSCC = CallSites.size(); 385 for (unsigned i = 0; i < FirstCallInSCC; ++i) 386 if (Function *F = CallSites[i].first.getCalledFunction()) 387 if (SCCFunctions.count(F)) 388 std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); 389 390 391 InlinedArrayAllocasTy InlinedArrayAllocas; 392 InlineFunctionInfo InlineInfo(&CG, TD); 393 394 // Now that we have all of the call sites, loop over them and inline them if 395 // it looks profitable to do so. 396 bool Changed = false; 397 bool LocalChange; 398 do { 399 LocalChange = false; 400 // Iterate over the outer loop because inlining functions can cause indirect 401 // calls to become direct calls. 402 for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { 403 CallSite CS = CallSites[CSi].first; 404 405 Function *Caller = CS.getCaller(); 406 Function *Callee = CS.getCalledFunction(); 407 408 // If this call site is dead and it is to a readonly function, we should 409 // just delete the call instead of trying to inline it, regardless of 410 // size. This happens because IPSCCP propagates the result out of the 411 // call and then we're left with the dead call. 412 if (isInstructionTriviallyDead(CS.getInstruction())) { 413 DEBUG(dbgs() << " -> Deleting dead call: " 414 << *CS.getInstruction() << "\n"); 415 // Update the call graph by deleting the edge from Callee to Caller. 416 CG[Caller]->removeCallEdgeFor(CS); 417 CS.getInstruction()->eraseFromParent(); 418 ++NumCallsDeleted; 419 // Update the cached cost info with the missing call 420 growCachedCostInfo(Caller, NULL); 421 } else { 422 // We can only inline direct calls to non-declarations. 423 if (Callee == 0 || Callee->isDeclaration()) continue; 424 425 // If this call site was obtained by inlining another function, verify 426 // that the include path for the function did not include the callee 427 // itself. If so, we'd be recursively inlining the same function, 428 // which would provide the same callsites, which would cause us to 429 // infinitely inline. 430 int InlineHistoryID = CallSites[CSi].second; 431 if (InlineHistoryID != -1 && 432 InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) 433 continue; 434 435 436 // If the policy determines that we should inline this function, 437 // try to do so. 438 if (!shouldInline(CS)) 439 continue; 440 441 // Attempt to inline the function. 442 if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, 443 InlineHistoryID)) 444 continue; 445 ++NumInlined; 446 447 // If inlining this function gave us any new call sites, throw them 448 // onto our worklist to process. They are useful inline candidates. 449 if (!InlineInfo.InlinedCalls.empty()) { 450 // Create a new inline history entry for this, so that we remember 451 // that these new callsites came about due to inlining Callee. 452 int NewHistoryID = InlineHistory.size(); 453 InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); 454 455 for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); 456 i != e; ++i) { 457 Value *Ptr = InlineInfo.InlinedCalls[i]; 458 CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); 459 } 460 } 461 462 // Update the cached cost info with the inlined call. 463 growCachedCostInfo(Caller, Callee); 464 } 465 466 // If we inlined or deleted the last possible call site to the function, 467 // delete the function body now. 468 if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && 469 // TODO: Can remove if in SCC now. 470 !SCCFunctions.count(Callee) && 471 472 // The function may be apparently dead, but if there are indirect 473 // callgraph references to the node, we cannot delete it yet, this 474 // could invalidate the CGSCC iterator. 475 CG[Callee]->getNumReferences() == 0) { 476 DEBUG(dbgs() << " -> Deleting dead function: " 477 << Callee->getName() << "\n"); 478 CallGraphNode *CalleeNode = CG[Callee]; 479 480 // Remove any call graph edges from the callee to its callees. 481 CalleeNode->removeAllCalledFunctions(); 482 483 resetCachedCostInfo(Callee); 484 485 // Removing the node for callee from the call graph and delete it. 486 delete CG.removeFunctionFromModule(CalleeNode); 487 ++NumDeleted; 488 } 489 490 // Remove this call site from the list. If possible, use 491 // swap/pop_back for efficiency, but do not use it if doing so would 492 // move a call site to a function in this SCC before the 493 // 'FirstCallInSCC' barrier. 494 if (SCC.isSingular()) { 495 CallSites[CSi] = CallSites.back(); 496 CallSites.pop_back(); 497 } else { 498 CallSites.erase(CallSites.begin()+CSi); 499 } 500 --CSi; 501 502 Changed = true; 503 LocalChange = true; 504 } 505 } while (LocalChange); 506 507 return Changed; 508 } 509 510 // doFinalization - Remove now-dead linkonce functions at the end of 511 // processing to avoid breaking the SCC traversal. 512 bool Inliner::doFinalization(CallGraph &CG) { 513 return removeDeadFunctions(CG); 514 } 515 516 /// removeDeadFunctions - Remove dead functions that are not included in 517 /// DNR (Do Not Remove) list. 518 bool Inliner::removeDeadFunctions(CallGraph &CG, 519 SmallPtrSet<const Function *, 16> *DNR) { 520 SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove; 521 522 // Scan for all of the functions, looking for ones that should now be removed 523 // from the program. Insert the dead ones in the FunctionsToRemove set. 524 for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) { 525 CallGraphNode *CGN = I->second; 526 if (CGN->getFunction() == 0) 527 continue; 528 529 Function *F = CGN->getFunction(); 530 531 // If the only remaining users of the function are dead constants, remove 532 // them. 533 F->removeDeadConstantUsers(); 534 535 if (DNR && DNR->count(F)) 536 continue; 537 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && 538 !F->hasAvailableExternallyLinkage()) 539 continue; 540 if (!F->use_empty()) 541 continue; 542 543 // Remove any call graph edges from the function to its callees. 544 CGN->removeAllCalledFunctions(); 545 546 // Remove any edges from the external node to the function's call graph 547 // node. These edges might have been made irrelegant due to 548 // optimization of the program. 549 CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); 550 551 // Removing the node for callee from the call graph and delete it. 552 FunctionsToRemove.insert(CGN); 553 } 554 555 // Now that we know which functions to delete, do so. We didn't want to do 556 // this inline, because that would invalidate our CallGraph::iterator 557 // objects. :( 558 // 559 // Note that it doesn't matter that we are iterating over a non-stable set 560 // here to do this, it doesn't matter which order the functions are deleted 561 // in. 562 bool Changed = false; 563 for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(), 564 E = FunctionsToRemove.end(); I != E; ++I) { 565 resetCachedCostInfo((*I)->getFunction()); 566 delete CG.removeFunctionFromModule(*I); 567 ++NumDeleted; 568 Changed = true; 569 } 570 571 return Changed; 572 } 573