//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to implement inlining without
// missing any calls and updating the call graph. The decisions of which calls
// are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "inline"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/InlineCost.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;

STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// Command-line knob for the inlining cost threshold.  A derived pass may
// instead supply an explicit threshold via the two-argument constructor below.
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore,
        cl::desc("Control the amount of inlining to perform (default = 200)"));

// Default constructor: take the threshold from the -inline-threshold option.
Inliner::Inliner(void *ID)
  : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}

// Construct with an explicit, pass-specified inlining threshold that
// overrides the command-line default.
Inliner::Inliner(void *ID, int Threshold)
  : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}

/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph.
If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
  CallGraphSCCPass::getAnalysisUsage(Info);
}


// Map from the outermost array type of an inlined static alloca to all of the
// allocas of that type already materialized in a caller; used to merge
// disjoint-lifetime allocas from successive inlines (see below).
typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;

/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
///
/// Returns false (with no IR changes) if InlineFunction refuses the call site;
/// returns true after inlining, attribute propagation, and alloca merging.
static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
                                 const TargetData *TD,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function. Get the list of static allocas that were
  // inlined.
  SmallVector<AllocaInst*, 16> StaticAllocas;
  if (!InlineFunction(CS, &CG, TD, &StaticAllocas))
    return false;

  // If the inlined function had a higher stack protection level than the
  // calling function, then bump up the caller's stack protection level.
  // StackProtectReq dominates StackProtect, so never downgrade an existing
  // StackProtectReq to plain StackProtect.
  if (Callee->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtectReq);
  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
           !Caller->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtect);


  // Look at all of the allocas that we inlined through this call site. If we
  // have already inlined other allocas through other calls into this function,
  // then we know that they have disjoint lifetimes and that we can merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs. One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is an
  // array type. These are usually not promoted because someone is using a
  // variable index into them. These are also often the most important ones to
  // merge.
  //
  // A better solution would be to have real memory lifetime markers in the IR
  // and not have the inliner do any merging of allocas at all. This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful hack.
  //
  // Allocas (pre-existing or newly inlined) already claimed by one of the
  // allocas from *this* inline operation; a claimed alloca must not absorb a
  // second one, since two allocas from the same callee have overlapping
  // lifetimes.
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;

  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca. If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function. Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.  (insert returns false when the element was already present.)
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(errs() << " ***MERGED ALLOCA: " << *AI);

      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }

  return true;
}

/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
  // Ask the cost model (implemented by the derived pass) for this call site.
  InlineCost IC = getInlineCost(CS);

  // "Always" inline candidates bypass the threshold comparison entirely.
  if (IC.isAlways()) {
    DEBUG(errs() << " Inlining: cost=always"
          << ", Call: " << *CS.getInstruction() << "\n");
    return true;
  }

  // Likewise, "never" candidates are rejected unconditionally.
  if (IC.isNever()) {
    DEBUG(errs() << " NOT Inlining: cost=never"
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }

  int Cost = IC.getValue();
  int CurrentThreshold = InlineThreshold;
  Function *Fn = CS.getCaller();
  // Tighten the threshold to 50 when the *caller* is marked optsize (skipped
  // when the global threshold is already exactly 50).
  if (Fn && !Fn->isDeclaration() &&
      Fn->hasFnAttr(Attribute::OptimizeForSize) &&
      InlineThreshold != 50)
    CurrentThreshold = 50;

  // The fudge factor scales the threshold up for call sites the cost model
  // considers especially worthwhile.
  float FudgeFactor = getInlineFudgeFactor(CS);
  if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
    DEBUG(errs() << " NOT Inlining: cost=" << Cost
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }

  DEBUG(errs() << " Inlining: cost=" << Cost
          << ", Call: " << *CS.getInstruction() << "\n");
  return true;
}

bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
  CallGraph &CG = getAnalysis<CallGraph>();
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();

  // Record which functions belong to this SCC so we can recognize
  // intra-SCC calls (and functions we must not delete) below.
  SmallPtrSet<Function*, 8> SCCFunctions;
  DEBUG(errs() << "Inliner visiting SCC:");
  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
    Function *F = SCC[i]->getFunction();
    if (F) SCCFunctions.insert(F);
    DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<CallSite, 16> CallSites;

  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
    Function *F = SCC[i]->getFunction();
    if (!F) continue;

    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        CallSite CS = CallSite::get(I);
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never inline
        // it. If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
          continue;

        CallSites.push_back(CS);
      }
  }

  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.  (The unsigned i-- wrap at i==0 is
  // well-defined and immediately undone by the loop's ++i.)
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);


  InlinedArrayAllocasTy InlinedArrayAllocas;

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause indirect
    // calls to become direct calls.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi];

      Function *Callee = CS.getCalledFunction();
      // We can only inline direct calls to non-declarations.
      if (Callee == 0 || Callee->isDeclaration()) continue;

      // If the policy determines that we should inline this function,
      // try to do so.
      if (!shouldInline(CS))
        continue;

      Function *Caller = CS.getCaller();
      // Attempt to inline the function...
      if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
        continue;

      // If we inlined the last possible call site to the function, delete the
      // function body now.
      if (Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&

          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(errs() << " -> Deleting dead function: "
              << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Drop cached cost data before the Function is freed.
        resetCachedCostInfo(Callee);

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove any cached cost info for this caller, as inlining the
      // callee has increased the size of the caller (which may be the
      // same as the callee).
      resetCachedCostInfo(Caller);

      // Remove this call site from the list. If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      // With a single-function SCC there is no barrier to preserve, so the
      // O(1) swap/pop_back is safe; otherwise fall back to an order-preserving
      // erase.
      if (SCC.size() == 1) {
        std::swap(CallSites[CSi], CallSites.back());
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin()+CSi);
      }
      // Compensate for the removal so the next iteration revisits this index.
      --CSi;

      ++NumInlined;
      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}

// doFinalization - Remove now-dead linkonce functions at the end of
// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
  return removeDeadFunctions(CG);
}

/// removeDeadFunctions - Remove dead functions that are not included in
/// DNR (Do Not Remove) list.  Returns true if any function was deleted.
bool Inliner::removeDeadFunctions(CallGraph &CG,
                                  SmallPtrSet<const Function *, 16> *DNR) {
  SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program. Insert the dead ones in the FunctionsToRemove set.
  for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
    CallGraphNode *CGN = I->second;
    // Skip the external/indirect nodes that have no Function.
    if (CGN->getFunction() == 0)
      continue;

    Function *F = CGN->getFunction();

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    // Honor the caller's Do-Not-Remove list.
    if (DNR && DNR->count(F))
      continue;
    // Only discard bodies whose linkage guarantees an equivalent definition is
    // available elsewhere, or that are local to this module.
    if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
        !F->hasAvailableExternallyLinkage())
      continue;
    if (!F->use_empty())
      continue;

    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node. These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Remember the node; the actual deletion is deferred until after the scan.
    FunctionsToRemove.insert(CGN);
  }

  // Now that we know which functions to delete, do so. We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable set
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  bool Changed = false;
  for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
       E = FunctionsToRemove.end(); I != E; ++I) {
    // Drop cached cost data before the Function is freed.
    resetCachedCostInfo((*I)->getFunction());
    delete CG.removeFunctionFromModule(*I);
    ++NumDeleted;
    Changed = true;
  }

  return Changed;
}