1 //===- PartialInlining.cpp - Inline parts of functions --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs partial inlining, typically by inlining an if statement 11 // that surrounds the body of the function. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/PartialInlining.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/Analysis/BlockFrequencyInfo.h" 18 #include "llvm/Analysis/BranchProbabilityInfo.h" 19 #include "llvm/Analysis/InlineCost.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/OptimizationDiagnosticInfo.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/TargetLibraryInfo.h" 24 #include "llvm/Analysis/TargetTransformInfo.h" 25 #include "llvm/IR/CFG.h" 26 #include "llvm/IR/DiagnosticInfo.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/Module.h" 30 #include "llvm/Pass.h" 31 #include "llvm/Transforms/IPO.h" 32 #include "llvm/Transforms/Utils/Cloning.h" 33 #include "llvm/Transforms/Utils/CodeExtractor.h" 34 using namespace llvm; 35 36 #define DEBUG_TYPE "partial-inlining" 37 38 STATISTIC(NumPartialInlined, 39 "Number of callsites functions partially inlined into."); 40 41 // Command line option to disable partial-inlining. The default is false: 42 static cl::opt<bool> 43 DisablePartialInlining("disable-partial-inlining", cl::init(false), 44 cl::Hidden, cl::desc("Disable partial ininling")); 45 46 static cl::opt<unsigned> MaxNumInlineBlocks( 47 "max-num-inline-blocks", cl::init(5), cl::Hidden, 48 cl::desc("Max Number of Blocks To be Partially Inlined")); 49 50 // Command line option to set the maximum number of partial inlining allowed 51 // for the module. The default value of -1 means no limit. 52 static cl::opt<int> MaxNumPartialInlining( 53 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, 54 cl::desc("Max number of partial inlining. The default is unlimited")); 55 56 namespace { 57 58 struct FunctionOutliningInfo { 59 FunctionOutliningInfo() 60 : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), 61 ReturnBlockPreds() {} 62 // Returns the number of blocks to be inlined including all blocks 63 // in Entries and one return block. 64 unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } 65 66 // A set of blocks including the function entry that guard 67 // the region to be outlined. 68 SmallVector<BasicBlock *, 4> Entries; 69 // The return block that is not included in the outlined region. 70 BasicBlock *ReturnBlock; 71 // The dominating block of the region ot be outlined. 72 BasicBlock *NonReturnBlock; 73 // The set of blocks in Entries that that are predecessors to ReturnBlock 74 SmallVector<BasicBlock *, 4> ReturnBlockPreds; 75 }; 76 77 struct PartialInlinerImpl { 78 PartialInlinerImpl( 79 std::function<AssumptionCache &(Function &)> *GetAC, 80 std::function<TargetTransformInfo &(Function &)> *GTTI, 81 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, 82 ProfileSummaryInfo *ProfSI) 83 : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} 84 bool run(Module &M); 85 Function *unswitchFunction(Function *F); 86 87 std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); 88 89 private: 90 int NumPartialInlining = 0; 91 std::function<AssumptionCache &(Function &)> *GetAssumptionCache; 92 std::function<TargetTransformInfo &(Function &)> *GetTTI; 93 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI; 94 ProfileSummaryInfo *PSI; 95 96 bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE); 97 bool IsLimitReached() { 98 return (MaxNumPartialInlining != -1 && 99 NumPartialInlining >= MaxNumPartialInlining); 100 } 101 }; 102 103 struct PartialInlinerLegacyPass : public ModulePass { 104 static char ID; // Pass identification, replacement for typeid 105 PartialInlinerLegacyPass() : ModulePass(ID) { 106 initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); 107 } 108 109 void getAnalysisUsage(AnalysisUsage &AU) const override { 110 AU.addRequired<AssumptionCacheTracker>(); 111 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 112 AU.addRequired<TargetTransformInfoWrapperPass>(); 113 } 114 bool runOnModule(Module &M) override { 115 if (skipModule(M)) 116 return false; 117 118 AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); 119 TargetTransformInfoWrapperPass *TTIWP = 120 &getAnalysis<TargetTransformInfoWrapperPass>(); 121 ProfileSummaryInfo *PSI = 122 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 123 124 std::function<AssumptionCache &(Function &)> GetAssumptionCache = 125 [&ACT](Function &F) -> AssumptionCache & { 126 return ACT->getAssumptionCache(F); 127 }; 128 129 std::function<TargetTransformInfo &(Function &)> GetTTI = 130 [&TTIWP](Function &F) -> TargetTransformInfo & { 131 return TTIWP->getTTI(F); 132 }; 133 134 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); 135 } 136 }; 137 } 138 139 std::unique_ptr<FunctionOutliningInfo> 140 PartialInlinerImpl::computeOutliningInfo(Function *F) { 141 BasicBlock *EntryBlock = &F->front(); 142 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); 143 if (!BR || BR->isUnconditional()) 144 return std::unique_ptr<FunctionOutliningInfo>(); 145 146 // Returns true if Succ is BB's successor 147 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { 148 return is_contained(successors(BB), Succ); 149 }; 150 151 auto SuccSize = [](BasicBlock *BB) { 152 return std::distance(succ_begin(BB), succ_end(BB)); 153 }; 154 155 auto IsReturnBlock = [](BasicBlock *BB) { 156 TerminatorInst *TI = BB->getTerminator(); 157 return isa<ReturnInst>(TI); 158 }; 159 160 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) { 161 if (IsReturnBlock(Succ1)) 162 return std::make_tuple(Succ1, Succ2); 163 if (IsReturnBlock(Succ2)) 164 return std::make_tuple(Succ2, Succ1); 165 166 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); 167 }; 168 169 // Detect a triangular shape: 170 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) { 171 if (IsSuccessor(Succ1, Succ2)) 172 return std::make_tuple(Succ1, Succ2); 173 if (IsSuccessor(Succ2, Succ1)) 174 return std::make_tuple(Succ2, Succ1); 175 176 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); 177 }; 178 179 std::unique_ptr<FunctionOutliningInfo> OutliningInfo = 180 llvm::make_unique<FunctionOutliningInfo>(); 181 182 BasicBlock *CurrEntry = EntryBlock; 183 bool CandidateFound = false; 184 do { 185 // The number of blocks to be inlined has already reached 186 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this 187 // disables partial inlining for the function. 188 if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) 189 break; 190 191 if (SuccSize(CurrEntry) != 2) 192 break; 193 194 BasicBlock *Succ1 = *succ_begin(CurrEntry); 195 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); 196 197 BasicBlock *ReturnBlock, *NonReturnBlock; 198 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); 199 200 if (ReturnBlock) { 201 OutliningInfo->Entries.push_back(CurrEntry); 202 OutliningInfo->ReturnBlock = ReturnBlock; 203 OutliningInfo->NonReturnBlock = NonReturnBlock; 204 CandidateFound = true; 205 break; 206 } 207 208 BasicBlock *CommSucc; 209 BasicBlock *OtherSucc; 210 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); 211 212 if (!CommSucc) 213 break; 214 215 OutliningInfo->Entries.push_back(CurrEntry); 216 CurrEntry = OtherSucc; 217 218 } while (true); 219 220 if (!CandidateFound) 221 return std::unique_ptr<FunctionOutliningInfo>(); 222 223 // Do sanity check of the entries: threre should not 224 // be any successors (not in the entry set) other than 225 // {ReturnBlock, NonReturnBlock} 226 assert(OutliningInfo->Entries[0] == &F->front()); 227 DenseSet<BasicBlock *> Entries; 228 for (BasicBlock *E : OutliningInfo->Entries) 229 Entries.insert(E); 230 231 // Returns true of BB has Predecessor which is not 232 // in Entries set. 233 auto HasNonEntryPred = [Entries](BasicBlock *BB) { 234 for (auto Pred : predecessors(BB)) { 235 if (!Entries.count(Pred)) 236 return true; 237 } 238 return false; 239 }; 240 auto CheckAndNormalizeCandidate = 241 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { 242 for (BasicBlock *E : OutliningInfo->Entries) { 243 for (auto Succ : successors(E)) { 244 if (Entries.count(Succ)) 245 continue; 246 if (Succ == OutliningInfo->ReturnBlock) 247 OutliningInfo->ReturnBlockPreds.push_back(E); 248 else if (Succ != OutliningInfo->NonReturnBlock) 249 return false; 250 } 251 // There should not be any outside incoming edges either: 252 if (HasNonEntryPred(E)) 253 return false; 254 } 255 return true; 256 }; 257 258 if (!CheckAndNormalizeCandidate(OutliningInfo.get())) 259 return std::unique_ptr<FunctionOutliningInfo>(); 260 261 // Now further growing the candidate's inlining region by 262 // peeling off dominating blocks from the outlining region: 263 while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { 264 BasicBlock *Cand = OutliningInfo->NonReturnBlock; 265 if (SuccSize(Cand) != 2) 266 break; 267 268 if (HasNonEntryPred(Cand)) 269 break; 270 271 BasicBlock *Succ1 = *succ_begin(Cand); 272 BasicBlock *Succ2 = *(succ_begin(Cand) + 1); 273 274 BasicBlock *ReturnBlock, *NonReturnBlock; 275 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); 276 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) 277 break; 278 279 if (NonReturnBlock->getSinglePredecessor() != Cand) 280 break; 281 282 // Now grow and update OutlininigInfo: 283 OutliningInfo->Entries.push_back(Cand); 284 OutliningInfo->NonReturnBlock = NonReturnBlock; 285 OutliningInfo->ReturnBlockPreds.push_back(Cand); 286 Entries.insert(Cand); 287 } 288 289 return OutliningInfo; 290 } 291 292 bool PartialInlinerImpl::shouldPartialInline(CallSite CS, 293 OptimizationRemarkEmitter &ORE) { 294 // TODO : more sharing with shouldInline in Inliner.cpp 295 using namespace ore; 296 Instruction *Call = CS.getInstruction(); 297 Function *Callee = CS.getCalledFunction(); 298 Function *Caller = CS.getCaller(); 299 auto &CalleeTTI = (*GetTTI)(*Callee); 300 InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, 301 *GetAssumptionCache, GetBFI, PSI); 302 303 if (IC.isAlways()) { 304 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) 305 << NV("Callee", Callee) 306 << " should always be fully inlined, not partially"); 307 return false; 308 } 309 310 if (IC.isNever()) { 311 ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) 312 << NV("Callee", Callee) << " not partially inlined into " 313 << NV("Caller", Caller) 314 << " because it should never be inlined (cost=never)"); 315 return false; 316 } 317 318 if (!IC) { 319 ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) 320 << NV("Callee", Callee) << " not partially inlined into " 321 << NV("Caller", Caller) << " because too costly to inline (cost=" 322 << NV("Cost", IC.getCost()) << ", threshold=" 323 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); 324 return false; 325 } 326 327 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) 328 << NV("Callee", Callee) << " can be partially inlined into " 329 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) 330 << " (threshold=" 331 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); 332 return true; 333 } 334 335 Function *PartialInlinerImpl::unswitchFunction(Function *F) { 336 337 if (F->hasAddressTaken()) 338 return nullptr; 339 340 // Let inliner handle it 341 if (F->hasFnAttribute(Attribute::AlwaysInline)) 342 return nullptr; 343 344 if (F->hasFnAttribute(Attribute::NoInline)) 345 return nullptr; 346 347 if (PSI->isFunctionEntryCold(F)) 348 return nullptr; 349 350 std::unique_ptr<FunctionOutliningInfo> OutliningInfo = 351 computeOutliningInfo(F); 352 353 if (!OutliningInfo) 354 return nullptr; 355 356 // Clone the function, so that we can hack away on it. 357 ValueToValueMapTy VMap; 358 Function *DuplicateFunction = CloneFunction(F, VMap); 359 BasicBlock *NewReturnBlock = 360 cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]); 361 BasicBlock *NewNonReturnBlock = 362 cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]); 363 DenseSet<BasicBlock *> NewEntries; 364 for (BasicBlock *BB : OutliningInfo->Entries) { 365 NewEntries.insert(cast<BasicBlock>(VMap[BB])); 366 } 367 368 // Go ahead and update all uses to the duplicate, so that we can just 369 // use the inliner functionality when we're done hacking. 370 F->replaceAllUsesWith(DuplicateFunction); 371 372 auto getFirstPHI = [](BasicBlock *BB) { 373 BasicBlock::iterator I = BB->begin(); 374 PHINode *FirstPhi = nullptr; 375 while (I != BB->end()) { 376 PHINode *Phi = dyn_cast<PHINode>(I); 377 if (!Phi) 378 break; 379 if (!FirstPhi) { 380 FirstPhi = Phi; 381 break; 382 } 383 } 384 return FirstPhi; 385 }; 386 // Special hackery is needed with PHI nodes that have inputs from more than 387 // one extracted block. For simplicity, just split the PHIs into a two-level 388 // sequence of PHIs, some of which will go in the extracted region, and some 389 // of which will go outside. 390 BasicBlock *PreReturn = NewReturnBlock; 391 // only split block when necessary: 392 PHINode *FirstPhi = getFirstPHI(PreReturn); 393 unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size(); 394 if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { 395 396 NewReturnBlock = NewReturnBlock->splitBasicBlock( 397 NewReturnBlock->getFirstNonPHI()->getIterator()); 398 BasicBlock::iterator I = PreReturn->begin(); 399 Instruction *Ins = &NewReturnBlock->front(); 400 while (I != PreReturn->end()) { 401 PHINode *OldPhi = dyn_cast<PHINode>(I); 402 if (!OldPhi) 403 break; 404 405 PHINode *RetPhi = 406 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); 407 OldPhi->replaceAllUsesWith(RetPhi); 408 Ins = NewReturnBlock->getFirstNonPHI(); 409 410 RetPhi->addIncoming(&*I, PreReturn); 411 for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) { 412 BasicBlock *NewE = cast<BasicBlock>(VMap[E]); 413 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); 414 OldPhi->removeIncomingValue(NewE); 415 } 416 ++I; 417 } 418 for (auto E : OutliningInfo->ReturnBlockPreds) { 419 BasicBlock *NewE = cast<BasicBlock>(VMap[E]); 420 NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); 421 } 422 } 423 424 // Returns true if the block is to be partial inlined into the caller 425 // (i.e. not to be extracted to the out of line function) 426 auto ToBeInlined = [&](BasicBlock *BB) { 427 return BB == NewReturnBlock || NewEntries.count(BB); 428 }; 429 // Gather up the blocks that we're going to extract. 430 std::vector<BasicBlock *> ToExtract; 431 ToExtract.push_back(NewNonReturnBlock); 432 for (BasicBlock &BB : *DuplicateFunction) 433 if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) 434 ToExtract.push_back(&BB); 435 436 // The CodeExtractor needs a dominator tree. 437 DominatorTree DT; 438 DT.recalculate(*DuplicateFunction); 439 440 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. 441 LoopInfo LI(DT); 442 BranchProbabilityInfo BPI(*DuplicateFunction, LI); 443 BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI); 444 445 // Extract the body of the if. 446 Function *ExtractedFunction = 447 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI) 448 .extractCodeRegion(); 449 450 // Inline the top-level if test into all callers. 451 std::vector<User *> Users(DuplicateFunction->user_begin(), 452 DuplicateFunction->user_end()); 453 454 for (User *User : Users) { 455 CallSite CS; 456 if (CallInst *CI = dyn_cast<CallInst>(User)) 457 CS = CallSite(CI); 458 else if (InvokeInst *II = dyn_cast<InvokeInst>(User)) 459 CS = CallSite(II); 460 else 461 llvm_unreachable("All uses must be calls"); 462 463 if (IsLimitReached()) 464 continue; 465 466 OptimizationRemarkEmitter ORE(CS.getCaller()); 467 if (!shouldPartialInline(CS, ORE)) 468 continue; 469 470 DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); 471 BasicBlock *Block = CS.getParent(); 472 ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block) 473 << ore::NV("Callee", F) << " partially inlined into " 474 << ore::NV("Caller", CS.getCaller())); 475 476 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); 477 InlineFunction(CS, IFI); 478 NumPartialInlining++; 479 // update stats 480 NumPartialInlined++; 481 } 482 483 // Ditch the duplicate, since we're done with it, and rewrite all remaining 484 // users (function pointers, etc.) back to the original function. 485 DuplicateFunction->replaceAllUsesWith(F); 486 DuplicateFunction->eraseFromParent(); 487 488 489 return ExtractedFunction; 490 } 491 492 bool PartialInlinerImpl::run(Module &M) { 493 if (DisablePartialInlining) 494 return false; 495 496 std::vector<Function *> Worklist; 497 Worklist.reserve(M.size()); 498 for (Function &F : M) 499 if (!F.use_empty() && !F.isDeclaration()) 500 Worklist.push_back(&F); 501 502 bool Changed = false; 503 while (!Worklist.empty()) { 504 Function *CurrFunc = Worklist.back(); 505 Worklist.pop_back(); 506 507 if (CurrFunc->use_empty()) 508 continue; 509 510 bool Recursive = false; 511 for (User *U : CurrFunc->users()) 512 if (Instruction *I = dyn_cast<Instruction>(U)) 513 if (I->getParent()->getParent() == CurrFunc) { 514 Recursive = true; 515 break; 516 } 517 if (Recursive) 518 continue; 519 520 if (Function *NewFunc = unswitchFunction(CurrFunc)) { 521 Worklist.push_back(NewFunc); 522 Changed = true; 523 } 524 } 525 526 return Changed; 527 } 528 529 char PartialInlinerLegacyPass::ID = 0; 530 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", 531 "Partial Inliner", false, false) 532 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 533 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 534 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 535 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", 536 "Partial Inliner", false, false) 537 538 ModulePass *llvm::createPartialInliningPass() { 539 return new PartialInlinerLegacyPass(); 540 } 541 542 PreservedAnalyses PartialInlinerPass::run(Module &M, 543 ModuleAnalysisManager &AM) { 544 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 545 546 std::function<AssumptionCache &(Function &)> GetAssumptionCache = 547 [&FAM](Function &F) -> AssumptionCache & { 548 return FAM.getResult<AssumptionAnalysis>(F); 549 }; 550 551 std::function<BlockFrequencyInfo &(Function &)> GetBFI = 552 [&FAM](Function &F) -> BlockFrequencyInfo & { 553 return FAM.getResult<BlockFrequencyAnalysis>(F); 554 }; 555 556 std::function<TargetTransformInfo &(Function &)> GetTTI = 557 [&FAM](Function &F) -> TargetTransformInfo & { 558 return FAM.getResult<TargetIRAnalysis>(F); 559 }; 560 561 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 562 563 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) 564 return PreservedAnalyses::none(); 565 return PreservedAnalyses::all(); 566 } 567