1 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Outline cold regions to a separate function. 11 // TODO: Update BFI and BPI 12 // TODO: Add all the outlined functions to a separate section. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/AliasAnalysis.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/BranchProbabilityInfo.h" 21 #include "llvm/Analysis/CFG.h" 22 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 23 #include "llvm/Analysis/PostDominators.h" 24 #include "llvm/Analysis/ProfileSummaryInfo.h" 25 #include "llvm/Analysis/TargetTransformInfo.h" 26 #include "llvm/IR/BasicBlock.h" 27 #include "llvm/IR/CFG.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/IR/DiagnosticInfo.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/Function.h" 32 #include "llvm/IR/Instruction.h" 33 #include "llvm/IR/Instructions.h" 34 #include "llvm/IR/Metadata.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/IR/PassManager.h" 37 #include "llvm/IR/Type.h" 38 #include "llvm/IR/Use.h" 39 #include "llvm/IR/User.h" 40 #include "llvm/IR/Value.h" 41 #include "llvm/Pass.h" 42 #include "llvm/Support/BlockFrequency.h" 43 #include "llvm/Support/BranchProbability.h" 44 #include "llvm/Support/Debug.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include "llvm/Transforms/IPO.h" 47 #include "llvm/Transforms/IPO/HotColdSplitting.h" 48 #include "llvm/Transforms/Scalar.h" 49 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 50 #include "llvm/Transforms/Utils/Cloning.h" 51 #include "llvm/Transforms/Utils/CodeExtractor.h" 52 #include "llvm/Transforms/Utils/Local.h" 53 #include "llvm/Transforms/Utils/SSAUpdater.h" 54 #include "llvm/Transforms/Utils/ValueMapper.h" 55 #include <algorithm> 56 #include <cassert> 57 58 #define DEBUG_TYPE "hotcoldsplit" 59 60 STATISTIC(NumColdSESEFound, 61 "Number of cold single entry single exit (SESE) regions found."); 62 STATISTIC(NumColdSESEOutlined, 63 "Number of cold single entry single exit (SESE) regions outlined."); 64 65 using namespace llvm; 66 67 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis", 68 cl::init(true), cl::Hidden); 69 70 71 namespace { 72 73 struct PostDomTree : PostDomTreeBase<BasicBlock> { 74 PostDomTree(Function &F) { recalculate(F); } 75 }; 76 77 typedef DenseSet<const BasicBlock *> DenseSetBB; 78 typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt; 79 80 // From: https://reviews.llvm.org/D22558 81 // Exit is not part of the region. 82 static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit, 83 DominatorTree *DT, PostDomTree *PDT, 84 SmallVectorImpl<BasicBlock *> &Region) { 85 if (!DT->dominates(Entry, Exit)) 86 return false; 87 88 if (!PDT->dominates(Exit, Entry)) 89 return false; 90 91 for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) { 92 if (*I == Exit) { 93 I.skipChildren(); 94 continue; 95 } 96 if (!DT->dominates(Entry, *I)) 97 return false; 98 Region.push_back(*I); 99 ++I; 100 } 101 return true; 102 } 103 104 bool blockEndsInUnreachable(const BasicBlock &BB) { 105 if (BB.empty()) 106 return true; 107 const TerminatorInst *I = BB.getTerminator(); 108 if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I)) 109 return true; 110 // Unreachable blocks do not have any successor. 111 return succ_empty(&BB); 112 } 113 114 static bool exceptionHandlingFunctions(const CallInst *CI) { 115 auto F = CI->getCalledFunction(); 116 if (!F) 117 return false; 118 auto FName = F->getName(); 119 return FName == "__cxa_begin_catch" || 120 FName == "__cxa_free_exception" || 121 FName == "__cxa_allocate_exception" || 122 FName == "__cxa_begin_catch" || 123 FName == "__cxa_end_catch"; 124 } 125 126 static 127 bool unlikelyExecuted(const BasicBlock &BB) { 128 if (blockEndsInUnreachable(BB)) 129 return true; 130 // Exception handling blocks are unlikely executed. 131 if (BB.isEHPad()) 132 return true; 133 for (const Instruction &I : BB) 134 if (const CallInst *CI = dyn_cast<CallInst>(&I)) { 135 // The block is cold if it calls functions tagged as cold or noreturn. 136 if (CI->hasFnAttr(Attribute::Cold) || 137 CI->hasFnAttr(Attribute::NoReturn) || 138 exceptionHandlingFunctions(CI)) 139 return true; 140 141 // Assume that inline assembly is hot code. 142 if (isa<InlineAsm>(CI->getCalledValue())) 143 return false; 144 } 145 return false; 146 } 147 148 static DenseSetBB getHotBlocks(Function &F) { 149 150 // Mark all cold basic blocks. 151 DenseSetBB ColdBlocks; 152 for (BasicBlock &BB : F) 153 if (unlikelyExecuted(BB)) 154 ColdBlocks.insert((const BasicBlock *)&BB); 155 156 // Forward propagation: basic blocks are hot when they are reachable from the 157 // beginning of the function through a path that does not contain cold blocks. 158 SmallVector<const BasicBlock *, 8> WL; 159 DenseSetBB HotBlocks; 160 161 const BasicBlock *It = &F.front(); 162 if (!ColdBlocks.count(It)) { 163 HotBlocks.insert(It); 164 // Breadth First Search to mark edges reachable from hot. 165 WL.push_back(It); 166 while (WL.size() > 0) { 167 It = WL.pop_back_val(); 168 169 for (const BasicBlock *Succ : successors(It)) { 170 // Do not visit blocks that are cold. 171 if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) { 172 HotBlocks.insert(Succ); 173 WL.push_back(Succ); 174 } 175 } 176 } 177 } 178 179 assert(WL.empty() && "work list should be empty"); 180 181 DenseMapBBInt NumHotSuccessors; 182 // Back propagation: when all successors of a basic block are cold, the 183 // basic block is cold as well. 184 for (BasicBlock &BBRef : F) { 185 const BasicBlock *BB = &BBRef; 186 if (HotBlocks.count(BB)) { 187 // Keep a count of hot successors for every hot block. 188 NumHotSuccessors[BB] = 0; 189 for (const BasicBlock *Succ : successors(BB)) 190 if (!ColdBlocks.count(Succ)) 191 NumHotSuccessors[BB] += 1; 192 193 // Add to work list the blocks with all successors cold. Those are the 194 // root nodes in the next loop, where we will move those blocks from 195 // HotBlocks to ColdBlocks and iterate over their predecessors. 196 if (NumHotSuccessors[BB] == 0) 197 WL.push_back(BB); 198 } 199 } 200 201 while (WL.size() > 0) { 202 It = WL.pop_back_val(); 203 if (ColdBlocks.count(It)) 204 continue; 205 206 // Move the block from HotBlocks to ColdBlocks. 207 HotBlocks.erase(It); 208 ColdBlocks.insert(It); 209 210 // Iterate over the predecessors. 211 for (const BasicBlock *Pred : predecessors(It)) { 212 if (HotBlocks.count(Pred)) { 213 NumHotSuccessors[Pred] -= 1; 214 215 // If Pred has no more hot successors, add it to the work list. 216 if (NumHotSuccessors[Pred] == 0) 217 WL.push_back(Pred); 218 } 219 } 220 } 221 222 return HotBlocks; 223 } 224 225 class HotColdSplitting { 226 public: 227 HotColdSplitting(ProfileSummaryInfo *ProfSI, 228 function_ref<BlockFrequencyInfo *(Function &)> GBFI, 229 function_ref<TargetTransformInfo &(Function &)> GTTI, 230 std::function<OptimizationRemarkEmitter &(Function &)> *GORE) 231 : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {} 232 bool run(Module &M); 233 234 private: 235 bool shouldOutlineFrom(const Function &F) const; 236 const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock, 237 DominatorTree *DT, PostDomTree *PDT); 238 Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region, 239 DominatorTree *DT, BlockFrequencyInfo *BFI, 240 OptimizationRemarkEmitter &ORE); 241 bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region, 242 const BasicBlock *Exit) const { 243 if (!Exit) 244 return false; 245 246 // Regions with landing pads etc. 247 for (const BasicBlock *BB : Region) { 248 if (BB->isEHPad() || BB->hasAddressTaken()) 249 return false; 250 } 251 return true; 252 } 253 SmallPtrSet<const Function *, 2> OutlinedFunctions; 254 ProfileSummaryInfo *PSI; 255 function_ref<BlockFrequencyInfo *(Function &)> GetBFI; 256 function_ref<TargetTransformInfo &(Function &)> GetTTI; 257 std::function<OptimizationRemarkEmitter &(Function &)> *GetORE; 258 }; 259 260 class HotColdSplittingLegacyPass : public ModulePass { 261 public: 262 static char ID; 263 HotColdSplittingLegacyPass() : ModulePass(ID) { 264 initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry()); 265 } 266 267 void getAnalysisUsage(AnalysisUsage &AU) const override { 268 AU.addRequired<AssumptionCacheTracker>(); 269 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 270 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 271 AU.addRequired<TargetTransformInfoWrapperPass>(); 272 } 273 274 bool runOnModule(Module &M) override; 275 }; 276 277 } // end anonymous namespace 278 279 // Returns false if the function should not be considered for hot-cold split 280 // optimization. 281 bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { 282 // Do not try to outline again from an already outlined cold function. 283 if (OutlinedFunctions.count(&F)) 284 return false; 285 286 if (F.size() <= 2) 287 return false; 288 289 if (F.hasAddressTaken()) 290 return false; 291 292 if (F.hasFnAttribute(Attribute::AlwaysInline)) 293 return false; 294 295 if (F.hasFnAttribute(Attribute::NoInline)) 296 return false; 297 298 if (F.getCallingConv() == CallingConv::Cold) 299 return false; 300 301 if (PSI->isFunctionEntryCold(&F)) 302 return false; 303 return true; 304 } 305 306 Function * 307 HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region, 308 DominatorTree *DT, BlockFrequencyInfo *BFI, 309 OptimizationRemarkEmitter &ORE) { 310 LLVM_DEBUG(for (auto *BB : Region) 311 llvm::dbgs() << "\nExtracting: " << *BB;); 312 313 // TODO: Pass BFI and BPI to update profile information. 314 CodeExtractor CE(Region, DT); 315 316 SetVector<Value *> Inputs, Outputs, Sinks; 317 CE.findInputsOutputs(Inputs, Outputs, Sinks); 318 319 // Do not extract regions that have live exit variables. 320 if (Outputs.size() > 0) 321 return nullptr; 322 323 if (Function *OutF = CE.extractCodeRegion()) { 324 User *U = *OutF->user_begin(); 325 CallInst *CI = cast<CallInst>(U); 326 CallSite CS(CI); 327 NumColdSESEOutlined++; 328 if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) { 329 OutF->setCallingConv(CallingConv::Cold); 330 CS.setCallingConv(CallingConv::Cold); 331 } 332 CI->setIsNoInline(); 333 LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF); 334 return OutF; 335 } 336 337 ORE.emit([&]() { 338 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", 339 &*Region[0]->begin()) 340 << "Failed to extract region at block " 341 << ore::NV("Block", Region.front()); 342 }); 343 return nullptr; 344 } 345 346 // Return the function created after outlining, nullptr otherwise. 347 const Function *HotColdSplitting::outlineColdBlocks(Function &F, 348 const DenseSetBB &HotBlocks, 349 DominatorTree *DT, 350 PostDomTree *PDT) { 351 auto BFI = GetBFI(F); 352 auto &ORE = (*GetORE)(F); 353 // Walking the dominator tree allows us to find the largest 354 // cold region. 355 BasicBlock *Begin = DT->getRootNode()->getBlock(); 356 for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) { 357 BasicBlock *BB = *I; 358 if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) { 359 SmallVector<BasicBlock *, 4> ValidColdRegion, Region; 360 BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock(); 361 BasicBlock *ExitColdRegion = nullptr; 362 363 // Estimated cold region between a BB and its dom-frontier. 364 while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) && 365 isOutlineCandidate(Region, Exit)) { 366 ExitColdRegion = Exit; 367 ValidColdRegion = Region; 368 Region.clear(); 369 // Update Exit recursively to its dom-frontier. 370 Exit = (*PDT)[Exit]->getIDom()->getBlock(); 371 } 372 if (ExitColdRegion) { 373 // Do not outline a region with only one block. 374 if (ValidColdRegion.size() == 1) 375 continue; 376 377 ++NumColdSESEFound; 378 ValidColdRegion.push_back(ExitColdRegion); 379 // Candidate for outlining. FIXME: Continue outlining. 380 return extractColdRegion(ValidColdRegion, DT, BFI, ORE); 381 } 382 } 383 } 384 return nullptr; 385 } 386 387 bool HotColdSplitting::run(Module &M) { 388 for (auto &F : M) { 389 if (!shouldOutlineFrom(F)) 390 continue; 391 DominatorTree DT(F); 392 PostDomTree PDT(F); 393 PDT.recalculate(F); 394 DenseSetBB HotBlocks; 395 if (EnableStaticAnalyis) // Static analysis of cold blocks. 396 HotBlocks = getHotBlocks(F); 397 398 const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT); 399 if (Outlined) 400 OutlinedFunctions.insert(Outlined); 401 } 402 return true; 403 } 404 405 bool HotColdSplittingLegacyPass::runOnModule(Module &M) { 406 if (skipModule(M)) 407 return false; 408 ProfileSummaryInfo *PSI = 409 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 410 auto GTTI = [this](Function &F) -> TargetTransformInfo & { 411 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 412 }; 413 auto GBFI = [this](Function &F) { 414 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 415 }; 416 std::unique_ptr<OptimizationRemarkEmitter> ORE; 417 std::function<OptimizationRemarkEmitter &(Function &)> GetORE = 418 [&ORE](Function &F) -> OptimizationRemarkEmitter & { 419 ORE.reset(new OptimizationRemarkEmitter(&F)); 420 return *ORE.get(); 421 }; 422 423 return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M); 424 } 425 426 PreservedAnalyses 427 HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) { 428 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 429 430 std::function<AssumptionCache &(Function &)> GetAssumptionCache = 431 [&FAM](Function &F) -> AssumptionCache & { 432 return FAM.getResult<AssumptionAnalysis>(F); 433 }; 434 435 auto GBFI = [&FAM](Function &F) { 436 return &FAM.getResult<BlockFrequencyAnalysis>(F); 437 }; 438 439 std::function<TargetTransformInfo &(Function &)> GTTI = 440 [&FAM](Function &F) -> TargetTransformInfo & { 441 return FAM.getResult<TargetIRAnalysis>(F); 442 }; 443 444 std::unique_ptr<OptimizationRemarkEmitter> ORE; 445 std::function<OptimizationRemarkEmitter &(Function &)> GetORE = 446 [&ORE](Function &F) -> OptimizationRemarkEmitter & { 447 ORE.reset(new OptimizationRemarkEmitter(&F)); 448 return *ORE.get(); 449 }; 450 451 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 452 453 if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M)) 454 return PreservedAnalyses::none(); 455 return PreservedAnalyses::all(); 456 } 457 458 char HotColdSplittingLegacyPass::ID = 0; 459 INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit", 460 "Hot Cold Splitting", false, false) 461 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 462 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 463 INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit", 464 "Hot Cold Splitting", false, false) 465 466 ModulePass *llvm::createHotColdSplittingPass() { 467 return new HotColdSplittingLegacyPass(); 468 } 469