1 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Outline cold regions to a separate function. 11 // TODO: Update BFI and BPI 12 // TODO: Add all the outlined functions to a separate section. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/AliasAnalysis.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/BranchProbabilityInfo.h" 21 #include "llvm/Analysis/CFG.h" 22 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 23 #include "llvm/Analysis/PostDominators.h" 24 #include "llvm/Analysis/ProfileSummaryInfo.h" 25 #include "llvm/Analysis/TargetTransformInfo.h" 26 #include "llvm/IR/BasicBlock.h" 27 #include "llvm/IR/CFG.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/IR/DiagnosticInfo.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/Function.h" 32 #include "llvm/IR/Instruction.h" 33 #include "llvm/IR/Instructions.h" 34 #include "llvm/IR/Metadata.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/IR/PassManager.h" 37 #include "llvm/IR/Type.h" 38 #include "llvm/IR/Use.h" 39 #include "llvm/IR/User.h" 40 #include "llvm/IR/Value.h" 41 #include "llvm/Pass.h" 42 #include "llvm/Support/BlockFrequency.h" 43 #include "llvm/Support/BranchProbability.h" 44 #include "llvm/Support/Debug.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include "llvm/Transforms/IPO.h" 47 #include "llvm/Transforms/Scalar.h" 48 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 49 #include "llvm/Transforms/Utils/Cloning.h" 50 #include "llvm/Transforms/Utils/CodeExtractor.h" 51 #include "llvm/Transforms/Utils/Local.h" 52 #include "llvm/Transforms/Utils/SSAUpdater.h" 53 #include "llvm/Transforms/Utils/ValueMapper.h" 54 #include <algorithm> 55 #include <cassert> 56 57 #define DEBUG_TYPE "hotcoldsplit" 58 59 STATISTIC(NumColdSESEFound, 60 "Number of cold single entry single exit (SESE) regions found."); 61 STATISTIC(NumColdSESEOutlined, 62 "Number of cold single entry single exit (SESE) regions outlined."); 63 64 using namespace llvm; 65 66 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis", 67 cl::init(true), cl::Hidden); 68 69 70 namespace { 71 72 struct PostDomTree : PostDomTreeBase<BasicBlock> { 73 PostDomTree(Function &F) { recalculate(F); } 74 }; 75 76 typedef DenseSet<const BasicBlock *> DenseSetBB; 77 typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt; 78 79 // From: https://reviews.llvm.org/D22558 80 // Exit is not part of the region. 81 static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit, 82 DominatorTree *DT, PostDomTree *PDT, 83 SmallVectorImpl<BasicBlock *> &Region) { 84 if (!DT->dominates(Entry, Exit)) 85 return false; 86 87 if (!PDT->dominates(Exit, Entry)) 88 return false; 89 90 for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) { 91 if (*I == Exit) { 92 I.skipChildren(); 93 continue; 94 } 95 if (!DT->dominates(Entry, *I)) 96 return false; 97 Region.push_back(*I); 98 ++I; 99 } 100 return true; 101 } 102 103 bool blockEndsInUnreachable(const BasicBlock &BB) { 104 if (BB.empty()) 105 return true; 106 const TerminatorInst *I = BB.getTerminator(); 107 if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I)) 108 return true; 109 // Unreachable blocks do not have any successor. 110 return succ_empty(&BB); 111 } 112 113 static 114 bool unlikelyExecuted(const BasicBlock &BB) { 115 if (blockEndsInUnreachable(BB)) 116 return true; 117 // Exception handling blocks are unlikely executed. 118 if (BB.isEHPad()) 119 return true; 120 for (const Instruction &I : BB) 121 if (const CallInst *CI = dyn_cast<CallInst>(&I)) { 122 // The block is cold if it calls functions tagged as cold or noreturn. 123 if (CI->hasFnAttr(Attribute::Cold) || 124 CI->hasFnAttr(Attribute::NoReturn)) 125 return true; 126 127 // Assume that inline assembly is hot code. 128 if (isa<InlineAsm>(CI->getCalledValue())) 129 return false; 130 } 131 return false; 132 } 133 134 static DenseSetBB getHotBlocks(Function &F) { 135 136 // Mark all cold basic blocks. 137 DenseSetBB ColdBlocks; 138 for (BasicBlock &BB : F) 139 if (unlikelyExecuted(BB)) 140 ColdBlocks.insert((const BasicBlock *)&BB); 141 142 // Forward propagation: basic blocks are hot when they are reachable from the 143 // beginning of the function through a path that does not contain cold blocks. 144 SmallVector<const BasicBlock *, 8> WL; 145 DenseSetBB HotBlocks; 146 147 const BasicBlock *It = &F.front(); 148 if (!ColdBlocks.count(It)) { 149 HotBlocks.insert(It); 150 // Breadth First Search to mark edges reachable from hot. 151 WL.push_back(It); 152 while (WL.size() > 0) { 153 It = WL.pop_back_val(); 154 155 for (const BasicBlock *Succ : successors(It)) { 156 // Do not visit blocks that are cold. 157 if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) { 158 HotBlocks.insert(Succ); 159 WL.push_back(Succ); 160 } 161 } 162 } 163 } 164 165 assert(WL.empty() && "work list should be empty"); 166 167 DenseMapBBInt NumHotSuccessors; 168 // Back propagation: when all successors of a basic block are cold, the 169 // basic block is cold as well. 170 for (BasicBlock &BBRef : F) { 171 const BasicBlock *BB = &BBRef; 172 if (HotBlocks.count(BB)) { 173 // Keep a count of hot successors for every hot block. 174 NumHotSuccessors[BB] = 0; 175 for (const BasicBlock *Succ : successors(BB)) 176 if (!ColdBlocks.count(Succ)) 177 NumHotSuccessors[BB] += 1; 178 179 // Add to work list the blocks with all successors cold. Those are the 180 // root nodes in the next loop, where we will move those blocks from 181 // HotBlocks to ColdBlocks and iterate over their predecessors. 182 if (NumHotSuccessors[BB] == 0) 183 WL.push_back(BB); 184 } 185 } 186 187 while (WL.size() > 0) { 188 It = WL.pop_back_val(); 189 if (ColdBlocks.count(It)) 190 continue; 191 192 // Move the block from HotBlocks to ColdBlocks. 193 HotBlocks.erase(It); 194 ColdBlocks.insert(It); 195 196 // Iterate over the predecessors. 197 for (const BasicBlock *Pred : predecessors(It)) { 198 if (HotBlocks.count(Pred)) { 199 NumHotSuccessors[Pred] -= 1; 200 201 // If Pred has no more hot successors, add it to the work list. 202 if (NumHotSuccessors[Pred] == 0) 203 WL.push_back(Pred); 204 } 205 } 206 } 207 208 return HotBlocks; 209 } 210 211 class HotColdSplitting { 212 public: 213 HotColdSplitting(ProfileSummaryInfo *ProfSI, 214 function_ref<BlockFrequencyInfo *(Function &)> GBFI, 215 function_ref<TargetTransformInfo &(Function &)> GTTI, 216 std::function<OptimizationRemarkEmitter &(Function &)> *GORE) 217 : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {} 218 bool run(Module &M); 219 220 private: 221 bool shouldOutlineFrom(const Function &F) const; 222 const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock, 223 DominatorTree *DT, PostDomTree *PDT); 224 Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region, 225 DominatorTree *DT, BlockFrequencyInfo *BFI, 226 OptimizationRemarkEmitter &ORE); 227 bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region, 228 const BasicBlock *Exit) const { 229 if (!Exit) 230 return false; 231 232 // Regions with landing pads etc. 233 for (const BasicBlock *BB : Region) { 234 if (BB->isEHPad() || BB->hasAddressTaken()) 235 return false; 236 } 237 return true; 238 } 239 SmallPtrSet<const Function *, 2> OutlinedFunctions; 240 ProfileSummaryInfo *PSI; 241 function_ref<BlockFrequencyInfo *(Function &)> GetBFI; 242 function_ref<TargetTransformInfo &(Function &)> GetTTI; 243 std::function<OptimizationRemarkEmitter &(Function &)> *GetORE; 244 }; 245 246 class HotColdSplittingLegacyPass : public ModulePass { 247 public: 248 static char ID; 249 HotColdSplittingLegacyPass() : ModulePass(ID) { 250 initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry()); 251 } 252 253 void getAnalysisUsage(AnalysisUsage &AU) const override { 254 AU.addRequired<AssumptionCacheTracker>(); 255 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 256 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 257 AU.addRequired<TargetTransformInfoWrapperPass>(); 258 } 259 260 bool runOnModule(Module &M) override; 261 }; 262 263 } // end anonymous namespace 264 265 // Returns false if the function should not be considered for hot-cold split 266 // optimization. 267 bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { 268 // Do not try to outline again from an already outlined cold function. 269 if (OutlinedFunctions.count(&F)) 270 return false; 271 272 if (F.size() <= 2) 273 return false; 274 275 if (F.hasAddressTaken()) 276 return false; 277 278 if (F.hasFnAttribute(Attribute::AlwaysInline)) 279 return false; 280 281 if (F.hasFnAttribute(Attribute::NoInline)) 282 return false; 283 284 if (F.getCallingConv() == CallingConv::Cold) 285 return false; 286 287 if (PSI->isFunctionEntryCold(&F)) 288 return false; 289 return true; 290 } 291 292 Function * 293 HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region, 294 DominatorTree *DT, BlockFrequencyInfo *BFI, 295 OptimizationRemarkEmitter &ORE) { 296 LLVM_DEBUG(for (auto *BB : Region) 297 llvm::dbgs() << "\nExtracting: " << *BB;); 298 299 // TODO: Pass BFI and BPI to update profile information. 300 CodeExtractor CE(Region, DT); 301 302 SetVector<Value *> Inputs, Outputs, Sinks; 303 CE.findInputsOutputs(Inputs, Outputs, Sinks); 304 305 // Do not extract regions that have live exit variables. 306 if (Outputs.size() > 0) 307 return nullptr; 308 309 if (Function *OutF = CE.extractCodeRegion()) { 310 User *U = *OutF->user_begin(); 311 CallInst *CI = cast<CallInst>(U); 312 CallSite CS(CI); 313 NumColdSESEOutlined++; 314 if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) { 315 OutF->setCallingConv(CallingConv::Cold); 316 CS.setCallingConv(CallingConv::Cold); 317 } 318 CI->setIsNoInline(); 319 LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF); 320 return OutF; 321 } 322 323 ORE.emit([&]() { 324 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", 325 &*Region[0]->begin()) 326 << "Failed to extract region at block " 327 << ore::NV("Block", Region.front()); 328 }); 329 return nullptr; 330 } 331 332 // Return the function created after outlining, nullptr otherwise. 333 const Function *HotColdSplitting::outlineColdBlocks(Function &F, 334 const DenseSetBB &HotBlocks, 335 DominatorTree *DT, 336 PostDomTree *PDT) { 337 auto BFI = GetBFI(F); 338 auto &ORE = (*GetORE)(F); 339 // Walking the dominator tree allows us to find the largest 340 // cold region. 341 BasicBlock *Begin = DT->getRootNode()->getBlock(); 342 for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) { 343 BasicBlock *BB = *I; 344 if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) { 345 SmallVector<BasicBlock *, 4> ValidColdRegion, Region; 346 BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock(); 347 BasicBlock *ExitColdRegion = nullptr; 348 349 // Estimated cold region between a BB and its dom-frontier. 350 while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) && 351 isOutlineCandidate(Region, Exit)) { 352 ExitColdRegion = Exit; 353 ValidColdRegion = Region; 354 Region.clear(); 355 // Update Exit recursively to its dom-frontier. 356 Exit = (*PDT)[Exit]->getIDom()->getBlock(); 357 } 358 if (ExitColdRegion) { 359 // Do not outline a region with only one block. 360 if (ValidColdRegion.size() == 1) 361 continue; 362 363 ++NumColdSESEFound; 364 ValidColdRegion.push_back(ExitColdRegion); 365 // Candidate for outlining. FIXME: Continue outlining. 366 return extractColdRegion(ValidColdRegion, DT, BFI, ORE); 367 } 368 } 369 } 370 return nullptr; 371 } 372 373 bool HotColdSplitting::run(Module &M) { 374 for (auto &F : M) { 375 if (!shouldOutlineFrom(F)) 376 continue; 377 DominatorTree DT(F); 378 PostDomTree PDT(F); 379 PDT.recalculate(F); 380 DenseSetBB HotBlocks; 381 if (EnableStaticAnalyis) // Static analysis of cold blocks. 382 HotBlocks = getHotBlocks(F); 383 384 const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT); 385 if (Outlined) 386 OutlinedFunctions.insert(Outlined); 387 } 388 return true; 389 } 390 391 bool HotColdSplittingLegacyPass::runOnModule(Module &M) { 392 if (skipModule(M)) 393 return false; 394 ProfileSummaryInfo *PSI = 395 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 396 auto GTTI = [this](Function &F) -> TargetTransformInfo & { 397 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 398 }; 399 auto GBFI = [this](Function &F) { 400 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 401 }; 402 std::unique_ptr<OptimizationRemarkEmitter> ORE; 403 std::function<OptimizationRemarkEmitter &(Function &)> GetORE = 404 [&ORE](Function &F) -> OptimizationRemarkEmitter & { 405 ORE.reset(new OptimizationRemarkEmitter(&F)); 406 return *ORE.get(); 407 }; 408 409 return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M); 410 } 411 412 char HotColdSplittingLegacyPass::ID = 0; 413 INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit", 414 "Hot Cold Splitting", false, false) 415 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 416 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 417 INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit", 418 "Hot Cold Splitting", false, false) 419 420 ModulePass *llvm::createHotColdSplittingPass() { 421 return new HotColdSplittingLegacyPass(); 422 } 423