1 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Outline cold regions to a separate function.
11 // TODO: Update BFI and BPI
12 // TODO: Add all the outlined functions to a separate section.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/BranchProbabilityInfo.h"
21 #include "llvm/Analysis/CFG.h"
22 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
23 #include "llvm/Analysis/PostDominators.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CFG.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Metadata.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/IR/Use.h"
39 #include "llvm/IR/User.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/BlockFrequency.h"
43 #include "llvm/Support/BranchProbability.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/Transforms/IPO.h"
47 #include "llvm/Transforms/IPO/HotColdSplitting.h"
48 #include "llvm/Transforms/Scalar.h"
49 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
50 #include "llvm/Transforms/Utils/Cloning.h"
51 #include "llvm/Transforms/Utils/CodeExtractor.h"
52 #include "llvm/Transforms/Utils/Local.h"
53 #include "llvm/Transforms/Utils/SSAUpdater.h"
54 #include "llvm/Transforms/Utils/ValueMapper.h"
55 #include <algorithm>
56 #include <cassert>
57 
58 #define DEBUG_TYPE "hotcoldsplit"
59 
60 STATISTIC(NumColdSESEFound,
61           "Number of cold single entry single exit (SESE) regions found.");
62 STATISTIC(NumColdSESEOutlined,
63           "Number of cold single entry single exit (SESE) regions outlined.");
64 
65 using namespace llvm;
66 
67 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
68                               cl::init(true), cl::Hidden);
69 
70 
71 namespace {
72 
73 struct PostDomTree : PostDomTreeBase<BasicBlock> {
74   PostDomTree(Function &F) { recalculate(F); }
75 };
76 
77 typedef DenseSet<const BasicBlock *> DenseSetBB;
78 typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;
79 
80 // From: https://reviews.llvm.org/D22558
81 // Exit is not part of the region.
82 static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
83                                     DominatorTree *DT, PostDomTree *PDT,
84                                     SmallVectorImpl<BasicBlock *> &Region) {
85   if (!DT->dominates(Entry, Exit))
86     return false;
87 
88   if (!PDT->dominates(Exit, Entry))
89     return false;
90 
91   for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
92     if (*I == Exit) {
93       I.skipChildren();
94       continue;
95     }
96     if (!DT->dominates(Entry, *I))
97       return false;
98     Region.push_back(*I);
99     ++I;
100   }
101   return true;
102 }
103 
104 // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
105 // this function unless you modify the MBB version as well.
106 //
107 /// A no successor, non-return block probably ends in unreachable and is cold.
108 /// Also consider a block that ends in an indirect branch to be a return block,
109 /// since many targets use plain indirect branches to return.
110 bool blockEndsInUnreachable(const BasicBlock &BB) {
111   if (!succ_empty(&BB))
112     return false;
113   if (BB.empty())
114     return true;
115   const Instruction *I = BB.getTerminator();
116   return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
117 }
118 
119 static bool exceptionHandlingFunctions(const CallInst *CI) {
120   auto F = CI->getCalledFunction();
121   if (!F)
122     return false;
123   auto FName = F->getName();
124   return FName == "__cxa_begin_catch" ||
125          FName == "__cxa_free_exception" ||
126          FName == "__cxa_allocate_exception" ||
127          FName == "__cxa_begin_catch" ||
128          FName == "__cxa_end_catch";
129 }
130 
131 static bool unlikelyExecuted(const BasicBlock &BB) {
132   if (blockEndsInUnreachable(BB))
133     return true;
134   // Exception handling blocks are unlikely executed.
135   if (BB.isEHPad())
136     return true;
137   for (const Instruction &I : BB)
138     if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
139       // The block is cold if it calls functions tagged as cold or noreturn.
140       if (CI->hasFnAttr(Attribute::Cold) ||
141           CI->hasFnAttr(Attribute::NoReturn) ||
142           exceptionHandlingFunctions(CI))
143         return true;
144 
145       // Assume that inline assembly is hot code.
146       if (isa<InlineAsm>(CI->getCalledValue()))
147         return false;
148     }
149   return false;
150 }
151 
152 static bool returnsOrHasSideEffects(const BasicBlock &BB) {
153   const Instruction *I = BB.getTerminator();
154   if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I) || isa<InvokeInst>(I))
155     return true;
156 
157   for (const Instruction &I : BB)
158     if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
159       if (CI->hasFnAttr(Attribute::NoReturn))
160         return true;
161 
162       if (isa<InlineAsm>(CI->getCalledValue()))
163         return true;
164     }
165 
166   return false;
167 }
168 
169 static DenseSetBB getHotBlocks(Function &F) {
170 
171   // Mark all cold basic blocks.
172   DenseSetBB ColdBlocks;
173   for (BasicBlock &BB : F)
174     if (unlikelyExecuted(BB)) {
175       LLVM_DEBUG(llvm::dbgs() << "\nForward propagation marks cold: " << BB);
176       ColdBlocks.insert((const BasicBlock *)&BB);
177     }
178 
179   // Forward propagation: basic blocks are hot when they are reachable from the
180   // beginning of the function through a path that does not contain cold blocks.
181   SmallVector<const BasicBlock *, 8> WL;
182   DenseSetBB HotBlocks;
183 
184   const BasicBlock *It = &F.front();
185   if (!ColdBlocks.count(It)) {
186     HotBlocks.insert(It);
187     // Breadth First Search to mark edges reachable from hot.
188     WL.push_back(It);
189     while (WL.size() > 0) {
190       It = WL.pop_back_val();
191 
192       for (const BasicBlock *Succ : successors(It)) {
193         // Do not visit blocks that are cold.
194         if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {
195           HotBlocks.insert(Succ);
196           WL.push_back(Succ);
197         }
198       }
199     }
200   }
201 
202   assert(WL.empty() && "work list should be empty");
203 
204   DenseMapBBInt NumHotSuccessors;
205   // Back propagation: when all successors of a basic block are cold, the
206   // basic block is cold as well.
207   for (BasicBlock &BBRef : F) {
208     const BasicBlock *BB = &BBRef;
209     if (HotBlocks.count(BB)) {
210       // Keep a count of hot successors for every hot block.
211       NumHotSuccessors[BB] = 0;
212       for (const BasicBlock *Succ : successors(BB))
213         if (!ColdBlocks.count(Succ))
214           NumHotSuccessors[BB] += 1;
215 
216       // Add to work list the blocks with all successors cold. Those are the
217       // root nodes in the next loop, where we will move those blocks from
218       // HotBlocks to ColdBlocks and iterate over their predecessors.
219       if (NumHotSuccessors[BB] == 0)
220         WL.push_back(BB);
221     }
222   }
223 
224   while (WL.size() > 0) {
225     It = WL.pop_back_val();
226     if (ColdBlocks.count(It))
227       continue;
228 
229     // Do not back-propagate to blocks that return or have side effects.
230     if (returnsOrHasSideEffects(*It))
231       continue;
232 
233     // Move the block from HotBlocks to ColdBlocks.
234     LLVM_DEBUG(llvm::dbgs() << "\nBack propagation marks cold: " << *It);
235     HotBlocks.erase(It);
236     ColdBlocks.insert(It);
237 
238     // Iterate over the predecessors.
239     for (const BasicBlock *Pred : predecessors(It)) {
240       if (HotBlocks.count(Pred)) {
241         NumHotSuccessors[Pred] -= 1;
242 
243         // If Pred has no more hot successors, add it to the work list.
244         if (NumHotSuccessors[Pred] == 0)
245           WL.push_back(Pred);
246       }
247     }
248   }
249 
250   return HotBlocks;
251 }
252 
253 class HotColdSplitting {
254 public:
255   HotColdSplitting(ProfileSummaryInfo *ProfSI,
256                    function_ref<BlockFrequencyInfo *(Function &)> GBFI,
257                    function_ref<TargetTransformInfo &(Function &)> GTTI,
258                    std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
259       : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
260   bool run(Module &M);
261 
262 private:
263   bool shouldOutlineFrom(const Function &F) const;
264   const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,
265                                     DominatorTree *DT, PostDomTree *PDT);
266   Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
267                               DominatorTree *DT, BlockFrequencyInfo *BFI,
268                               OptimizationRemarkEmitter &ORE);
269   bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
270                           const BasicBlock *Exit) const {
271     if (!Exit)
272       return false;
273 
274     // Regions with landing pads etc.
275     for (const BasicBlock *BB : Region) {
276       if (BB->isEHPad() || BB->hasAddressTaken())
277         return false;
278     }
279     return true;
280   }
281   SmallPtrSet<const Function *, 2> OutlinedFunctions;
282   ProfileSummaryInfo *PSI;
283   function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
284   function_ref<TargetTransformInfo &(Function &)> GetTTI;
285   std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
286 };
287 
288 class HotColdSplittingLegacyPass : public ModulePass {
289 public:
290   static char ID;
291   HotColdSplittingLegacyPass() : ModulePass(ID) {
292     initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
293   }
294 
295   void getAnalysisUsage(AnalysisUsage &AU) const override {
296     AU.addRequired<AssumptionCacheTracker>();
297     AU.addRequired<BlockFrequencyInfoWrapperPass>();
298     AU.addRequired<ProfileSummaryInfoWrapperPass>();
299     AU.addRequired<TargetTransformInfoWrapperPass>();
300   }
301 
302   bool runOnModule(Module &M) override;
303 };
304 
305 } // end anonymous namespace
306 
307 // Returns false if the function should not be considered for hot-cold split
308 // optimization.
309 bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
310   // Do not try to outline again from an already outlined cold function.
311   if (OutlinedFunctions.count(&F))
312     return false;
313 
314   if (F.size() <= 2)
315     return false;
316 
317   if (F.hasAddressTaken())
318     return false;
319 
320   if (F.hasFnAttribute(Attribute::AlwaysInline))
321     return false;
322 
323   if (F.hasFnAttribute(Attribute::NoInline))
324     return false;
325 
326   if (F.getCallingConv() == CallingConv::Cold)
327     return false;
328 
329   if (PSI->isFunctionEntryCold(&F))
330     return false;
331   return true;
332 }
333 
334 Function *
335 HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
336                                     DominatorTree *DT, BlockFrequencyInfo *BFI,
337                                     OptimizationRemarkEmitter &ORE) {
338   assert(!Region.empty());
339   LLVM_DEBUG(for (auto *BB : Region)
340           llvm::dbgs() << "\nExtracting: " << *BB;);
341 
342   // TODO: Pass BFI and BPI to update profile information.
343   CodeExtractor CE(Region, DT);
344 
345   SetVector<Value *> Inputs, Outputs, Sinks;
346   CE.findInputsOutputs(Inputs, Outputs, Sinks);
347 
348   // Do not extract regions that have live exit variables.
349   if (Outputs.size() > 0)
350     return nullptr;
351 
352   Function *OrigF = Region[0]->getParent();
353   if (Function *OutF = CE.extractCodeRegion()) {
354     User *U = *OutF->user_begin();
355     CallInst *CI = cast<CallInst>(U);
356     CallSite CS(CI);
357     NumColdSESEOutlined++;
358     if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
359       OutF->setCallingConv(CallingConv::Cold);
360       CS.setCallingConv(CallingConv::Cold);
361     }
362     CI->setIsNoInline();
363     LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
364     ORE.emit([&]() {
365       return OptimizationRemark(DEBUG_TYPE, "HotColdSplit",
366                                 &*Region[0]->begin())
367              << ore::NV("Original", OrigF) << " split cold code into "
368              << ore::NV("Split", OutF);
369     });
370     return OutF;
371   }
372 
373   ORE.emit([&]() {
374     return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
375                                     &*Region[0]->begin())
376            << "Failed to extract region at block "
377            << ore::NV("Block", Region.front());
378   });
379   return nullptr;
380 }
381 
382 // Return the function created after outlining, nullptr otherwise.
383 const Function *HotColdSplitting::outlineColdBlocks(Function &F,
384                                                     const DenseSetBB &HotBlocks,
385                                                     DominatorTree *DT,
386                                                     PostDomTree *PDT) {
387   auto BFI = GetBFI(F);
388   auto &ORE = (*GetORE)(F);
389   // Walking the dominator tree allows us to find the largest
390   // cold region.
391   BasicBlock *Begin = DT->getRootNode()->getBlock();
392 
393   // Early return if the beginning of the function has been marked cold,
394   // otherwise all the function gets outlined.
395   if (PSI->isColdBB(Begin, BFI) || !HotBlocks.count(Begin))
396     return nullptr;
397 
398   for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
399     BasicBlock *BB = *I;
400     if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) {
401       SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
402       BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
403       BasicBlock *ExitColdRegion = nullptr;
404 
405       // Estimated cold region between a BB and its dom-frontier.
406       while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
407              isOutlineCandidate(Region, Exit)) {
408         ExitColdRegion = Exit;
409         ValidColdRegion = Region;
410         Region.clear();
411         // Update Exit recursively to its dom-frontier.
412         Exit = (*PDT)[Exit]->getIDom()->getBlock();
413       }
414       if (ExitColdRegion) {
415         // Do not outline a region with only one block.
416         if (ValidColdRegion.size() == 1)
417           continue;
418 
419         ++NumColdSESEFound;
420         ValidColdRegion.push_back(ExitColdRegion);
421         // Candidate for outlining. FIXME: Continue outlining.
422         return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
423       }
424     }
425   }
426   return nullptr;
427 }
428 
429 bool HotColdSplitting::run(Module &M) {
430   for (auto &F : M) {
431     if (!shouldOutlineFrom(F))
432       continue;
433     DominatorTree DT(F);
434     PostDomTree PDT(F);
435     PDT.recalculate(F);
436     DenseSetBB HotBlocks;
437     if (EnableStaticAnalyis) // Static analysis of cold blocks.
438       HotBlocks = getHotBlocks(F);
439 
440     const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
441     if (Outlined)
442       OutlinedFunctions.insert(Outlined);
443   }
444   return true;
445 }
446 
447 bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
448   if (skipModule(M))
449     return false;
450   ProfileSummaryInfo *PSI =
451       getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
452   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
453     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
454   };
455   auto GBFI = [this](Function &F) {
456     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
457   };
458   std::unique_ptr<OptimizationRemarkEmitter> ORE;
459   std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
460       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
461     ORE.reset(new OptimizationRemarkEmitter(&F));
462     return *ORE.get();
463   };
464 
465   return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
466 }
467 
468 PreservedAnalyses
469 HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
470   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
471 
472   std::function<AssumptionCache &(Function &)> GetAssumptionCache =
473       [&FAM](Function &F) -> AssumptionCache & {
474     return FAM.getResult<AssumptionAnalysis>(F);
475   };
476 
477   auto GBFI = [&FAM](Function &F) {
478     return &FAM.getResult<BlockFrequencyAnalysis>(F);
479   };
480 
481   std::function<TargetTransformInfo &(Function &)> GTTI =
482       [&FAM](Function &F) -> TargetTransformInfo & {
483     return FAM.getResult<TargetIRAnalysis>(F);
484   };
485 
486   std::unique_ptr<OptimizationRemarkEmitter> ORE;
487   std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
488       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
489     ORE.reset(new OptimizationRemarkEmitter(&F));
490     return *ORE.get();
491   };
492 
493   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
494 
495   if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
496     return PreservedAnalyses::none();
497   return PreservedAnalyses::all();
498 }
499 
500 char HotColdSplittingLegacyPass::ID = 0;
501 INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
502                       "Hot Cold Splitting", false, false)
503 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
504 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
505 INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
506                     "Hot Cold Splitting", false, false)
507 
508 ModulePass *llvm::createHotColdSplittingPass() {
509   return new HotColdSplittingLegacyPass();
510 }
511