1 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Outline cold regions to a separate function.
11 // TODO: Update BFI and BPI
12 // TODO: Add all the outlined functions to a separate section.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/BranchProbabilityInfo.h"
21 #include "llvm/Analysis/CFG.h"
22 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
23 #include "llvm/Analysis/PostDominators.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CFG.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Metadata.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/IR/Use.h"
39 #include "llvm/IR/User.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/BlockFrequency.h"
43 #include "llvm/Support/BranchProbability.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/Transforms/IPO.h"
47 #include "llvm/Transforms/IPO/HotColdSplitting.h"
48 #include "llvm/Transforms/Scalar.h"
49 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
50 #include "llvm/Transforms/Utils/Cloning.h"
51 #include "llvm/Transforms/Utils/CodeExtractor.h"
52 #include "llvm/Transforms/Utils/Local.h"
53 #include "llvm/Transforms/Utils/SSAUpdater.h"
54 #include "llvm/Transforms/Utils/ValueMapper.h"
55 #include <algorithm>
56 #include <cassert>
57 
58 #define DEBUG_TYPE "hotcoldsplit"
59 
60 STATISTIC(NumColdSESEFound,
61           "Number of cold single entry single exit (SESE) regions found.");
62 STATISTIC(NumColdSESEOutlined,
63           "Number of cold single entry single exit (SESE) regions outlined.");
64 
65 using namespace llvm;
66 
67 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
68                               cl::init(true), cl::Hidden);
69 
70 
71 namespace {
72 
73 struct PostDomTree : PostDomTreeBase<BasicBlock> {
74   PostDomTree(Function &F) { recalculate(F); }
75 };
76 
77 typedef DenseSet<const BasicBlock *> DenseSetBB;
78 typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;
79 
80 // From: https://reviews.llvm.org/D22558
81 // Exit is not part of the region.
82 static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
83                                     DominatorTree *DT, PostDomTree *PDT,
84                                     SmallVectorImpl<BasicBlock *> &Region) {
85   if (!DT->dominates(Entry, Exit))
86     return false;
87 
88   if (!PDT->dominates(Exit, Entry))
89     return false;
90 
91   for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
92     if (*I == Exit) {
93       I.skipChildren();
94       continue;
95     }
96     if (!DT->dominates(Entry, *I))
97       return false;
98     Region.push_back(*I);
99     ++I;
100   }
101   return true;
102 }
103 
104 bool blockEndsInUnreachable(const BasicBlock &BB) {
105   if (BB.empty())
106     return true;
107   const TerminatorInst *I = BB.getTerminator();
108   if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I))
109     return true;
110   // Unreachable blocks do not have any successor.
111   return succ_empty(&BB);
112 }
113 
114 static bool exceptionHandlingFunctions(const CallInst *CI) {
115   auto F = CI->getCalledFunction();
116   if (!F)
117     return false;
118   auto FName = F->getName();
119   return FName == "__cxa_begin_catch" ||
120          FName == "__cxa_free_exception" ||
121          FName == "__cxa_allocate_exception" ||
122          FName == "__cxa_begin_catch" ||
123          FName == "__cxa_end_catch";
124 }
125 
126 static
127 bool unlikelyExecuted(const BasicBlock &BB) {
128   if (blockEndsInUnreachable(BB))
129     return true;
130   // Exception handling blocks are unlikely executed.
131   if (BB.isEHPad())
132     return true;
133   for (const Instruction &I : BB)
134     if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
135       // The block is cold if it calls functions tagged as cold or noreturn.
136       if (CI->hasFnAttr(Attribute::Cold) ||
137           CI->hasFnAttr(Attribute::NoReturn) ||
138           exceptionHandlingFunctions(CI))
139         return true;
140 
141       // Assume that inline assembly is hot code.
142       if (isa<InlineAsm>(CI->getCalledValue()))
143         return false;
144     }
145   return false;
146 }
147 
148 static DenseSetBB getHotBlocks(Function &F) {
149 
150   // Mark all cold basic blocks.
151   DenseSetBB ColdBlocks;
152   for (BasicBlock &BB : F)
153     if (unlikelyExecuted(BB))
154       ColdBlocks.insert((const BasicBlock *)&BB);
155 
156   // Forward propagation: basic blocks are hot when they are reachable from the
157   // beginning of the function through a path that does not contain cold blocks.
158   SmallVector<const BasicBlock *, 8> WL;
159   DenseSetBB HotBlocks;
160 
161   const BasicBlock *It = &F.front();
162   if (!ColdBlocks.count(It)) {
163     HotBlocks.insert(It);
164     // Breadth First Search to mark edges reachable from hot.
165     WL.push_back(It);
166     while (WL.size() > 0) {
167       It = WL.pop_back_val();
168 
169       for (const BasicBlock *Succ : successors(It)) {
170         // Do not visit blocks that are cold.
171         if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {
172           HotBlocks.insert(Succ);
173           WL.push_back(Succ);
174         }
175       }
176     }
177   }
178 
179   assert(WL.empty() && "work list should be empty");
180 
181   DenseMapBBInt NumHotSuccessors;
182   // Back propagation: when all successors of a basic block are cold, the
183   // basic block is cold as well.
184   for (BasicBlock &BBRef : F) {
185     const BasicBlock *BB = &BBRef;
186     if (HotBlocks.count(BB)) {
187       // Keep a count of hot successors for every hot block.
188       NumHotSuccessors[BB] = 0;
189       for (const BasicBlock *Succ : successors(BB))
190         if (!ColdBlocks.count(Succ))
191           NumHotSuccessors[BB] += 1;
192 
193       // Add to work list the blocks with all successors cold. Those are the
194       // root nodes in the next loop, where we will move those blocks from
195       // HotBlocks to ColdBlocks and iterate over their predecessors.
196       if (NumHotSuccessors[BB] == 0)
197         WL.push_back(BB);
198     }
199   }
200 
201   while (WL.size() > 0) {
202     It = WL.pop_back_val();
203     if (ColdBlocks.count(It))
204       continue;
205 
206     // Move the block from HotBlocks to ColdBlocks.
207     HotBlocks.erase(It);
208     ColdBlocks.insert(It);
209 
210     // Iterate over the predecessors.
211     for (const BasicBlock *Pred : predecessors(It)) {
212       if (HotBlocks.count(Pred)) {
213         NumHotSuccessors[Pred] -= 1;
214 
215         // If Pred has no more hot successors, add it to the work list.
216         if (NumHotSuccessors[Pred] == 0)
217           WL.push_back(Pred);
218       }
219     }
220   }
221 
222   return HotBlocks;
223 }
224 
225 class HotColdSplitting {
226 public:
227   HotColdSplitting(ProfileSummaryInfo *ProfSI,
228                    function_ref<BlockFrequencyInfo *(Function &)> GBFI,
229                    function_ref<TargetTransformInfo &(Function &)> GTTI,
230                    std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
231       : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
232   bool run(Module &M);
233 
234 private:
235   bool shouldOutlineFrom(const Function &F) const;
236   const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,
237                                     DominatorTree *DT, PostDomTree *PDT);
238   Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
239                               DominatorTree *DT, BlockFrequencyInfo *BFI,
240                               OptimizationRemarkEmitter &ORE);
241   bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
242                           const BasicBlock *Exit) const {
243     if (!Exit)
244       return false;
245 
246     // Regions with landing pads etc.
247     for (const BasicBlock *BB : Region) {
248       if (BB->isEHPad() || BB->hasAddressTaken())
249         return false;
250     }
251     return true;
252   }
253   SmallPtrSet<const Function *, 2> OutlinedFunctions;
254   ProfileSummaryInfo *PSI;
255   function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
256   function_ref<TargetTransformInfo &(Function &)> GetTTI;
257   std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
258 };
259 
260 class HotColdSplittingLegacyPass : public ModulePass {
261 public:
262   static char ID;
263   HotColdSplittingLegacyPass() : ModulePass(ID) {
264     initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
265   }
266 
267   void getAnalysisUsage(AnalysisUsage &AU) const override {
268     AU.addRequired<AssumptionCacheTracker>();
269     AU.addRequired<BlockFrequencyInfoWrapperPass>();
270     AU.addRequired<ProfileSummaryInfoWrapperPass>();
271     AU.addRequired<TargetTransformInfoWrapperPass>();
272   }
273 
274   bool runOnModule(Module &M) override;
275 };
276 
277 } // end anonymous namespace
278 
279 // Returns false if the function should not be considered for hot-cold split
280 // optimization.
281 bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
282   // Do not try to outline again from an already outlined cold function.
283   if (OutlinedFunctions.count(&F))
284     return false;
285 
286   if (F.size() <= 2)
287     return false;
288 
289   if (F.hasAddressTaken())
290     return false;
291 
292   if (F.hasFnAttribute(Attribute::AlwaysInline))
293     return false;
294 
295   if (F.hasFnAttribute(Attribute::NoInline))
296     return false;
297 
298   if (F.getCallingConv() == CallingConv::Cold)
299     return false;
300 
301   if (PSI->isFunctionEntryCold(&F))
302     return false;
303   return true;
304 }
305 
306 Function *
307 HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
308                                     DominatorTree *DT, BlockFrequencyInfo *BFI,
309                                     OptimizationRemarkEmitter &ORE) {
310   LLVM_DEBUG(for (auto *BB : Region)
311           llvm::dbgs() << "\nExtracting: " << *BB;);
312 
313   // TODO: Pass BFI and BPI to update profile information.
314   CodeExtractor CE(Region, DT);
315 
316   SetVector<Value *> Inputs, Outputs, Sinks;
317   CE.findInputsOutputs(Inputs, Outputs, Sinks);
318 
319   // Do not extract regions that have live exit variables.
320   if (Outputs.size() > 0)
321     return nullptr;
322 
323   if (Function *OutF = CE.extractCodeRegion()) {
324     User *U = *OutF->user_begin();
325     CallInst *CI = cast<CallInst>(U);
326     CallSite CS(CI);
327     NumColdSESEOutlined++;
328     if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
329       OutF->setCallingConv(CallingConv::Cold);
330       CS.setCallingConv(CallingConv::Cold);
331     }
332     CI->setIsNoInline();
333     LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
334     return OutF;
335   }
336 
337   ORE.emit([&]() {
338     return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
339                                     &*Region[0]->begin())
340            << "Failed to extract region at block "
341            << ore::NV("Block", Region.front());
342   });
343   return nullptr;
344 }
345 
346 // Return the function created after outlining, nullptr otherwise.
347 const Function *HotColdSplitting::outlineColdBlocks(Function &F,
348                                                     const DenseSetBB &HotBlocks,
349                                                     DominatorTree *DT,
350                                                     PostDomTree *PDT) {
351   auto BFI = GetBFI(F);
352   auto &ORE = (*GetORE)(F);
353   // Walking the dominator tree allows us to find the largest
354   // cold region.
355   BasicBlock *Begin = DT->getRootNode()->getBlock();
356   for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
357     BasicBlock *BB = *I;
358     if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) {
359       SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
360       BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
361       BasicBlock *ExitColdRegion = nullptr;
362 
363       // Estimated cold region between a BB and its dom-frontier.
364       while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
365              isOutlineCandidate(Region, Exit)) {
366         ExitColdRegion = Exit;
367         ValidColdRegion = Region;
368         Region.clear();
369         // Update Exit recursively to its dom-frontier.
370         Exit = (*PDT)[Exit]->getIDom()->getBlock();
371       }
372       if (ExitColdRegion) {
373         // Do not outline a region with only one block.
374         if (ValidColdRegion.size() == 1)
375           continue;
376 
377         ++NumColdSESEFound;
378         ValidColdRegion.push_back(ExitColdRegion);
379         // Candidate for outlining. FIXME: Continue outlining.
380         return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
381       }
382     }
383   }
384   return nullptr;
385 }
386 
387 bool HotColdSplitting::run(Module &M) {
388   for (auto &F : M) {
389     if (!shouldOutlineFrom(F))
390       continue;
391     DominatorTree DT(F);
392     PostDomTree PDT(F);
393     PDT.recalculate(F);
394     DenseSetBB HotBlocks;
395     if (EnableStaticAnalyis) // Static analysis of cold blocks.
396       HotBlocks = getHotBlocks(F);
397 
398     const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
399     if (Outlined)
400       OutlinedFunctions.insert(Outlined);
401   }
402   return true;
403 }
404 
405 bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
406   if (skipModule(M))
407     return false;
408   ProfileSummaryInfo *PSI =
409       getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
410   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
411     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
412   };
413   auto GBFI = [this](Function &F) {
414     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
415   };
416   std::unique_ptr<OptimizationRemarkEmitter> ORE;
417   std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
418       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
419     ORE.reset(new OptimizationRemarkEmitter(&F));
420     return *ORE.get();
421   };
422 
423   return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
424 }
425 
426 PreservedAnalyses
427 HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
428   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
429 
430   std::function<AssumptionCache &(Function &)> GetAssumptionCache =
431       [&FAM](Function &F) -> AssumptionCache & {
432     return FAM.getResult<AssumptionAnalysis>(F);
433   };
434 
435   auto GBFI = [&FAM](Function &F) {
436     return &FAM.getResult<BlockFrequencyAnalysis>(F);
437   };
438 
439   std::function<TargetTransformInfo &(Function &)> GTTI =
440       [&FAM](Function &F) -> TargetTransformInfo & {
441     return FAM.getResult<TargetIRAnalysis>(F);
442   };
443 
444   std::unique_ptr<OptimizationRemarkEmitter> ORE;
445   std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
446       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
447     ORE.reset(new OptimizationRemarkEmitter(&F));
448     return *ORE.get();
449   };
450 
451   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
452 
453   if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
454     return PreservedAnalyses::none();
455   return PreservedAnalyses::all();
456 }
457 
458 char HotColdSplittingLegacyPass::ID = 0;
459 INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
460                       "Hot Cold Splitting", false, false)
461 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
462 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
463 INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
464                     "Hot Cold Splitting", false, false)
465 
466 ModulePass *llvm::createHotColdSplittingPass() {
467   return new HotColdSplittingLegacyPass();
468 }
469