1 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Outline cold regions to a separate function.
11 // TODO: Update BFI and BPI
12 // TODO: Add all the outlined functions to a separate section.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/BranchProbabilityInfo.h"
21 #include "llvm/Analysis/CFG.h"
22 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
23 #include "llvm/Analysis/PostDominators.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CFG.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DiagnosticInfo.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Metadata.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/IR/Use.h"
39 #include "llvm/IR/User.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/BlockFrequency.h"
43 #include "llvm/Support/BranchProbability.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/Transforms/IPO.h"
47 #include "llvm/Transforms/Scalar.h"
48 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
49 #include "llvm/Transforms/Utils/Cloning.h"
50 #include "llvm/Transforms/Utils/CodeExtractor.h"
51 #include "llvm/Transforms/Utils/Local.h"
52 #include "llvm/Transforms/Utils/SSAUpdater.h"
53 #include "llvm/Transforms/Utils/ValueMapper.h"
54 #include <algorithm>
55 #include <cassert>
56 
57 #define DEBUG_TYPE "hotcoldsplit"
58 
59 STATISTIC(NumColdSESEFound,
60           "Number of cold single entry single exit (SESE) regions found.");
61 STATISTIC(NumColdSESEOutlined,
62           "Number of cold single entry single exit (SESE) regions outlined.");
63 
64 using namespace llvm;
65 
66 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
67                               cl::init(true), cl::Hidden);
68 
69 
70 namespace {
71 
72 struct PostDomTree : PostDomTreeBase<BasicBlock> {
73   PostDomTree(Function &F) { recalculate(F); }
74 };
75 
76 typedef DenseSet<const BasicBlock *> DenseSetBB;
77 typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;
78 
79 // From: https://reviews.llvm.org/D22558
80 // Exit is not part of the region.
81 static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
82                                     DominatorTree *DT, PostDomTree *PDT,
83                                     SmallVectorImpl<BasicBlock *> &Region) {
84   if (!DT->dominates(Entry, Exit))
85     return false;
86 
87   if (!PDT->dominates(Exit, Entry))
88     return false;
89 
90   for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
91     if (*I == Exit) {
92       I.skipChildren();
93       continue;
94     }
95     if (!DT->dominates(Entry, *I))
96       return false;
97     Region.push_back(*I);
98     ++I;
99   }
100   return true;
101 }
102 
103 bool blockEndsInUnreachable(const BasicBlock &BB) {
104   if (BB.empty())
105     return true;
106   const TerminatorInst *I = BB.getTerminator();
107   if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I))
108     return true;
109   // Unreachable blocks do not have any successor.
110   return succ_empty(&BB);
111 }
112 
113 static
114 bool unlikelyExecuted(const BasicBlock &BB) {
115   if (blockEndsInUnreachable(BB))
116     return true;
117   // Exception handling blocks are unlikely executed.
118   if (BB.isEHPad())
119     return true;
120   for (const Instruction &I : BB)
121     if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
122       // The block is cold if it calls functions tagged as cold or noreturn.
123       if (CI->hasFnAttr(Attribute::Cold) ||
124           CI->hasFnAttr(Attribute::NoReturn))
125         return true;
126 
127       // Assume that inline assembly is hot code.
128       if (isa<InlineAsm>(CI->getCalledValue()))
129         return false;
130     }
131   return false;
132 }
133 
134 static DenseSetBB getHotBlocks(Function &F) {
135 
136   // Mark all cold basic blocks.
137   DenseSetBB ColdBlocks;
138   for (BasicBlock &BB : F)
139     if (unlikelyExecuted(BB))
140       ColdBlocks.insert((const BasicBlock *)&BB);
141 
142   // Forward propagation: basic blocks are hot when they are reachable from the
143   // beginning of the function through a path that does not contain cold blocks.
144   SmallVector<const BasicBlock *, 8> WL;
145   DenseSetBB HotBlocks;
146 
147   const BasicBlock *It = &F.front();
148   if (!ColdBlocks.count(It)) {
149     HotBlocks.insert(It);
150     // Breadth First Search to mark edges reachable from hot.
151     WL.push_back(It);
152     while (WL.size() > 0) {
153       It = WL.pop_back_val();
154 
155       for (const BasicBlock *Succ : successors(It)) {
156         // Do not visit blocks that are cold.
157         if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {
158           HotBlocks.insert(Succ);
159           WL.push_back(Succ);
160         }
161       }
162     }
163   }
164 
165   assert(WL.empty() && "work list should be empty");
166 
167   DenseMapBBInt NumHotSuccessors;
168   // Back propagation: when all successors of a basic block are cold, the
169   // basic block is cold as well.
170   for (BasicBlock &BBRef : F) {
171     const BasicBlock *BB = &BBRef;
172     if (HotBlocks.count(BB)) {
173       // Keep a count of hot successors for every hot block.
174       NumHotSuccessors[BB] = 0;
175       for (const BasicBlock *Succ : successors(BB))
176         if (!ColdBlocks.count(Succ))
177           NumHotSuccessors[BB] += 1;
178 
179       // Add to work list the blocks with all successors cold. Those are the
180       // root nodes in the next loop, where we will move those blocks from
181       // HotBlocks to ColdBlocks and iterate over their predecessors.
182       if (NumHotSuccessors[BB] == 0)
183         WL.push_back(BB);
184     }
185   }
186 
187   while (WL.size() > 0) {
188     It = WL.pop_back_val();
189     if (ColdBlocks.count(It))
190       continue;
191 
192     // Move the block from HotBlocks to ColdBlocks.
193     HotBlocks.erase(It);
194     ColdBlocks.insert(It);
195 
196     // Iterate over the predecessors.
197     for (const BasicBlock *Pred : predecessors(It)) {
198       if (HotBlocks.count(Pred)) {
199         NumHotSuccessors[Pred] -= 1;
200 
201         // If Pred has no more hot successors, add it to the work list.
202         if (NumHotSuccessors[Pred] == 0)
203           WL.push_back(Pred);
204       }
205     }
206   }
207 
208   return HotBlocks;
209 }
210 
211 class HotColdSplitting {
212 public:
213   HotColdSplitting(ProfileSummaryInfo *ProfSI,
214                    function_ref<BlockFrequencyInfo *(Function &)> GBFI,
215                    function_ref<TargetTransformInfo &(Function &)> GTTI,
216                    std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
217       : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
218   bool run(Module &M);
219 
220 private:
221   bool shouldOutlineFrom(const Function &F) const;
222   const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,
223                                     DominatorTree *DT, PostDomTree *PDT);
224   Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
225                               DominatorTree *DT, BlockFrequencyInfo *BFI,
226                               OptimizationRemarkEmitter &ORE);
227   bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
228                           const BasicBlock *Exit) const {
229     if (!Exit)
230       return false;
231 
232     // Regions with landing pads etc.
233     for (const BasicBlock *BB : Region) {
234       if (BB->isEHPad() || BB->hasAddressTaken())
235         return false;
236     }
237     return true;
238   }
239   SmallPtrSet<const Function *, 2> OutlinedFunctions;
240   ProfileSummaryInfo *PSI;
241   function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
242   function_ref<TargetTransformInfo &(Function &)> GetTTI;
243   std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
244 };
245 
246 class HotColdSplittingLegacyPass : public ModulePass {
247 public:
248   static char ID;
249   HotColdSplittingLegacyPass() : ModulePass(ID) {
250     initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
251   }
252 
253   void getAnalysisUsage(AnalysisUsage &AU) const override {
254     AU.addRequired<AssumptionCacheTracker>();
255     AU.addRequired<BlockFrequencyInfoWrapperPass>();
256     AU.addRequired<ProfileSummaryInfoWrapperPass>();
257     AU.addRequired<TargetTransformInfoWrapperPass>();
258   }
259 
260   bool runOnModule(Module &M) override;
261 };
262 
263 } // end anonymous namespace
264 
265 // Returns false if the function should not be considered for hot-cold split
266 // optimization.
267 bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
268   // Do not try to outline again from an already outlined cold function.
269   if (OutlinedFunctions.count(&F))
270     return false;
271 
272   if (F.size() <= 2)
273     return false;
274 
275   if (F.hasAddressTaken())
276     return false;
277 
278   if (F.hasFnAttribute(Attribute::AlwaysInline))
279     return false;
280 
281   if (F.hasFnAttribute(Attribute::NoInline))
282     return false;
283 
284   if (F.getCallingConv() == CallingConv::Cold)
285     return false;
286 
287   if (PSI->isFunctionEntryCold(&F))
288     return false;
289   return true;
290 }
291 
292 Function *
293 HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
294                                     DominatorTree *DT, BlockFrequencyInfo *BFI,
295                                     OptimizationRemarkEmitter &ORE) {
296   LLVM_DEBUG(for (auto *BB : Region)
297           llvm::dbgs() << "\nExtracting: " << *BB;);
298 
299   // TODO: Pass BFI and BPI to update profile information.
300   CodeExtractor CE(Region, DT);
301 
302   SetVector<Value *> Inputs, Outputs, Sinks;
303   CE.findInputsOutputs(Inputs, Outputs, Sinks);
304 
305   // Do not extract regions that have live exit variables.
306   if (Outputs.size() > 0)
307     return nullptr;
308 
309   if (Function *OutF = CE.extractCodeRegion()) {
310     User *U = *OutF->user_begin();
311     CallInst *CI = cast<CallInst>(U);
312     CallSite CS(CI);
313     NumColdSESEOutlined++;
314     if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
315       OutF->setCallingConv(CallingConv::Cold);
316       CS.setCallingConv(CallingConv::Cold);
317     }
318     CI->setIsNoInline();
319     LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
320     return OutF;
321   }
322 
323   ORE.emit([&]() {
324     return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
325                                     &*Region[0]->begin())
326            << "Failed to extract region at block "
327            << ore::NV("Block", Region.front());
328   });
329   return nullptr;
330 }
331 
332 // Return the function created after outlining, nullptr otherwise.
333 const Function *HotColdSplitting::outlineColdBlocks(Function &F,
334                                                     const DenseSetBB &HotBlocks,
335                                                     DominatorTree *DT,
336                                                     PostDomTree *PDT) {
337   auto BFI = GetBFI(F);
338   auto &ORE = (*GetORE)(F);
339   // Walking the dominator tree allows us to find the largest
340   // cold region.
341   BasicBlock *Begin = DT->getRootNode()->getBlock();
342   for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
343     BasicBlock *BB = *I;
344     if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) {
345       SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
346       BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
347       BasicBlock *ExitColdRegion = nullptr;
348 
349       // Estimated cold region between a BB and its dom-frontier.
350       while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
351              isOutlineCandidate(Region, Exit)) {
352         ExitColdRegion = Exit;
353         ValidColdRegion = Region;
354         Region.clear();
355         // Update Exit recursively to its dom-frontier.
356         Exit = (*PDT)[Exit]->getIDom()->getBlock();
357       }
358       if (ExitColdRegion) {
359         // Do not outline a region with only one block.
360         if (ValidColdRegion.size() == 1)
361           continue;
362 
363         ++NumColdSESEFound;
364         ValidColdRegion.push_back(ExitColdRegion);
365         // Candidate for outlining. FIXME: Continue outlining.
366         return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
367       }
368     }
369   }
370   return nullptr;
371 }
372 
373 bool HotColdSplitting::run(Module &M) {
374   for (auto &F : M) {
375     if (!shouldOutlineFrom(F))
376       continue;
377     DominatorTree DT(F);
378     PostDomTree PDT(F);
379     PDT.recalculate(F);
380     DenseSetBB HotBlocks;
381     if (EnableStaticAnalyis) // Static analysis of cold blocks.
382       HotBlocks = getHotBlocks(F);
383 
384     const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
385     if (Outlined)
386       OutlinedFunctions.insert(Outlined);
387   }
388   return true;
389 }
390 
391 bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
392   if (skipModule(M))
393     return false;
394   ProfileSummaryInfo *PSI =
395       getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
396   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
397     return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
398   };
399   auto GBFI = [this](Function &F) {
400     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
401   };
402   std::unique_ptr<OptimizationRemarkEmitter> ORE;
403   std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
404       [&ORE](Function &F) -> OptimizationRemarkEmitter & {
405     ORE.reset(new OptimizationRemarkEmitter(&F));
406     return *ORE.get();
407   };
408 
409   return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
410 }
411 
412 char HotColdSplittingLegacyPass::ID = 0;
413 INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
414                       "Hot Cold Splitting", false, false)
415 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
416 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
417 INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
418                     "Hot Cold Splitting", false, false)
419 
420 ModulePass *llvm::createHotColdSplittingPass() {
421   return new HotColdSplittingLegacyPass();
422 }
423