1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SplitFunctions pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/SplitFunctions.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/ParallelUtilities.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/FormatVariadic.h"
18 #include <algorithm>
19 #include <random>
20 #include <vector>
21 
22 #define DEBUG_TYPE "bolt-opts"
23 
24 using namespace llvm;
25 using namespace bolt;
26 
27 namespace {
28 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
29 public:
DeprecatedSplitFunctionOptionParser(cl::Option & O)30   explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
31       : cl::parser<bool>(O) {}
32 
parse(cl::Option & O,StringRef ArgName,StringRef Arg,bool & Value)33   bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
34     if (Arg == "2" || Arg == "3") {
35       Value = true;
36       errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
37                         "for option -{1} is deprecated\n",
38                         Arg, ArgName);
39       return false;
40     }
41     return cl::parser<bool>::parse(O, ArgName, Arg, Value);
42   }
43 };
44 } // namespace
45 
46 namespace opts {
47 
48 extern cl::OptionCategory BoltOptCategory;
49 
50 extern cl::opt<bool> SplitEH;
51 extern cl::opt<unsigned> ExecutionCountThreshold;
52 extern cl::opt<uint32_t> RandomSeed;
53 
54 static cl::opt<bool> AggressiveSplitting(
55     "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
56     cl::cat(BoltOptCategory));
57 
58 static cl::opt<unsigned> SplitAlignThreshold(
59     "split-align-threshold",
60     cl::desc("when deciding to split a function, apply this alignment "
61              "while doing the size comparison (see -split-threshold). "
62              "Default value: 2."),
63     cl::init(2),
64 
65     cl::Hidden, cl::cat(BoltOptCategory));
66 
67 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
68     SplitFunctions("split-functions",
69                    cl::desc("split functions into hot and cold regions"),
70                    cl::cat(BoltOptCategory));
71 
72 static cl::opt<unsigned> SplitThreshold(
73     "split-threshold",
74     cl::desc("split function only if its main size is reduced by more than "
75              "given amount of bytes. Default value: 0, i.e. split iff the "
76              "size is reduced. Note that on some architectures the size can "
77              "increase after splitting."),
78     cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
79 
80 static cl::opt<bool>
81     RandomSplit("split-random",
82                 cl::desc("split functions randomly into hot/cold regions"),
83                 cl::Hidden);
84 } // namespace opts
85 
86 namespace {
87 struct SplitCold {
canSplit__anonbffea4b90211::SplitCold88   bool canSplit(const BinaryFunction &BF) {
89     if (!BF.hasValidProfile())
90       return false;
91 
92     bool AllCold = true;
93     for (const BinaryBasicBlock &BB : BF) {
94       const uint64_t ExecCount = BB.getExecutionCount();
95       if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
96         return false;
97       if (ExecCount != 0)
98         AllCold = false;
99     }
100 
101     return !AllCold;
102   }
103 
canOutline__anonbffea4b90211::SplitCold104   bool canOutline(const BinaryBasicBlock &BB) {
105     return BB.getExecutionCount() == 0;
106   }
107 
partition__anonbffea4b90211::SplitCold108   template <typename It> void partition(const It Start, const It End) const {
109     for (auto I = Start; I != End; ++I) {
110       BinaryBasicBlock *BB = *I;
111       if (!BB->canOutline())
112         break;
113       BB->setIsCold(true);
114     }
115   }
116 };
117 
118 struct SplitRandom {
119   std::minstd_rand0 *Gen;
120 
SplitRandom__anonbffea4b90211::SplitRandom121   explicit SplitRandom(std::minstd_rand0 &Gen) : Gen(&Gen) {}
122 
canSplit__anonbffea4b90211::SplitRandom123   bool canSplit(const BinaryFunction &BF) { return true; }
canOutline__anonbffea4b90211::SplitRandom124   bool canOutline(const BinaryBasicBlock &BB) { return true; }
125 
partition__anonbffea4b90211::SplitRandom126   template <typename It> void partition(It Start, It End) const {
127     using DiffT = typename It::difference_type;
128 
129     const It OutlineableBegin = Start;
130     const It OutlineableEnd =
131         std::find_if(OutlineableBegin, End, [](const BinaryBasicBlock *BB) {
132           return !BB->canOutline();
133         });
134     const DiffT NumOutlineableBlocks = OutlineableEnd - OutlineableBegin;
135 
136     // We want to split at least one block unless there are not blocks that can
137     // be outlined
138     const auto MinimumSplit = std::min<DiffT>(NumOutlineableBlocks, 1);
139     std::uniform_int_distribution<DiffT> Dist(MinimumSplit,
140                                               NumOutlineableBlocks);
141     const DiffT NumColdBlocks = Dist(*Gen);
142     const It ColdEnd = OutlineableBegin + NumColdBlocks;
143 
144     LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
145                                  "{1} possible) blocks to split\n",
146                                  ColdEnd - OutlineableBegin,
147                                  OutlineableEnd - OutlineableBegin));
148 
149     std::for_each(OutlineableBegin, ColdEnd,
150                   [](BinaryBasicBlock *BB) { BB->setIsCold(true); });
151   }
152 };
153 } // namespace
154 
155 namespace llvm {
156 namespace bolt {
157 
shouldOptimize(const BinaryFunction & BF) const158 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
159   // Apply execution count threshold
160   if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
161     return false;
162 
163   return BinaryFunctionPass::shouldOptimize(BF);
164 }
165 
runOnFunctions(BinaryContext & BC)166 void SplitFunctions::runOnFunctions(BinaryContext &BC) {
167   if (!opts::SplitFunctions)
168     return;
169 
170   ParallelUtilities::WorkFuncTy WorkFun;
171   std::minstd_rand0 RandGen(opts::RandomSeed.getValue());
172   if (opts::RandomSplit)
173     WorkFun = [&](BinaryFunction &BF) {
174       splitFunction(BF, SplitRandom(RandGen));
175     };
176   else
177     WorkFun = [&](BinaryFunction &BF) { splitFunction<SplitCold>(BF); };
178 
179   ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
180     return !shouldOptimize(BF);
181   };
182 
183   // If we split functions randomly, we need to ensure that across runs with the
184   // same input, we generate random numbers for each function in the same order.
185   const bool ForceSequential = opts::RandomSplit;
186 
187   ParallelUtilities::runOnEachFunction(
188       BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc,
189       "SplitFunctions", ForceSequential);
190 
191   if (SplitBytesHot + SplitBytesCold > 0)
192     outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
193            << " hot bytes from " << SplitBytesCold << " cold bytes "
194            << format("(%.2lf%% of split functions is hot).\n",
195                      100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
196 }
197 
198 template <typename SplitStrategy>
splitFunction(BinaryFunction & BF,SplitStrategy Strategy)199 void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) {
200   if (BF.empty())
201     return;
202 
203   if (!Strategy.canSplit(BF))
204     return;
205 
206   FunctionLayout &Layout = BF.getLayout();
207   BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
208                                                      Layout.block_end());
209 
210   BinaryContext &BC = BF.getBinaryContext();
211   size_t OriginalHotSize;
212   size_t HotSize;
213   size_t ColdSize;
214   if (BC.isX86()) {
215     std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
216     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
217                       << " pre-split is <0x"
218                       << Twine::utohexstr(OriginalHotSize) << ", 0x"
219                       << Twine::utohexstr(ColdSize) << ">\n");
220   }
221 
222   BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
223                                                 Layout.block_end());
224   // Never outline the first basic block.
225   NewLayout.front()->setCanOutline(false);
226   for (BinaryBasicBlock *const BB : NewLayout) {
227     if (!BB->canOutline())
228       continue;
229     if (!Strategy.canOutline(*BB)) {
230       BB->setCanOutline(false);
231       continue;
232     }
233     // Do not split extra entry points in aarch64. They can be referred by
234     // using ADRs and when this happens, these blocks cannot be placed far
235     // away due to the limited range in ADR instruction.
236     if (BC.isAArch64() && BB->isEntryPoint()) {
237       BB->setCanOutline(false);
238       continue;
239     }
240 
241     if (BF.hasEHRanges() && !opts::SplitEH) {
242       // We cannot move landing pads (or rather entry points for landing pads).
243       if (BB->isLandingPad()) {
244         BB->setCanOutline(false);
245         continue;
246       }
247       // We cannot move a block that can throw since exception-handling
248       // runtime cannot deal with split functions. However, if we can guarantee
249       // that the block never throws, it is safe to move the block to
250       // decrease the size of the function.
251       for (MCInst &Instr : *BB) {
252         if (BC.MIB->isInvoke(Instr)) {
253           BB->setCanOutline(false);
254           break;
255         }
256       }
257     }
258   }
259 
260   if (opts::AggressiveSplitting) {
261     // All blocks with 0 count that we can move go to the end of the function.
262     // Even if they were natural to cluster formation and were seen in-between
263     // hot basic blocks.
264     stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
265       return A->canOutline() < B->canOutline();
266     });
267   } else if (BF.hasEHRanges() && !opts::SplitEH) {
268     // Typically functions with exception handling have landing pads at the end.
269     // We cannot move beginning of landing pads, but we can move 0-count blocks
270     // comprising landing pads to the end and thus facilitate splitting.
271     auto FirstLP = NewLayout.begin();
272     while ((*FirstLP)->isLandingPad())
273       ++FirstLP;
274 
275     std::stable_sort(FirstLP, NewLayout.end(),
276                      [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
277                        return A->canOutline() < B->canOutline();
278                      });
279   }
280 
281   // Separate hot from cold starting from the bottom.
282   Strategy.partition(NewLayout.rbegin(), NewLayout.rend());
283   BF.getLayout().update(NewLayout);
284 
285   // For shared objects, invoke instructions and corresponding landing pads
286   // have to be placed in the same fragment. When we split them, create
287   // trampoline landing pads that will redirect the execution to real LPs.
288   TrampolineSetType Trampolines;
289   if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
290     Trampolines = createEHTrampolines(BF);
291 
292   // Check the new size to see if it's worth splitting the function.
293   if (BC.isX86() && BF.isSplit()) {
294     std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
295     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
296                       << " post-split is <0x" << Twine::utohexstr(HotSize)
297                       << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
298     if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
299         alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
300       LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n  0x"
301                         << Twine::utohexstr(HotSize) << ", 0x"
302                         << Twine::utohexstr(ColdSize) << " -> 0x"
303                         << Twine::utohexstr(OriginalHotSize) << '\n');
304 
305       // Reverse the action of createEHTrampolines(). The trampolines will be
306       // placed immediately before the matching destination resulting in no
307       // extra code.
308       if (PreSplitLayout.size() != BF.size())
309         PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);
310 
311       for (BinaryBasicBlock &BB : BF)
312         BB.setIsCold(false);
313       BF.getLayout().update(PreSplitLayout);
314     } else {
315       SplitBytesHot += HotSize;
316       SplitBytesCold += ColdSize;
317     }
318   }
319 }
320 
321 SplitFunctions::TrampolineSetType
createEHTrampolines(BinaryFunction & BF) const322 SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
323   const auto &MIB = BF.getBinaryContext().MIB;
324 
325   // Map real landing pads to the corresponding trampolines.
326   TrampolineSetType LPTrampolines;
327 
328   // Iterate over the copy of basic blocks since we are adding new blocks to the
329   // function which will invalidate its iterators.
330   std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
331   for (BinaryBasicBlock *BB : Blocks) {
332     for (MCInst &Instr : *BB) {
333       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
334       if (!EHInfo || !EHInfo->first)
335         continue;
336 
337       const MCSymbol *LPLabel = EHInfo->first;
338       BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
339       if (BB->isCold() == LPBlock->isCold())
340         continue;
341 
342       const MCSymbol *TrampolineLabel = nullptr;
343       auto Iter = LPTrampolines.find(LPLabel);
344       if (Iter != LPTrampolines.end()) {
345         TrampolineLabel = Iter->second;
346       } else {
347         // Create a trampoline basic block in the same fragment as the thrower.
348         // Note: there's no need to insert the jump instruction, it will be
349         // added by fixBranches().
350         BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
351         TrampolineBB->setIsCold(BB->isCold());
352         TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
353         TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
354         TrampolineBB->setCFIState(LPBlock->getCFIState());
355         TrampolineLabel = TrampolineBB->getLabel();
356         LPTrampolines.insert(std::make_pair(LPLabel, TrampolineLabel));
357       }
358 
359       // Substitute the landing pad with the trampoline.
360       MIB->updateEHInfo(Instr,
361                         MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
362     }
363   }
364 
365   if (LPTrampolines.empty())
366     return LPTrampolines;
367 
368   // All trampoline blocks were added to the end of the function. Place them at
369   // the end of corresponding fragments.
370   BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
371                                                 BF.getLayout().block_end());
372   stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
373     return A->isCold() < B->isCold();
374   });
375   BF.getLayout().update(NewLayout);
376 
377   // Conservatively introduce branch instructions.
378   BF.fixBranches();
379 
380   // Update exception-handling CFG for the function.
381   BF.recomputeLandingPads();
382 
383   return LPTrampolines;
384 }
385 
mergeEHTrampolines(BinaryFunction & BF,SplitFunctions::BasicBlockOrderType & Layout,const SplitFunctions::TrampolineSetType & Trampolines) const386 SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
387     BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
388     const SplitFunctions::TrampolineSetType &Trampolines) const {
389   BasicBlockOrderType MergedLayout;
390   for (BinaryBasicBlock *BB : Layout) {
391     auto Iter = Trampolines.find(BB->getLabel());
392     if (Iter != Trampolines.end()) {
393       BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Iter->second);
394       assert(LPBlock && "Could not find matching landing pad block.");
395       MergedLayout.push_back(LPBlock);
396     }
397     MergedLayout.push_back(BB);
398   }
399 
400   return MergedLayout;
401 }
402 
403 } // namespace bolt
404 } // namespace llvm
405