1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SplitFunctions pass.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/Passes/SplitFunctions.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/ParallelUtilities.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/FormatVariadic.h"
18 #include <algorithm>
19 #include <random>
20 #include <vector>
21
22 #define DEBUG_TYPE "bolt-opts"
23
24 using namespace llvm;
25 using namespace bolt;
26
27 namespace {
28 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
29 public:
DeprecatedSplitFunctionOptionParser(cl::Option & O)30 explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
31 : cl::parser<bool>(O) {}
32
parse(cl::Option & O,StringRef ArgName,StringRef Arg,bool & Value)33 bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
34 if (Arg == "2" || Arg == "3") {
35 Value = true;
36 errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
37 "for option -{1} is deprecated\n",
38 Arg, ArgName);
39 return false;
40 }
41 return cl::parser<bool>::parse(O, ArgName, Arg, Value);
42 }
43 };
44 } // namespace
45
46 namespace opts {
47
48 extern cl::OptionCategory BoltOptCategory;
49
50 extern cl::opt<bool> SplitEH;
51 extern cl::opt<unsigned> ExecutionCountThreshold;
52 extern cl::opt<uint32_t> RandomSeed;
53
54 static cl::opt<bool> AggressiveSplitting(
55 "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
56 cl::cat(BoltOptCategory));
57
58 static cl::opt<unsigned> SplitAlignThreshold(
59 "split-align-threshold",
60 cl::desc("when deciding to split a function, apply this alignment "
61 "while doing the size comparison (see -split-threshold). "
62 "Default value: 2."),
63 cl::init(2),
64
65 cl::Hidden, cl::cat(BoltOptCategory));
66
67 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
68 SplitFunctions("split-functions",
69 cl::desc("split functions into hot and cold regions"),
70 cl::cat(BoltOptCategory));
71
72 static cl::opt<unsigned> SplitThreshold(
73 "split-threshold",
74 cl::desc("split function only if its main size is reduced by more than "
75 "given amount of bytes. Default value: 0, i.e. split iff the "
76 "size is reduced. Note that on some architectures the size can "
77 "increase after splitting."),
78 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
79
80 static cl::opt<bool>
81 RandomSplit("split-random",
82 cl::desc("split functions randomly into hot/cold regions"),
83 cl::Hidden);
84 } // namespace opts
85
86 namespace {
87 struct SplitCold {
canSplit__anonbffea4b90211::SplitCold88 bool canSplit(const BinaryFunction &BF) {
89 if (!BF.hasValidProfile())
90 return false;
91
92 bool AllCold = true;
93 for (const BinaryBasicBlock &BB : BF) {
94 const uint64_t ExecCount = BB.getExecutionCount();
95 if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
96 return false;
97 if (ExecCount != 0)
98 AllCold = false;
99 }
100
101 return !AllCold;
102 }
103
canOutline__anonbffea4b90211::SplitCold104 bool canOutline(const BinaryBasicBlock &BB) {
105 return BB.getExecutionCount() == 0;
106 }
107
partition__anonbffea4b90211::SplitCold108 template <typename It> void partition(const It Start, const It End) const {
109 for (auto I = Start; I != End; ++I) {
110 BinaryBasicBlock *BB = *I;
111 if (!BB->canOutline())
112 break;
113 BB->setIsCold(true);
114 }
115 }
116 };
117
118 struct SplitRandom {
119 std::minstd_rand0 *Gen;
120
SplitRandom__anonbffea4b90211::SplitRandom121 explicit SplitRandom(std::minstd_rand0 &Gen) : Gen(&Gen) {}
122
canSplit__anonbffea4b90211::SplitRandom123 bool canSplit(const BinaryFunction &BF) { return true; }
canOutline__anonbffea4b90211::SplitRandom124 bool canOutline(const BinaryBasicBlock &BB) { return true; }
125
partition__anonbffea4b90211::SplitRandom126 template <typename It> void partition(It Start, It End) const {
127 using DiffT = typename It::difference_type;
128
129 const It OutlineableBegin = Start;
130 const It OutlineableEnd =
131 std::find_if(OutlineableBegin, End, [](const BinaryBasicBlock *BB) {
132 return !BB->canOutline();
133 });
134 const DiffT NumOutlineableBlocks = OutlineableEnd - OutlineableBegin;
135
136 // We want to split at least one block unless there are not blocks that can
137 // be outlined
138 const auto MinimumSplit = std::min<DiffT>(NumOutlineableBlocks, 1);
139 std::uniform_int_distribution<DiffT> Dist(MinimumSplit,
140 NumOutlineableBlocks);
141 const DiffT NumColdBlocks = Dist(*Gen);
142 const It ColdEnd = OutlineableBegin + NumColdBlocks;
143
144 LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
145 "{1} possible) blocks to split\n",
146 ColdEnd - OutlineableBegin,
147 OutlineableEnd - OutlineableBegin));
148
149 std::for_each(OutlineableBegin, ColdEnd,
150 [](BinaryBasicBlock *BB) { BB->setIsCold(true); });
151 }
152 };
153 } // namespace
154
155 namespace llvm {
156 namespace bolt {
157
shouldOptimize(const BinaryFunction & BF) const158 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
159 // Apply execution count threshold
160 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
161 return false;
162
163 return BinaryFunctionPass::shouldOptimize(BF);
164 }
165
runOnFunctions(BinaryContext & BC)166 void SplitFunctions::runOnFunctions(BinaryContext &BC) {
167 if (!opts::SplitFunctions)
168 return;
169
170 ParallelUtilities::WorkFuncTy WorkFun;
171 std::minstd_rand0 RandGen(opts::RandomSeed.getValue());
172 if (opts::RandomSplit)
173 WorkFun = [&](BinaryFunction &BF) {
174 splitFunction(BF, SplitRandom(RandGen));
175 };
176 else
177 WorkFun = [&](BinaryFunction &BF) { splitFunction<SplitCold>(BF); };
178
179 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
180 return !shouldOptimize(BF);
181 };
182
183 // If we split functions randomly, we need to ensure that across runs with the
184 // same input, we generate random numbers for each function in the same order.
185 const bool ForceSequential = opts::RandomSplit;
186
187 ParallelUtilities::runOnEachFunction(
188 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc,
189 "SplitFunctions", ForceSequential);
190
191 if (SplitBytesHot + SplitBytesCold > 0)
192 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
193 << " hot bytes from " << SplitBytesCold << " cold bytes "
194 << format("(%.2lf%% of split functions is hot).\n",
195 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
196 }
197
198 template <typename SplitStrategy>
splitFunction(BinaryFunction & BF,SplitStrategy Strategy)199 void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) {
200 if (BF.empty())
201 return;
202
203 if (!Strategy.canSplit(BF))
204 return;
205
206 FunctionLayout &Layout = BF.getLayout();
207 BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
208 Layout.block_end());
209
210 BinaryContext &BC = BF.getBinaryContext();
211 size_t OriginalHotSize;
212 size_t HotSize;
213 size_t ColdSize;
214 if (BC.isX86()) {
215 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
216 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
217 << " pre-split is <0x"
218 << Twine::utohexstr(OriginalHotSize) << ", 0x"
219 << Twine::utohexstr(ColdSize) << ">\n");
220 }
221
222 BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
223 Layout.block_end());
224 // Never outline the first basic block.
225 NewLayout.front()->setCanOutline(false);
226 for (BinaryBasicBlock *const BB : NewLayout) {
227 if (!BB->canOutline())
228 continue;
229 if (!Strategy.canOutline(*BB)) {
230 BB->setCanOutline(false);
231 continue;
232 }
233 // Do not split extra entry points in aarch64. They can be referred by
234 // using ADRs and when this happens, these blocks cannot be placed far
235 // away due to the limited range in ADR instruction.
236 if (BC.isAArch64() && BB->isEntryPoint()) {
237 BB->setCanOutline(false);
238 continue;
239 }
240
241 if (BF.hasEHRanges() && !opts::SplitEH) {
242 // We cannot move landing pads (or rather entry points for landing pads).
243 if (BB->isLandingPad()) {
244 BB->setCanOutline(false);
245 continue;
246 }
247 // We cannot move a block that can throw since exception-handling
248 // runtime cannot deal with split functions. However, if we can guarantee
249 // that the block never throws, it is safe to move the block to
250 // decrease the size of the function.
251 for (MCInst &Instr : *BB) {
252 if (BC.MIB->isInvoke(Instr)) {
253 BB->setCanOutline(false);
254 break;
255 }
256 }
257 }
258 }
259
260 if (opts::AggressiveSplitting) {
261 // All blocks with 0 count that we can move go to the end of the function.
262 // Even if they were natural to cluster formation and were seen in-between
263 // hot basic blocks.
264 stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
265 return A->canOutline() < B->canOutline();
266 });
267 } else if (BF.hasEHRanges() && !opts::SplitEH) {
268 // Typically functions with exception handling have landing pads at the end.
269 // We cannot move beginning of landing pads, but we can move 0-count blocks
270 // comprising landing pads to the end and thus facilitate splitting.
271 auto FirstLP = NewLayout.begin();
272 while ((*FirstLP)->isLandingPad())
273 ++FirstLP;
274
275 std::stable_sort(FirstLP, NewLayout.end(),
276 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
277 return A->canOutline() < B->canOutline();
278 });
279 }
280
281 // Separate hot from cold starting from the bottom.
282 Strategy.partition(NewLayout.rbegin(), NewLayout.rend());
283 BF.getLayout().update(NewLayout);
284
285 // For shared objects, invoke instructions and corresponding landing pads
286 // have to be placed in the same fragment. When we split them, create
287 // trampoline landing pads that will redirect the execution to real LPs.
288 TrampolineSetType Trampolines;
289 if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
290 Trampolines = createEHTrampolines(BF);
291
292 // Check the new size to see if it's worth splitting the function.
293 if (BC.isX86() && BF.isSplit()) {
294 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
295 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
296 << " post-split is <0x" << Twine::utohexstr(HotSize)
297 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
298 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
299 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
300 LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n 0x"
301 << Twine::utohexstr(HotSize) << ", 0x"
302 << Twine::utohexstr(ColdSize) << " -> 0x"
303 << Twine::utohexstr(OriginalHotSize) << '\n');
304
305 // Reverse the action of createEHTrampolines(). The trampolines will be
306 // placed immediately before the matching destination resulting in no
307 // extra code.
308 if (PreSplitLayout.size() != BF.size())
309 PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);
310
311 for (BinaryBasicBlock &BB : BF)
312 BB.setIsCold(false);
313 BF.getLayout().update(PreSplitLayout);
314 } else {
315 SplitBytesHot += HotSize;
316 SplitBytesCold += ColdSize;
317 }
318 }
319 }
320
321 SplitFunctions::TrampolineSetType
createEHTrampolines(BinaryFunction & BF) const322 SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
323 const auto &MIB = BF.getBinaryContext().MIB;
324
325 // Map real landing pads to the corresponding trampolines.
326 TrampolineSetType LPTrampolines;
327
328 // Iterate over the copy of basic blocks since we are adding new blocks to the
329 // function which will invalidate its iterators.
330 std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
331 for (BinaryBasicBlock *BB : Blocks) {
332 for (MCInst &Instr : *BB) {
333 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
334 if (!EHInfo || !EHInfo->first)
335 continue;
336
337 const MCSymbol *LPLabel = EHInfo->first;
338 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
339 if (BB->isCold() == LPBlock->isCold())
340 continue;
341
342 const MCSymbol *TrampolineLabel = nullptr;
343 auto Iter = LPTrampolines.find(LPLabel);
344 if (Iter != LPTrampolines.end()) {
345 TrampolineLabel = Iter->second;
346 } else {
347 // Create a trampoline basic block in the same fragment as the thrower.
348 // Note: there's no need to insert the jump instruction, it will be
349 // added by fixBranches().
350 BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
351 TrampolineBB->setIsCold(BB->isCold());
352 TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
353 TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
354 TrampolineBB->setCFIState(LPBlock->getCFIState());
355 TrampolineLabel = TrampolineBB->getLabel();
356 LPTrampolines.insert(std::make_pair(LPLabel, TrampolineLabel));
357 }
358
359 // Substitute the landing pad with the trampoline.
360 MIB->updateEHInfo(Instr,
361 MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
362 }
363 }
364
365 if (LPTrampolines.empty())
366 return LPTrampolines;
367
368 // All trampoline blocks were added to the end of the function. Place them at
369 // the end of corresponding fragments.
370 BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
371 BF.getLayout().block_end());
372 stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
373 return A->isCold() < B->isCold();
374 });
375 BF.getLayout().update(NewLayout);
376
377 // Conservatively introduce branch instructions.
378 BF.fixBranches();
379
380 // Update exception-handling CFG for the function.
381 BF.recomputeLandingPads();
382
383 return LPTrampolines;
384 }
385
mergeEHTrampolines(BinaryFunction & BF,SplitFunctions::BasicBlockOrderType & Layout,const SplitFunctions::TrampolineSetType & Trampolines) const386 SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
387 BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
388 const SplitFunctions::TrampolineSetType &Trampolines) const {
389 BasicBlockOrderType MergedLayout;
390 for (BinaryBasicBlock *BB : Layout) {
391 auto Iter = Trampolines.find(BB->getLabel());
392 if (Iter != Trampolines.end()) {
393 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Iter->second);
394 assert(LPBlock && "Could not find matching landing pad block.");
395 MergedLayout.push_back(LPBlock);
396 }
397 MergedLayout.push_back(BB);
398 }
399
400 return MergedLayout;
401 }
402
403 } // namespace bolt
404 } // namespace llvm
405