1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SplitFunctions pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/SplitFunctions.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Core/ParallelUtilities.h"
16 #include "llvm/Support/CommandLine.h"
17 
18 #include <vector>
19 
20 #define DEBUG_TYPE "bolt-opts"
21 
22 using namespace llvm;
23 using namespace bolt;
24 
25 namespace opts {
26 
27 extern cl::OptionCategory BoltOptCategory;
28 
29 extern cl::opt<bool> SplitEH;
30 extern cl::opt<unsigned> ExecutionCountThreshold;
31 
32 static cl::opt<bool>
33 AggressiveSplitting("split-all-cold",
34   cl::desc("outline as many cold basic blocks as possible"),
35   cl::ZeroOrMore,
36   cl::cat(BoltOptCategory));
37 
38 static cl::opt<unsigned>
39 SplitAlignThreshold("split-align-threshold",
40   cl::desc("when deciding to split a function, apply this alignment "
41            "while doing the size comparison (see -split-threshold). "
42            "Default value: 2."),
43   cl::init(2),
44   cl::ZeroOrMore,
45   cl::Hidden,
46   cl::cat(BoltOptCategory));
47 
48 static cl::opt<SplitFunctions::SplittingType>
49 SplitFunctions("split-functions",
50   cl::desc("split functions into hot and cold regions"),
51   cl::init(SplitFunctions::ST_NONE),
52   cl::values(clEnumValN(SplitFunctions::ST_NONE, "0",
53                         "do not split any function"),
54              clEnumValN(SplitFunctions::ST_LARGE, "1",
55                         "in non-relocation mode only split functions too large "
56                         "to fit into original code space"),
57              clEnumValN(SplitFunctions::ST_LARGE, "2",
58                         "same as 1 (backwards compatibility)"),
59              clEnumValN(SplitFunctions::ST_ALL, "3",
60                         "split all functions")),
61   cl::ZeroOrMore,
62   cl::cat(BoltOptCategory));
63 
64 static cl::opt<unsigned>
65 SplitThreshold("split-threshold",
66   cl::desc("split function only if its main size is reduced by more than "
67            "given amount of bytes. Default value: 0, i.e. split iff the "
68            "size is reduced. Note that on some architectures the size can "
69            "increase after splitting."),
70   cl::init(0),
71   cl::ZeroOrMore,
72   cl::Hidden,
73   cl::cat(BoltOptCategory));
74 
75 void syncOptions(BinaryContext &BC) {
76   if (!BC.HasRelocations && opts::SplitFunctions == SplitFunctions::ST_LARGE)
77     opts::SplitFunctions = SplitFunctions::ST_ALL;
78 }
79 
80 } // namespace opts
81 
82 namespace llvm {
83 namespace bolt {
84 
85 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
86   // Apply execution count threshold
87   if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
88     return false;
89 
90   return BinaryFunctionPass::shouldOptimize(BF);
91 }
92 
93 void SplitFunctions::runOnFunctions(BinaryContext &BC) {
94   opts::syncOptions(BC);
95 
96   if (opts::SplitFunctions == SplitFunctions::ST_NONE)
97     return;
98 
99   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
100     splitFunction(BF);
101   };
102 
103   ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
104     return !shouldOptimize(BF);
105   };
106 
107   ParallelUtilities::runOnEachFunction(
108       BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc,
109       "SplitFunctions");
110 
111   if (SplitBytesHot + SplitBytesCold > 0) {
112     outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
113            << " hot bytes from " << SplitBytesCold << " cold bytes "
114            << format("(%.2lf%% of split functions is hot).\n",
115                      100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
116   }
117 }
118 
119 void SplitFunctions::splitFunction(BinaryFunction &BF) {
120   if (!BF.size())
121     return;
122 
123   if (!BF.hasValidProfile())
124     return;
125 
126   bool AllCold = true;
127   for (BinaryBasicBlock *BB : BF.layout()) {
128     uint64_t ExecCount = BB->getExecutionCount();
129     if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
130       return;
131     if (ExecCount != 0)
132       AllCold = false;
133   }
134 
135   if (AllCold)
136     return;
137 
138   BinaryFunction::BasicBlockOrderType PreSplitLayout = BF.getLayout();
139 
140   BinaryContext &BC = BF.getBinaryContext();
141   size_t OriginalHotSize;
142   size_t HotSize;
143   size_t ColdSize;
144   if (BC.isX86()) {
145     std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
146     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
147                       << " pre-split is <0x"
148                       << Twine::utohexstr(OriginalHotSize) << ", 0x"
149                       << Twine::utohexstr(ColdSize) << ">\n");
150   }
151 
152   if (opts::SplitFunctions == SplitFunctions::ST_LARGE && !BC.HasRelocations) {
153     // Split only if the function wouldn't fit.
154     if (OriginalHotSize <= BF.getMaxSize())
155       return;
156   }
157 
158   // Never outline the first basic block.
159   BF.layout_front()->setCanOutline(false);
160   for (BinaryBasicBlock *BB : BF.layout()) {
161     if (!BB->canOutline())
162       continue;
163     if (BB->getExecutionCount() != 0) {
164       BB->setCanOutline(false);
165       continue;
166     }
167     // Do not split extra entry points in aarch64. They can be referred by
168     // using ADRs and when this happens, these blocks cannot be placed far
169     // away due to the limited range in ADR instruction.
170     if (BC.isAArch64() && BB->isEntryPoint()) {
171       BB->setCanOutline(false);
172       continue;
173     }
174     if (BF.hasEHRanges() && !opts::SplitEH) {
175       // We cannot move landing pads (or rather entry points for landing
176       // pads).
177       if (BB->isLandingPad()) {
178         BB->setCanOutline(false);
179         continue;
180       }
181       // We cannot move a block that can throw since exception-handling
182       // runtime cannot deal with split functions. However, if we can guarantee
183       // that the block never throws, it is safe to move the block to
184       // decrease the size of the function.
185       for (MCInst &Instr : *BB) {
186         if (BF.getBinaryContext().MIB->isInvoke(Instr)) {
187           BB->setCanOutline(false);
188           break;
189         }
190       }
191     }
192   }
193 
194   if (opts::AggressiveSplitting) {
195     // All blocks with 0 count that we can move go to the end of the function.
196     // Even if they were natural to cluster formation and were seen in-between
197     // hot basic blocks.
198     std::stable_sort(BF.layout_begin(), BF.layout_end(),
199                      [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
200                        return A->canOutline() < B->canOutline();
201                      });
202   } else if (BF.hasEHRanges() && !opts::SplitEH) {
203     // Typically functions with exception handling have landing pads at the end.
204     // We cannot move beginning of landing pads, but we can move 0-count blocks
205     // comprising landing pads to the end and thus facilitate splitting.
206     auto FirstLP = BF.layout_begin();
207     while ((*FirstLP)->isLandingPad())
208       ++FirstLP;
209 
210     std::stable_sort(FirstLP, BF.layout_end(),
211                      [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
212                        return A->canOutline() < B->canOutline();
213                      });
214   }
215 
216   // Separate hot from cold starting from the bottom.
217   for (auto I = BF.layout_rbegin(), E = BF.layout_rend(); I != E; ++I) {
218     BinaryBasicBlock *BB = *I;
219     if (!BB->canOutline())
220       break;
221     BB->setIsCold(true);
222   }
223 
224   // Check the new size to see if it's worth splitting the function.
225   if (BC.isX86() && BF.isSplit()) {
226     std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
227     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
228                       << " post-split is <0x" << Twine::utohexstr(HotSize)
229                       << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
230     if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
231         alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
232       LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n  0x"
233                         << Twine::utohexstr(HotSize) << ", 0x"
234                         << Twine::utohexstr(ColdSize) << " -> 0x"
235                         << Twine::utohexstr(OriginalHotSize) << '\n');
236 
237       BF.updateBasicBlockLayout(PreSplitLayout);
238       for (BinaryBasicBlock &BB : BF) {
239         BB.setIsCold(false);
240       }
241     } else {
242       SplitBytesHot += HotSize;
243       SplitBytesCold += ColdSize;
244     }
245   }
246 }
247 
248 } // namespace bolt
249 } // namespace llvm
250