1 //===--- SplitFunctions.cpp - pass for splitting function code ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Passes/SplitFunctions.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/ParallelUtilities.h"
14 #include "llvm/Support/CommandLine.h"
15 
16 #include <vector>
17 
18 #define DEBUG_TYPE "bolt-opts"
19 
20 using namespace llvm;
21 using namespace bolt;
22 
23 namespace opts {
24 
25 extern cl::OptionCategory BoltOptCategory;
26 
27 extern cl::opt<bool> SplitEH;
28 extern cl::opt<unsigned> ExecutionCountThreshold;
29 
30 static cl::opt<bool>
31 AggressiveSplitting("split-all-cold",
32   cl::desc("outline as many cold basic blocks as possible"),
33   cl::ZeroOrMore,
34   cl::cat(BoltOptCategory));
35 
36 static cl::opt<unsigned>
37 SplitAlignThreshold("split-align-threshold",
38   cl::desc("when deciding to split a function, apply this alignment "
39            "while doing the size comparison (see -split-threshold). "
40            "Default value: 2."),
41   cl::init(2),
42   cl::ZeroOrMore,
43   cl::Hidden,
44   cl::cat(BoltOptCategory));
45 
46 static cl::opt<SplitFunctions::SplittingType>
47 SplitFunctions("split-functions",
48   cl::desc("split functions into hot and cold regions"),
49   cl::init(SplitFunctions::ST_NONE),
50   cl::values(clEnumValN(SplitFunctions::ST_NONE, "0",
51                         "do not split any function"),
52              clEnumValN(SplitFunctions::ST_LARGE, "1",
53                         "in non-relocation mode only split functions too large "
54                         "to fit into original code space"),
55              clEnumValN(SplitFunctions::ST_LARGE, "2",
56                         "same as 1 (backwards compatibility)"),
57              clEnumValN(SplitFunctions::ST_ALL, "3",
58                         "split all functions")),
59   cl::ZeroOrMore,
60   cl::cat(BoltOptCategory));
61 
62 static cl::opt<unsigned>
63 SplitThreshold("split-threshold",
64   cl::desc("split function only if its main size is reduced by more than "
65            "given amount of bytes. Default value: 0, i.e. split iff the "
66            "size is reduced. Note that on some architectures the size can "
67            "increase after splitting."),
68   cl::init(0),
69   cl::ZeroOrMore,
70   cl::Hidden,
71   cl::cat(BoltOptCategory));
72 
73 void syncOptions(BinaryContext &BC) {
74   if (!BC.HasRelocations && opts::SplitFunctions == SplitFunctions::ST_LARGE)
75     opts::SplitFunctions = SplitFunctions::ST_ALL;
76 }
77 
78 } // namespace opts
79 
80 namespace llvm {
81 namespace bolt {
82 
83 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
84   // Apply execution count threshold
85   if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
86     return false;
87 
88   return BinaryFunctionPass::shouldOptimize(BF);
89 }
90 
91 void SplitFunctions::runOnFunctions(BinaryContext &BC) {
92   opts::syncOptions(BC);
93 
94   if (opts::SplitFunctions == SplitFunctions::ST_NONE)
95     return;
96 
97   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
98     splitFunction(BF);
99   };
100 
101   ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
102     return !shouldOptimize(BF);
103   };
104 
105   ParallelUtilities::runOnEachFunction(
106       BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc,
107       "SplitFunctions");
108 
109   if (SplitBytesHot + SplitBytesCold > 0) {
110     outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
111            << " hot bytes from " << SplitBytesCold << " cold bytes "
112            << format("(%.2lf%% of split functions is hot).\n",
113                      100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
114   }
115 }
116 
117 void SplitFunctions::splitFunction(BinaryFunction &BF) {
118   if (!BF.size())
119     return;
120 
121   if (!BF.hasValidProfile())
122     return;
123 
124   bool AllCold = true;
125   for (BinaryBasicBlock *BB : BF.layout()) {
126     uint64_t ExecCount = BB->getExecutionCount();
127     if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
128       return;
129     if (ExecCount != 0)
130       AllCold = false;
131   }
132 
133   if (AllCold)
134     return;
135 
136   BinaryFunction::BasicBlockOrderType PreSplitLayout = BF.getLayout();
137 
138   BinaryContext &BC = BF.getBinaryContext();
139   size_t OriginalHotSize;
140   size_t HotSize;
141   size_t ColdSize;
142   if (BC.isX86()) {
143     std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
144     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
145                       << " pre-split is <0x"
146                       << Twine::utohexstr(OriginalHotSize) << ", 0x"
147                       << Twine::utohexstr(ColdSize) << ">\n");
148   }
149 
150   if (opts::SplitFunctions == SplitFunctions::ST_LARGE && !BC.HasRelocations) {
151     // Split only if the function wouldn't fit.
152     if (OriginalHotSize <= BF.getMaxSize())
153       return;
154   }
155 
156   // Never outline the first basic block.
157   BF.layout_front()->setCanOutline(false);
158   for (BinaryBasicBlock *BB : BF.layout()) {
159     if (!BB->canOutline())
160       continue;
161     if (BB->getExecutionCount() != 0) {
162       BB->setCanOutline(false);
163       continue;
164     }
165     // Do not split extra entry points in aarch64. They can be referred by
166     // using ADRs and when this happens, these blocks cannot be placed far
167     // away due to the limited range in ADR instruction.
168     if (BC.isAArch64() && BB->isEntryPoint()) {
169       BB->setCanOutline(false);
170       continue;
171     }
172     if (BF.hasEHRanges() && !opts::SplitEH) {
173       // We cannot move landing pads (or rather entry points for landing
174       // pads).
175       if (BB->isLandingPad()) {
176         BB->setCanOutline(false);
177         continue;
178       }
179       // We cannot move a block that can throw since exception-handling
180       // runtime cannot deal with split functions. However, if we can guarantee
181       // that the block never throws, it is safe to move the block to
182       // decrease the size of the function.
183       for (MCInst &Instr : *BB) {
184         if (BF.getBinaryContext().MIB->isInvoke(Instr)) {
185           BB->setCanOutline(false);
186           break;
187         }
188       }
189     }
190   }
191 
192   if (opts::AggressiveSplitting) {
193     // All blocks with 0 count that we can move go to the end of the function.
194     // Even if they were natural to cluster formation and were seen in-between
195     // hot basic blocks.
196     std::stable_sort(BF.layout_begin(), BF.layout_end(),
197                      [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
198                        return A->canOutline() < B->canOutline();
199                      });
200   } else if (BF.hasEHRanges() && !opts::SplitEH) {
201     // Typically functions with exception handling have landing pads at the end.
202     // We cannot move beginning of landing pads, but we can move 0-count blocks
203     // comprising landing pads to the end and thus facilitate splitting.
204     auto FirstLP = BF.layout_begin();
205     while ((*FirstLP)->isLandingPad())
206       ++FirstLP;
207 
208     std::stable_sort(FirstLP, BF.layout_end(),
209                      [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
210                        return A->canOutline() < B->canOutline();
211                      });
212   }
213 
214   // Separate hot from cold starting from the bottom.
215   for (auto I = BF.layout_rbegin(), E = BF.layout_rend(); I != E; ++I) {
216     BinaryBasicBlock *BB = *I;
217     if (!BB->canOutline())
218       break;
219     BB->setIsCold(true);
220   }
221 
222   // Check the new size to see if it's worth splitting the function.
223   if (BC.isX86() && BF.isSplit()) {
224     std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
225     LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
226                       << " post-split is <0x" << Twine::utohexstr(HotSize)
227                       << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
228     if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
229         alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
230       LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n  0x"
231                         << Twine::utohexstr(HotSize) << ", 0x"
232                         << Twine::utohexstr(ColdSize) << " -> 0x"
233                         << Twine::utohexstr(OriginalHotSize) << '\n');
234 
235       BF.updateBasicBlockLayout(PreSplitLayout);
236       for (BinaryBasicBlock &BB : BF) {
237         BB.setIsCold(false);
238       }
239     } else {
240       SplitBytesHot += HotSize;
241       SplitBytesCold += ColdSize;
242     }
243   }
244 }
245 
246 } // namespace bolt
247 } // namespace llvm
248