1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AlignerPass class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/Passes/Aligner.h"
14 #include "bolt/Core/ParallelUtilities.h"
15
16 #define DEBUG_TYPE "bolt-aligner"
17
18 using namespace llvm;
19
20 namespace opts {
21
22 extern cl::OptionCategory BoltOptCategory;
23
24 extern cl::opt<bool> AlignBlocks;
25 extern cl::opt<bool> PreserveBlocksAlignment;
26 extern cl::opt<unsigned> AlignFunctions;
27
28 cl::opt<unsigned>
29 AlignBlocksMinSize("align-blocks-min-size",
30 cl::desc("minimal size of the basic block that should be aligned"),
31 cl::init(0),
32 cl::ZeroOrMore,
33 cl::Hidden,
34 cl::cat(BoltOptCategory));
35
36 cl::opt<unsigned> AlignBlocksThreshold(
37 "align-blocks-threshold",
38 cl::desc(
39 "align only blocks with frequency larger than containing function "
40 "execution frequency specified in percent. E.g. 1000 means aligning "
41 "blocks that are 10 times more frequently executed than the "
42 "containing function."),
43 cl::init(800), cl::Hidden, cl::cat(BoltOptCategory));
44
45 cl::opt<unsigned> AlignFunctionsMaxBytes(
46 "align-functions-max-bytes",
47 cl::desc("maximum number of bytes to use to align functions"), cl::init(32),
48 cl::cat(BoltOptCategory));
49
50 cl::opt<unsigned>
51 BlockAlignment("block-alignment",
52 cl::desc("boundary to use for alignment of basic blocks"),
53 cl::init(16),
54 cl::ZeroOrMore,
55 cl::cat(BoltOptCategory));
56
57 cl::opt<bool>
58 UseCompactAligner("use-compact-aligner",
59 cl::desc("Use compact approach for aligning functions"),
60 cl::init(true), cl::cat(BoltOptCategory));
61
62 } // end namespace opts
63
64 namespace llvm {
65 namespace bolt {
66
67 namespace {
68
69 // Align function to the specified byte-boundary (typically, 64) offsetting
70 // the fuction by not more than the corresponding value
alignMaxBytes(BinaryFunction & Function)71 void alignMaxBytes(BinaryFunction &Function) {
72 Function.setAlignment(opts::AlignFunctions);
73 Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
74 Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
75 }
76
77 // Align function to the specified byte-boundary (typically, 64) offsetting
78 // the fuction by not more than the minimum over
79 // -- the size of the function
80 // -- the specified number of bytes
alignCompact(BinaryFunction & Function,const MCCodeEmitter * Emitter)81 void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) {
82 const BinaryContext &BC = Function.getBinaryContext();
83 size_t HotSize = 0;
84 size_t ColdSize = 0;
85
86 for (const BinaryBasicBlock &BB : Function)
87 if (BB.isCold())
88 ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
89 else
90 HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
91
92 Function.setAlignment(opts::AlignFunctions);
93 if (HotSize > 0)
94 Function.setMaxAlignmentBytes(
95 std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));
96
97 // using the same option, max-align-bytes, both for cold and hot parts of the
98 // functions, as aligning cold functions typically does not affect performance
99 if (ColdSize > 0)
100 Function.setMaxColdAlignmentBytes(
101 std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
102 }
103
104 } // end anonymous namespace
105
alignBlocks(BinaryFunction & Function,const MCCodeEmitter * Emitter)106 void AlignerPass::alignBlocks(BinaryFunction &Function,
107 const MCCodeEmitter *Emitter) {
108 if (!Function.hasValidProfile() || !Function.isSimple())
109 return;
110
111 const BinaryContext &BC = Function.getBinaryContext();
112
113 const uint64_t FuncCount =
114 std::max<uint64_t>(1, Function.getKnownExecutionCount());
115 BinaryBasicBlock *PrevBB = nullptr;
116 for (BinaryBasicBlock *BB : Function.getLayout().blocks()) {
117 uint64_t Count = BB->getKnownExecutionCount();
118
119 if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
120 PrevBB = BB;
121 continue;
122 }
123
124 uint64_t FTCount = 0;
125 if (PrevBB && PrevBB->getFallthrough() == BB)
126 FTCount = PrevBB->getBranchInfo(*BB).Count;
127
128 PrevBB = BB;
129
130 if (Count < FTCount * 2)
131 continue;
132
133 const uint64_t BlockSize =
134 BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
135 const uint64_t BytesToUse =
136 std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);
137
138 if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
139 continue;
140
141 BB->setAlignment(opts::BlockAlignment);
142 BB->setAlignmentMaxBytes(BytesToUse);
143
144 // Update stats.
145 LLVM_DEBUG(
146 std::unique_lock<std::shared_timed_mutex> Lock(AlignHistogramMtx);
147 AlignHistogram[BytesToUse]++;
148 AlignedBlocksCount += BB->getKnownExecutionCount();
149 );
150 }
151 }
152
runOnFunctions(BinaryContext & BC)153 void AlignerPass::runOnFunctions(BinaryContext &BC) {
154 if (!BC.HasRelocations)
155 return;
156
157 AlignHistogram.resize(opts::BlockAlignment);
158
159 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
160 // Create a separate MCCodeEmitter to allow lock free execution
161 BinaryContext::IndependentCodeEmitter Emitter =
162 BC.createIndependentMCCodeEmitter();
163
164 if (opts::UseCompactAligner)
165 alignCompact(BF, Emitter.MCE.get());
166 else
167 alignMaxBytes(BF);
168
169 // Align objects that contains constant islands and no code
170 // to at least 8 bytes.
171 if (!BF.size() && BF.hasIslandsInfo()) {
172 const uint16_t Alignment = BF.getConstantIslandAlignment();
173 if (BF.getAlignment() < Alignment)
174 BF.setAlignment(Alignment);
175
176 if (BF.getMaxAlignmentBytes() < Alignment)
177 BF.setMaxAlignmentBytes(Alignment);
178
179 if (BF.getMaxColdAlignmentBytes() < Alignment)
180 BF.setMaxColdAlignmentBytes(Alignment);
181 }
182
183 if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
184 alignBlocks(BF, Emitter.MCE.get());
185 };
186
187 ParallelUtilities::runOnEachFunction(
188 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun,
189 ParallelUtilities::PredicateTy(nullptr), "AlignerPass");
190
191 LLVM_DEBUG(
192 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
193 for (unsigned I = 1; I < AlignHistogram.size(); ++I)
194 dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
195
196 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
197 << AlignedBlocksCount << '\n';
198 );
199 }
200
201 } // end namespace bolt
202 } // end namespace llvm
203