1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AlignerPass class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/Aligner.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 
16 #define DEBUG_TYPE "bolt-aligner"
17 
18 using namespace llvm;
19 
20 namespace opts {
21 
22 extern cl::OptionCategory BoltOptCategory;
23 
24 extern cl::opt<bool> AlignBlocks;
25 extern cl::opt<bool> PreserveBlocksAlignment;
26 extern cl::opt<unsigned> AlignFunctions;
27 
28 cl::opt<unsigned>
29 AlignBlocksMinSize("align-blocks-min-size",
30   cl::desc("minimal size of the basic block that should be aligned"),
31   cl::init(0),
32   cl::ZeroOrMore,
33   cl::Hidden,
34   cl::cat(BoltOptCategory));
35 
36 cl::opt<unsigned> AlignBlocksThreshold(
37     "align-blocks-threshold",
38     cl::desc(
39         "align only blocks with frequency larger than containing function "
40         "execution frequency specified in percent. E.g. 1000 means aligning "
41         "blocks that are 10 times more frequently executed than the "
42         "containing function."),
43     cl::init(800), cl::Hidden, cl::cat(BoltOptCategory));
44 
45 cl::opt<unsigned> AlignFunctionsMaxBytes(
46     "align-functions-max-bytes",
47     cl::desc("maximum number of bytes to use to align functions"), cl::init(32),
48     cl::cat(BoltOptCategory));
49 
50 cl::opt<unsigned>
51 BlockAlignment("block-alignment",
52   cl::desc("boundary to use for alignment of basic blocks"),
53   cl::init(16),
54   cl::ZeroOrMore,
55   cl::cat(BoltOptCategory));
56 
57 cl::opt<bool>
58     UseCompactAligner("use-compact-aligner",
59                       cl::desc("Use compact approach for aligning functions"),
60                       cl::init(true), cl::cat(BoltOptCategory));
61 
62 } // end namespace opts
63 
64 namespace llvm {
65 namespace bolt {
66 
67 namespace {
68 
69 // Align function to the specified byte-boundary (typically, 64) offsetting
70 // the fuction by not more than the corresponding value
alignMaxBytes(BinaryFunction & Function)71 void alignMaxBytes(BinaryFunction &Function) {
72   Function.setAlignment(opts::AlignFunctions);
73   Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
74   Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
75 }
76 
77 // Align function to the specified byte-boundary (typically, 64) offsetting
78 // the fuction by not more than the minimum over
79 // -- the size of the function
80 // -- the specified number of bytes
alignCompact(BinaryFunction & Function,const MCCodeEmitter * Emitter)81 void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) {
82   const BinaryContext &BC = Function.getBinaryContext();
83   size_t HotSize = 0;
84   size_t ColdSize = 0;
85 
86   for (const BinaryBasicBlock &BB : Function)
87     if (BB.isCold())
88       ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
89     else
90       HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
91 
92   Function.setAlignment(opts::AlignFunctions);
93   if (HotSize > 0)
94     Function.setMaxAlignmentBytes(
95       std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));
96 
97   // using the same option, max-align-bytes, both for cold and hot parts of the
98   // functions, as aligning cold functions typically does not affect performance
99   if (ColdSize > 0)
100     Function.setMaxColdAlignmentBytes(
101       std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
102 }
103 
104 } // end anonymous namespace
105 
alignBlocks(BinaryFunction & Function,const MCCodeEmitter * Emitter)106 void AlignerPass::alignBlocks(BinaryFunction &Function,
107                               const MCCodeEmitter *Emitter) {
108   if (!Function.hasValidProfile() || !Function.isSimple())
109     return;
110 
111   const BinaryContext &BC = Function.getBinaryContext();
112 
113   const uint64_t FuncCount =
114       std::max<uint64_t>(1, Function.getKnownExecutionCount());
115   BinaryBasicBlock *PrevBB = nullptr;
116   for (BinaryBasicBlock *BB : Function.getLayout().blocks()) {
117     uint64_t Count = BB->getKnownExecutionCount();
118 
119     if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
120       PrevBB = BB;
121       continue;
122     }
123 
124     uint64_t FTCount = 0;
125     if (PrevBB && PrevBB->getFallthrough() == BB)
126       FTCount = PrevBB->getBranchInfo(*BB).Count;
127 
128     PrevBB = BB;
129 
130     if (Count < FTCount * 2)
131       continue;
132 
133     const uint64_t BlockSize =
134         BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
135     const uint64_t BytesToUse =
136         std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);
137 
138     if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
139       continue;
140 
141     BB->setAlignment(opts::BlockAlignment);
142     BB->setAlignmentMaxBytes(BytesToUse);
143 
144     // Update stats.
145     LLVM_DEBUG(
146       std::unique_lock<std::shared_timed_mutex> Lock(AlignHistogramMtx);
147       AlignHistogram[BytesToUse]++;
148       AlignedBlocksCount += BB->getKnownExecutionCount();
149     );
150   }
151 }
152 
runOnFunctions(BinaryContext & BC)153 void AlignerPass::runOnFunctions(BinaryContext &BC) {
154   if (!BC.HasRelocations)
155     return;
156 
157   AlignHistogram.resize(opts::BlockAlignment);
158 
159   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
160     // Create a separate MCCodeEmitter to allow lock free execution
161     BinaryContext::IndependentCodeEmitter Emitter =
162         BC.createIndependentMCCodeEmitter();
163 
164     if (opts::UseCompactAligner)
165       alignCompact(BF, Emitter.MCE.get());
166     else
167       alignMaxBytes(BF);
168 
169     // Align objects that contains constant islands and no code
170     // to at least 8 bytes.
171     if (!BF.size() && BF.hasIslandsInfo()) {
172       const uint16_t Alignment = BF.getConstantIslandAlignment();
173       if (BF.getAlignment() < Alignment)
174         BF.setAlignment(Alignment);
175 
176       if (BF.getMaxAlignmentBytes() < Alignment)
177         BF.setMaxAlignmentBytes(Alignment);
178 
179       if (BF.getMaxColdAlignmentBytes() < Alignment)
180         BF.setMaxColdAlignmentBytes(Alignment);
181     }
182 
183     if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
184       alignBlocks(BF, Emitter.MCE.get());
185   };
186 
187   ParallelUtilities::runOnEachFunction(
188       BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun,
189       ParallelUtilities::PredicateTy(nullptr), "AlignerPass");
190 
191   LLVM_DEBUG(
192     dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
193     for (unsigned I = 1; I < AlignHistogram.size(); ++I)
194       dbgs() << "  " << I << " : " << AlignHistogram[I] << '\n';
195 
196     dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
197            << AlignedBlocksCount << '\n';
198   );
199 }
200 
201 } // end namespace bolt
202 } // end namespace llvm
203