1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AlignerPass class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/Aligner.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 
16 #define DEBUG_TYPE "bolt-aligner"
17 
18 using namespace llvm;
19 
20 namespace opts {
21 
22 extern cl::OptionCategory BoltOptCategory;
23 
24 extern cl::opt<bool> AlignBlocks;
25 extern cl::opt<bool> PreserveBlocksAlignment;
26 
27 cl::opt<unsigned>
28 AlignBlocksMinSize("align-blocks-min-size",
29   cl::desc("minimal size of the basic block that should be aligned"),
30   cl::init(0),
31   cl::ZeroOrMore,
32   cl::Hidden,
33   cl::cat(BoltOptCategory));
34 
35 cl::opt<unsigned>
36 AlignBlocksThreshold("align-blocks-threshold",
37   cl::desc("align only blocks with frequency larger than containing function "
38            "execution frequency specified in percent. E.g. 1000 means aligning "
39            "blocks that are 10 times more frequently executed than the "
40            "containing function."),
41   cl::init(800),
42   cl::ZeroOrMore,
43   cl::Hidden,
44   cl::cat(BoltOptCategory));
45 
46 cl::opt<unsigned>
47 AlignFunctions("align-functions",
48   cl::desc("align functions at a given value (relocation mode)"),
49   cl::init(64),
50   cl::ZeroOrMore,
51   cl::cat(BoltOptCategory));
52 
53 cl::opt<unsigned>
54 AlignFunctionsMaxBytes("align-functions-max-bytes",
55   cl::desc("maximum number of bytes to use to align functions"),
56   cl::init(32),
57   cl::ZeroOrMore,
58   cl::cat(BoltOptCategory));
59 
60 cl::opt<unsigned>
61 BlockAlignment("block-alignment",
62   cl::desc("boundary to use for alignment of basic blocks"),
63   cl::init(16),
64   cl::ZeroOrMore,
65   cl::cat(BoltOptCategory));
66 
67 cl::opt<bool>
68 UseCompactAligner("use-compact-aligner",
69   cl::desc("Use compact approach for aligning functions"),
70   cl::init(true),
71   cl::ZeroOrMore,
72   cl::cat(BoltOptCategory));
73 
74 } // end namespace opts
75 
76 namespace llvm {
77 namespace bolt {
78 
79 namespace {
80 
81 // Align function to the specified byte-boundary (typically, 64) offsetting
82 // the fuction by not more than the corresponding value
83 void alignMaxBytes(BinaryFunction &Function) {
84   Function.setAlignment(opts::AlignFunctions);
85   Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
86   Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
87 }
88 
89 // Align function to the specified byte-boundary (typically, 64) offsetting
90 // the fuction by not more than the minimum over
91 // -- the size of the function
92 // -- the specified number of bytes
93 void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) {
94   const BinaryContext &BC = Function.getBinaryContext();
95   size_t HotSize = 0;
96   size_t ColdSize = 0;
97 
98   for (const BinaryBasicBlock *BB : Function.layout())
99     if (BB->isCold())
100       ColdSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
101     else
102       HotSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
103 
104   Function.setAlignment(opts::AlignFunctions);
105   if (HotSize > 0)
106     Function.setMaxAlignmentBytes(
107       std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));
108 
109   // using the same option, max-align-bytes, both for cold and hot parts of the
110   // functions, as aligning cold functions typically does not affect performance
111   if (ColdSize > 0)
112     Function.setMaxColdAlignmentBytes(
113       std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
114 }
115 
116 } // end anonymous namespace
117 
118 void AlignerPass::alignBlocks(BinaryFunction &Function,
119                               const MCCodeEmitter *Emitter) {
120   if (!Function.hasValidProfile() || !Function.isSimple())
121     return;
122 
123   const BinaryContext &BC = Function.getBinaryContext();
124 
125   const uint64_t FuncCount =
126       std::max<uint64_t>(1, Function.getKnownExecutionCount());
127   BinaryBasicBlock *PrevBB = nullptr;
128   for (BinaryBasicBlock *BB : Function.layout()) {
129     uint64_t Count = BB->getKnownExecutionCount();
130 
131     if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
132       PrevBB = BB;
133       continue;
134     }
135 
136     uint64_t FTCount = 0;
137     if (PrevBB && PrevBB->getFallthrough() == BB)
138       FTCount = PrevBB->getBranchInfo(*BB).Count;
139 
140     PrevBB = BB;
141 
142     if (Count < FTCount * 2)
143       continue;
144 
145     const uint64_t BlockSize =
146         BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
147     const uint64_t BytesToUse =
148         std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);
149 
150     if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
151       continue;
152 
153     BB->setAlignment(opts::BlockAlignment);
154     BB->setAlignmentMaxBytes(BytesToUse);
155 
156     // Update stats.
157     LLVM_DEBUG(
158       std::unique_lock<std::shared_timed_mutex> Lock(AlignHistogramMtx);
159       AlignHistogram[BytesToUse]++;
160       AlignedBlocksCount += BB->getKnownExecutionCount();
161     );
162   }
163 }
164 
165 void AlignerPass::runOnFunctions(BinaryContext &BC) {
166   if (!BC.HasRelocations)
167     return;
168 
169   AlignHistogram.resize(opts::BlockAlignment);
170 
171   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
172     // Create a separate MCCodeEmitter to allow lock free execution
173     BinaryContext::IndependentCodeEmitter Emitter =
174         BC.createIndependentMCCodeEmitter();
175 
176     if (opts::UseCompactAligner)
177       alignCompact(BF, Emitter.MCE.get());
178     else
179       alignMaxBytes(BF);
180 
181     if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
182       alignBlocks(BF, Emitter.MCE.get());
183   };
184 
185   ParallelUtilities::runOnEachFunction(
186       BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun,
187       ParallelUtilities::PredicateTy(nullptr), "AlignerPass");
188 
189   LLVM_DEBUG(
190     dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
191     for (unsigned I = 1; I < AlignHistogram.size(); ++I)
192       dbgs() << "  " << I << " : " << AlignHistogram[I] << '\n';
193 
194     dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
195            << AlignedBlocksCount << '\n';
196   );
197 }
198 
199 } // end namespace bolt
200 } // end namespace llvm
201