1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the AlignerPass class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/Aligner.h" 14 #include "bolt/Core/ParallelUtilities.h" 15 16 #define DEBUG_TYPE "bolt-aligner" 17 18 using namespace llvm; 19 20 namespace opts { 21 22 extern cl::OptionCategory BoltOptCategory; 23 24 extern cl::opt<bool> AlignBlocks; 25 extern cl::opt<bool> PreserveBlocksAlignment; 26 extern cl::opt<unsigned> AlignFunctions; 27 28 cl::opt<unsigned> 29 AlignBlocksMinSize("align-blocks-min-size", 30 cl::desc("minimal size of the basic block that should be aligned"), 31 cl::init(0), 32 cl::ZeroOrMore, 33 cl::Hidden, 34 cl::cat(BoltOptCategory)); 35 36 cl::opt<unsigned> AlignBlocksThreshold( 37 "align-blocks-threshold", 38 cl::desc( 39 "align only blocks with frequency larger than containing function " 40 "execution frequency specified in percent. E.g. 1000 means aligning " 41 "blocks that are 10 times more frequently executed than the " 42 "containing function."), 43 cl::init(800), cl::Hidden, cl::cat(BoltOptCategory)); 44 45 cl::opt<unsigned> AlignFunctionsMaxBytes( 46 "align-functions-max-bytes", 47 cl::desc("maximum number of bytes to use to align functions"), cl::init(32), 48 cl::cat(BoltOptCategory)); 49 50 cl::opt<unsigned> 51 BlockAlignment("block-alignment", 52 cl::desc("boundary to use for alignment of basic blocks"), 53 cl::init(16), 54 cl::ZeroOrMore, 55 cl::cat(BoltOptCategory)); 56 57 cl::opt<bool> 58 UseCompactAligner("use-compact-aligner", 59 cl::desc("Use compact approach for aligning functions"), 60 cl::init(true), cl::cat(BoltOptCategory)); 61 62 } // end namespace opts 63 64 namespace llvm { 65 namespace bolt { 66 67 namespace { 68 69 // Align function to the specified byte-boundary (typically, 64) offsetting 70 // the fuction by not more than the corresponding value 71 void alignMaxBytes(BinaryFunction &Function) { 72 Function.setAlignment(opts::AlignFunctions); 73 Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); 74 Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes); 75 } 76 77 // Align function to the specified byte-boundary (typically, 64) offsetting 78 // the fuction by not more than the minimum over 79 // -- the size of the function 80 // -- the specified number of bytes 81 void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) { 82 const BinaryContext &BC = Function.getBinaryContext(); 83 size_t HotSize = 0; 84 size_t ColdSize = 0; 85 86 for (const BinaryBasicBlock *BB : Function.layout()) 87 if (BB->isCold()) 88 ColdSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 89 else 90 HotSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 91 92 Function.setAlignment(opts::AlignFunctions); 93 if (HotSize > 0) 94 Function.setMaxAlignmentBytes( 95 std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize)); 96 97 // using the same option, max-align-bytes, both for cold and hot parts of the 98 // functions, as aligning cold functions typically does not affect performance 99 if (ColdSize > 0) 100 Function.setMaxColdAlignmentBytes( 101 std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize)); 102 } 103 104 } // end anonymous namespace 105 106 void AlignerPass::alignBlocks(BinaryFunction &Function, 107 const MCCodeEmitter *Emitter) { 108 if (!Function.hasValidProfile() || !Function.isSimple()) 109 return; 110 111 const BinaryContext &BC = Function.getBinaryContext(); 112 113 const uint64_t FuncCount = 114 std::max<uint64_t>(1, Function.getKnownExecutionCount()); 115 BinaryBasicBlock *PrevBB = nullptr; 116 for (BinaryBasicBlock *BB : Function.layout()) { 117 uint64_t Count = BB->getKnownExecutionCount(); 118 119 if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) { 120 PrevBB = BB; 121 continue; 122 } 123 124 uint64_t FTCount = 0; 125 if (PrevBB && PrevBB->getFallthrough() == BB) 126 FTCount = PrevBB->getBranchInfo(*BB).Count; 127 128 PrevBB = BB; 129 130 if (Count < FTCount * 2) 131 continue; 132 133 const uint64_t BlockSize = 134 BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 135 const uint64_t BytesToUse = 136 std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize); 137 138 if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize) 139 continue; 140 141 BB->setAlignment(opts::BlockAlignment); 142 BB->setAlignmentMaxBytes(BytesToUse); 143 144 // Update stats. 145 LLVM_DEBUG( 146 std::unique_lock<std::shared_timed_mutex> Lock(AlignHistogramMtx); 147 AlignHistogram[BytesToUse]++; 148 AlignedBlocksCount += BB->getKnownExecutionCount(); 149 ); 150 } 151 } 152 153 void AlignerPass::runOnFunctions(BinaryContext &BC) { 154 if (!BC.HasRelocations) 155 return; 156 157 AlignHistogram.resize(opts::BlockAlignment); 158 159 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 160 // Create a separate MCCodeEmitter to allow lock free execution 161 BinaryContext::IndependentCodeEmitter Emitter = 162 BC.createIndependentMCCodeEmitter(); 163 164 if (opts::UseCompactAligner) 165 alignCompact(BF, Emitter.MCE.get()); 166 else 167 alignMaxBytes(BF); 168 169 // Align objects that contains constant islands and no code 170 // to at least 8 bytes. 171 if (!BF.size() && BF.hasIslandsInfo()) { 172 const uint16_t Alignment = BF.getConstantIslandAlignment(); 173 if (BF.getAlignment() < Alignment) 174 BF.setAlignment(Alignment); 175 176 if (BF.getMaxAlignmentBytes() < Alignment) 177 BF.setMaxAlignmentBytes(Alignment); 178 179 if (BF.getMaxColdAlignmentBytes() < Alignment) 180 BF.setMaxColdAlignmentBytes(Alignment); 181 } 182 183 if (opts::AlignBlocks && !opts::PreserveBlocksAlignment) 184 alignBlocks(BF, Emitter.MCE.get()); 185 }; 186 187 ParallelUtilities::runOnEachFunction( 188 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, 189 ParallelUtilities::PredicateTy(nullptr), "AlignerPass"); 190 191 LLVM_DEBUG( 192 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n"; 193 for (unsigned I = 1; I < AlignHistogram.size(); ++I) 194 dbgs() << " " << I << " : " << AlignHistogram[I] << '\n'; 195 196 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: " 197 << AlignedBlocksCount << '\n'; 198 ); 199 } 200 201 } // end namespace bolt 202 } // end namespace llvm 203