1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the AlignerPass class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/Aligner.h" 14 #include "bolt/Core/ParallelUtilities.h" 15 16 #define DEBUG_TYPE "bolt-aligner" 17 18 using namespace llvm; 19 20 namespace opts { 21 22 extern cl::OptionCategory BoltOptCategory; 23 24 extern cl::opt<bool> AlignBlocks; 25 extern cl::opt<bool> PreserveBlocksAlignment; 26 27 cl::opt<unsigned> 28 AlignBlocksMinSize("align-blocks-min-size", 29 cl::desc("minimal size of the basic block that should be aligned"), 30 cl::init(0), 31 cl::ZeroOrMore, 32 cl::Hidden, 33 cl::cat(BoltOptCategory)); 34 35 cl::opt<unsigned> 36 AlignBlocksThreshold("align-blocks-threshold", 37 cl::desc("align only blocks with frequency larger than containing function " 38 "execution frequency specified in percent. E.g. 1000 means aligning " 39 "blocks that are 10 times more frequently executed than the " 40 "containing function."), 41 cl::init(800), 42 cl::ZeroOrMore, 43 cl::Hidden, 44 cl::cat(BoltOptCategory)); 45 46 cl::opt<unsigned> 47 AlignFunctions("align-functions", 48 cl::desc("align functions at a given value (relocation mode)"), 49 cl::init(64), 50 cl::ZeroOrMore, 51 cl::cat(BoltOptCategory)); 52 53 cl::opt<unsigned> 54 AlignFunctionsMaxBytes("align-functions-max-bytes", 55 cl::desc("maximum number of bytes to use to align functions"), 56 cl::init(32), 57 cl::ZeroOrMore, 58 cl::cat(BoltOptCategory)); 59 60 cl::opt<unsigned> 61 BlockAlignment("block-alignment", 62 cl::desc("boundary to use for alignment of basic blocks"), 63 cl::init(16), 64 cl::ZeroOrMore, 65 cl::cat(BoltOptCategory)); 66 67 cl::opt<bool> 68 UseCompactAligner("use-compact-aligner", 69 cl::desc("Use compact approach for aligning functions"), 70 cl::init(true), 71 cl::ZeroOrMore, 72 cl::cat(BoltOptCategory)); 73 74 } // end namespace opts 75 76 namespace llvm { 77 namespace bolt { 78 79 namespace { 80 81 // Align function to the specified byte-boundary (typically, 64) offsetting 82 // the fuction by not more than the corresponding value 83 void alignMaxBytes(BinaryFunction &Function) { 84 Function.setAlignment(opts::AlignFunctions); 85 Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); 86 Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes); 87 } 88 89 // Align function to the specified byte-boundary (typically, 64) offsetting 90 // the fuction by not more than the minimum over 91 // -- the size of the function 92 // -- the specified number of bytes 93 void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) { 94 const BinaryContext &BC = Function.getBinaryContext(); 95 size_t HotSize = 0; 96 size_t ColdSize = 0; 97 98 for (const BinaryBasicBlock *BB : Function.layout()) 99 if (BB->isCold()) 100 ColdSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 101 else 102 HotSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 103 104 Function.setAlignment(opts::AlignFunctions); 105 if (HotSize > 0) 106 Function.setMaxAlignmentBytes( 107 std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize)); 108 109 // using the same option, max-align-bytes, both for cold and hot parts of the 110 // functions, as aligning cold functions typically does not affect performance 111 if (ColdSize > 0) 112 Function.setMaxColdAlignmentBytes( 113 std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize)); 114 } 115 116 } // end anonymous namespace 117 118 void AlignerPass::alignBlocks(BinaryFunction &Function, 119 const MCCodeEmitter *Emitter) { 120 if (!Function.hasValidProfile() || !Function.isSimple()) 121 return; 122 123 const BinaryContext &BC = Function.getBinaryContext(); 124 125 const uint64_t FuncCount = 126 std::max<uint64_t>(1, Function.getKnownExecutionCount()); 127 BinaryBasicBlock *PrevBB = nullptr; 128 for (BinaryBasicBlock *BB : Function.layout()) { 129 uint64_t Count = BB->getKnownExecutionCount(); 130 131 if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) { 132 PrevBB = BB; 133 continue; 134 } 135 136 uint64_t FTCount = 0; 137 if (PrevBB && PrevBB->getFallthrough() == BB) 138 FTCount = PrevBB->getBranchInfo(*BB).Count; 139 140 PrevBB = BB; 141 142 if (Count < FTCount * 2) 143 continue; 144 145 const uint64_t BlockSize = 146 BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 147 const uint64_t BytesToUse = 148 std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize); 149 150 if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize) 151 continue; 152 153 BB->setAlignment(opts::BlockAlignment); 154 BB->setAlignmentMaxBytes(BytesToUse); 155 156 // Update stats. 157 LLVM_DEBUG( 158 std::unique_lock<std::shared_timed_mutex> Lock(AlignHistogramMtx); 159 AlignHistogram[BytesToUse]++; 160 AlignedBlocksCount += BB->getKnownExecutionCount(); 161 ); 162 } 163 } 164 165 void AlignerPass::runOnFunctions(BinaryContext &BC) { 166 if (!BC.HasRelocations) 167 return; 168 169 AlignHistogram.resize(opts::BlockAlignment); 170 171 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 172 // Create a separate MCCodeEmitter to allow lock free execution 173 BinaryContext::IndependentCodeEmitter Emitter = 174 BC.createIndependentMCCodeEmitter(); 175 176 if (opts::UseCompactAligner) 177 alignCompact(BF, Emitter.MCE.get()); 178 else 179 alignMaxBytes(BF); 180 181 if (opts::AlignBlocks && !opts::PreserveBlocksAlignment) 182 alignBlocks(BF, Emitter.MCE.get()); 183 }; 184 185 ParallelUtilities::runOnEachFunction( 186 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, 187 ParallelUtilities::PredicateTy(nullptr), "AlignerPass"); 188 189 LLVM_DEBUG( 190 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n"; 191 for (unsigned I = 1; I < AlignHistogram.size(); ++I) 192 dbgs() << " " << I << " : " << AlignHistogram[I] << '\n'; 193 194 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: " 195 << AlignedBlocksCount << '\n'; 196 ); 197 } 198 199 } // end namespace bolt 200 } // end namespace llvm 201