1 //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the AlignerPass class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/Aligner.h" 14 #include "bolt/Core/ParallelUtilities.h" 15 16 #define DEBUG_TYPE "bolt-aligner" 17 18 using namespace llvm; 19 20 namespace opts { 21 22 extern cl::OptionCategory BoltOptCategory; 23 24 extern cl::opt<bool> AlignBlocks; 25 extern cl::opt<bool> PreserveBlocksAlignment; 26 extern cl::opt<unsigned> AlignFunctions; 27 28 cl::opt<unsigned> 29 AlignBlocksMinSize("align-blocks-min-size", 30 cl::desc("minimal size of the basic block that should be aligned"), 31 cl::init(0), 32 cl::ZeroOrMore, 33 cl::Hidden, 34 cl::cat(BoltOptCategory)); 35 36 cl::opt<unsigned> 37 AlignBlocksThreshold("align-blocks-threshold", 38 cl::desc("align only blocks with frequency larger than containing function " 39 "execution frequency specified in percent. E.g. 1000 means aligning " 40 "blocks that are 10 times more frequently executed than the " 41 "containing function."), 42 cl::init(800), 43 cl::ZeroOrMore, 44 cl::Hidden, 45 cl::cat(BoltOptCategory)); 46 47 cl::opt<unsigned> 48 AlignFunctionsMaxBytes("align-functions-max-bytes", 49 cl::desc("maximum number of bytes to use to align functions"), 50 cl::init(32), 51 cl::ZeroOrMore, 52 cl::cat(BoltOptCategory)); 53 54 cl::opt<unsigned> 55 BlockAlignment("block-alignment", 56 cl::desc("boundary to use for alignment of basic blocks"), 57 cl::init(16), 58 cl::ZeroOrMore, 59 cl::cat(BoltOptCategory)); 60 61 cl::opt<bool> 62 UseCompactAligner("use-compact-aligner", 63 cl::desc("Use compact approach for aligning functions"), 64 cl::init(true), 65 cl::ZeroOrMore, 66 cl::cat(BoltOptCategory)); 67 68 } // end namespace opts 69 70 namespace llvm { 71 namespace bolt { 72 73 namespace { 74 75 // Align function to the specified byte-boundary (typically, 64) offsetting 76 // the fuction by not more than the corresponding value 77 void alignMaxBytes(BinaryFunction &Function) { 78 Function.setAlignment(opts::AlignFunctions); 79 Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); 80 Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes); 81 } 82 83 // Align function to the specified byte-boundary (typically, 64) offsetting 84 // the fuction by not more than the minimum over 85 // -- the size of the function 86 // -- the specified number of bytes 87 void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) { 88 const BinaryContext &BC = Function.getBinaryContext(); 89 size_t HotSize = 0; 90 size_t ColdSize = 0; 91 92 for (const BinaryBasicBlock *BB : Function.layout()) 93 if (BB->isCold()) 94 ColdSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 95 else 96 HotSize += BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 97 98 Function.setAlignment(opts::AlignFunctions); 99 if (HotSize > 0) 100 Function.setMaxAlignmentBytes( 101 std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize)); 102 103 // using the same option, max-align-bytes, both for cold and hot parts of the 104 // functions, as aligning cold functions typically does not affect performance 105 if (ColdSize > 0) 106 Function.setMaxColdAlignmentBytes( 107 std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize)); 108 } 109 110 } // end anonymous namespace 111 112 void AlignerPass::alignBlocks(BinaryFunction &Function, 113 const MCCodeEmitter *Emitter) { 114 if (!Function.hasValidProfile() || !Function.isSimple()) 115 return; 116 117 const BinaryContext &BC = Function.getBinaryContext(); 118 119 const uint64_t FuncCount = 120 std::max<uint64_t>(1, Function.getKnownExecutionCount()); 121 BinaryBasicBlock *PrevBB = nullptr; 122 for (BinaryBasicBlock *BB : Function.layout()) { 123 uint64_t Count = BB->getKnownExecutionCount(); 124 125 if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) { 126 PrevBB = BB; 127 continue; 128 } 129 130 uint64_t FTCount = 0; 131 if (PrevBB && PrevBB->getFallthrough() == BB) 132 FTCount = PrevBB->getBranchInfo(*BB).Count; 133 134 PrevBB = BB; 135 136 if (Count < FTCount * 2) 137 continue; 138 139 const uint64_t BlockSize = 140 BC.computeCodeSize(BB->begin(), BB->end(), Emitter); 141 const uint64_t BytesToUse = 142 std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize); 143 144 if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize) 145 continue; 146 147 BB->setAlignment(opts::BlockAlignment); 148 BB->setAlignmentMaxBytes(BytesToUse); 149 150 // Update stats. 151 LLVM_DEBUG( 152 std::unique_lock<std::shared_timed_mutex> Lock(AlignHistogramMtx); 153 AlignHistogram[BytesToUse]++; 154 AlignedBlocksCount += BB->getKnownExecutionCount(); 155 ); 156 } 157 } 158 159 void AlignerPass::runOnFunctions(BinaryContext &BC) { 160 if (!BC.HasRelocations) 161 return; 162 163 AlignHistogram.resize(opts::BlockAlignment); 164 165 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 166 // Create a separate MCCodeEmitter to allow lock free execution 167 BinaryContext::IndependentCodeEmitter Emitter = 168 BC.createIndependentMCCodeEmitter(); 169 170 if (opts::UseCompactAligner) 171 alignCompact(BF, Emitter.MCE.get()); 172 else 173 alignMaxBytes(BF); 174 175 if (opts::AlignBlocks && !opts::PreserveBlocksAlignment) 176 alignBlocks(BF, Emitter.MCE.get()); 177 }; 178 179 ParallelUtilities::runOnEachFunction( 180 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, 181 ParallelUtilities::PredicateTy(nullptr), "AlignerPass"); 182 183 LLVM_DEBUG( 184 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n"; 185 for (unsigned I = 1; I < AlignHistogram.size(); ++I) 186 dbgs() << " " << I << " : " << AlignHistogram[I] << '\n'; 187 188 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: " 189 << AlignedBlocksCount << '\n'; 190 ); 191 } 192 193 } // end namespace bolt 194 } // end namespace llvm 195