1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SplitFunctions pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/SplitFunctions.h" 14 #include "bolt/Core/BinaryFunction.h" 15 #include "bolt/Core/ParallelUtilities.h" 16 #include "llvm/Support/CommandLine.h" 17 18 #include <vector> 19 20 #define DEBUG_TYPE "bolt-opts" 21 22 using namespace llvm; 23 using namespace bolt; 24 25 namespace opts { 26 27 extern cl::OptionCategory BoltOptCategory; 28 29 extern cl::opt<bool> SplitEH; 30 extern cl::opt<unsigned> ExecutionCountThreshold; 31 32 static cl::opt<bool> AggressiveSplitting( 33 "split-all-cold", cl::desc("outline as many cold basic blocks as possible"), 34 cl::cat(BoltOptCategory)); 35 36 static cl::opt<unsigned> SplitAlignThreshold( 37 "split-align-threshold", 38 cl::desc("when deciding to split a function, apply this alignment " 39 "while doing the size comparison (see -split-threshold). " 40 "Default value: 2."), 41 cl::init(2), 42 43 cl::Hidden, cl::cat(BoltOptCategory)); 44 45 static cl::opt<SplitFunctions::SplittingType> 46 SplitFunctions("split-functions", 47 cl::desc("split functions into hot and cold regions"), 48 cl::init(SplitFunctions::ST_NONE), 49 cl::values(clEnumValN(SplitFunctions::ST_NONE, "0", 50 "do not split any function"), 51 clEnumValN(SplitFunctions::ST_LARGE, "1", 52 "in non-relocation mode only split functions too large " 53 "to fit into original code space"), 54 clEnumValN(SplitFunctions::ST_LARGE, "2", 55 "same as 1 (backwards compatibility)"), 56 clEnumValN(SplitFunctions::ST_ALL, "3", 57 "split all functions")), 58 cl::ZeroOrMore, 59 cl::cat(BoltOptCategory)); 60 61 static cl::opt<unsigned> SplitThreshold( 62 "split-threshold", 63 cl::desc("split function only if its main size is reduced by more than " 64 "given amount of bytes. Default value: 0, i.e. split iff the " 65 "size is reduced. Note that on some architectures the size can " 66 "increase after splitting."), 67 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); 68 69 void syncOptions(BinaryContext &BC) { 70 if (!BC.HasRelocations && opts::SplitFunctions == SplitFunctions::ST_LARGE) 71 opts::SplitFunctions = SplitFunctions::ST_ALL; 72 } 73 74 } // namespace opts 75 76 namespace llvm { 77 namespace bolt { 78 79 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const { 80 // Apply execution count threshold 81 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) 82 return false; 83 84 return BinaryFunctionPass::shouldOptimize(BF); 85 } 86 87 void SplitFunctions::runOnFunctions(BinaryContext &BC) { 88 opts::syncOptions(BC); 89 90 if (opts::SplitFunctions == SplitFunctions::ST_NONE) 91 return; 92 93 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 94 splitFunction(BF); 95 }; 96 97 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 98 return !shouldOptimize(BF); 99 }; 100 101 ParallelUtilities::runOnEachFunction( 102 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc, 103 "SplitFunctions"); 104 105 if (SplitBytesHot + SplitBytesCold > 0) 106 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot 107 << " hot bytes from " << SplitBytesCold << " cold bytes " 108 << format("(%.2lf%% of split functions is hot).\n", 109 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); 110 } 111 112 void SplitFunctions::splitFunction(BinaryFunction &BF) { 113 if (!BF.size()) 114 return; 115 116 if (!BF.hasValidProfile()) 117 return; 118 119 bool AllCold = true; 120 for (BinaryBasicBlock *BB : BF.layout()) { 121 const uint64_t ExecCount = BB->getExecutionCount(); 122 if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) 123 return; 124 if (ExecCount != 0) 125 AllCold = false; 126 } 127 128 if (AllCold) 129 return; 130 131 BinaryFunction::BasicBlockOrderType PreSplitLayout = BF.getLayout(); 132 133 BinaryContext &BC = BF.getBinaryContext(); 134 size_t OriginalHotSize; 135 size_t HotSize; 136 size_t ColdSize; 137 if (BC.isX86()) { 138 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF); 139 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 140 << " pre-split is <0x" 141 << Twine::utohexstr(OriginalHotSize) << ", 0x" 142 << Twine::utohexstr(ColdSize) << ">\n"); 143 if (opts::SplitFunctions == SplitFunctions::ST_LARGE && 144 !BC.HasRelocations) { 145 // Split only if the function wouldn't fit. 146 if (OriginalHotSize <= BF.getMaxSize()) 147 return; 148 } 149 } 150 151 // Never outline the first basic block. 152 BF.layout_front()->setCanOutline(false); 153 for (BinaryBasicBlock *BB : BF.layout()) { 154 if (!BB->canOutline()) 155 continue; 156 if (BB->getExecutionCount() != 0) { 157 BB->setCanOutline(false); 158 continue; 159 } 160 // Do not split extra entry points in aarch64. They can be referred by 161 // using ADRs and when this happens, these blocks cannot be placed far 162 // away due to the limited range in ADR instruction. 163 if (BC.isAArch64() && BB->isEntryPoint()) { 164 BB->setCanOutline(false); 165 continue; 166 } 167 168 if (BF.hasEHRanges() && !opts::SplitEH) { 169 // We cannot move landing pads (or rather entry points for landing pads). 170 if (BB->isLandingPad()) { 171 BB->setCanOutline(false); 172 continue; 173 } 174 // We cannot move a block that can throw since exception-handling 175 // runtime cannot deal with split functions. However, if we can guarantee 176 // that the block never throws, it is safe to move the block to 177 // decrease the size of the function. 178 for (MCInst &Instr : *BB) { 179 if (BC.MIB->isInvoke(Instr)) { 180 BB->setCanOutline(false); 181 break; 182 } 183 } 184 } 185 } 186 187 if (opts::AggressiveSplitting) { 188 // All blocks with 0 count that we can move go to the end of the function. 189 // Even if they were natural to cluster formation and were seen in-between 190 // hot basic blocks. 191 std::stable_sort(BF.layout_begin(), BF.layout_end(), 192 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 193 return A->canOutline() < B->canOutline(); 194 }); 195 } else if (BF.hasEHRanges() && !opts::SplitEH) { 196 // Typically functions with exception handling have landing pads at the end. 197 // We cannot move beginning of landing pads, but we can move 0-count blocks 198 // comprising landing pads to the end and thus facilitate splitting. 199 auto FirstLP = BF.layout_begin(); 200 while ((*FirstLP)->isLandingPad()) 201 ++FirstLP; 202 203 std::stable_sort(FirstLP, BF.layout_end(), 204 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 205 return A->canOutline() < B->canOutline(); 206 }); 207 } 208 209 // Separate hot from cold starting from the bottom. 210 for (auto I = BF.layout_rbegin(), E = BF.layout_rend(); I != E; ++I) { 211 BinaryBasicBlock *BB = *I; 212 if (!BB->canOutline()) 213 break; 214 BB->setIsCold(true); 215 } 216 217 // For shared objects, place invoke instructions and corresponding landing 218 // pads in the same fragment. To reduce hot code size, create trampoline 219 // landing pads that will redirect the execution to the real LP. 220 if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) 221 createEHTrampolines(BF); 222 223 // Check the new size to see if it's worth splitting the function. 224 if (BC.isX86() && BF.isSplit()) { 225 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); 226 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 227 << " post-split is <0x" << Twine::utohexstr(HotSize) 228 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); 229 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <= 230 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) { 231 LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n 0x" 232 << Twine::utohexstr(HotSize) << ", 0x" 233 << Twine::utohexstr(ColdSize) << " -> 0x" 234 << Twine::utohexstr(OriginalHotSize) << '\n'); 235 236 BF.updateBasicBlockLayout(PreSplitLayout); 237 for (BinaryBasicBlock &BB : BF) 238 BB.setIsCold(false); 239 } else { 240 SplitBytesHot += HotSize; 241 SplitBytesCold += ColdSize; 242 } 243 } 244 } 245 246 void SplitFunctions::createEHTrampolines(BinaryFunction &BF) const { 247 const auto &MIB = BF.getBinaryContext().MIB; 248 249 // Map real landing pads to the corresponding trampolines. 250 std::unordered_map<const MCSymbol *, const MCSymbol *> LPTrampolines; 251 252 // Iterate over the copy of basic blocks since we are adding new blocks to the 253 // function which will invalidate its iterators. 254 std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend()); 255 for (BinaryBasicBlock *BB : Blocks) { 256 for (MCInst &Instr : *BB) { 257 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr); 258 if (!EHInfo || !EHInfo->first) 259 continue; 260 261 const MCSymbol *LPLabel = EHInfo->first; 262 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel); 263 if (BB->isCold() == LPBlock->isCold()) 264 continue; 265 266 const MCSymbol *TrampolineLabel = nullptr; 267 auto Iter = LPTrampolines.find(LPLabel); 268 if (Iter != LPTrampolines.end()) { 269 TrampolineLabel = Iter->second; 270 } else { 271 // Create a trampoline basic block in the same fragment as the thrower. 272 // Note: there's no need to insert the jump instruction, it will be 273 // added by fixBranches(). 274 BinaryBasicBlock *TrampolineBB = BF.addBasicBlock(); 275 TrampolineBB->setIsCold(BB->isCold()); 276 TrampolineBB->setExecutionCount(LPBlock->getExecutionCount()); 277 TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount()); 278 TrampolineBB->setCFIState(LPBlock->getCFIState()); 279 TrampolineLabel = TrampolineBB->getLabel(); 280 LPTrampolines.emplace(std::make_pair(LPLabel, TrampolineLabel)); 281 } 282 283 // Substitute the landing pad with the trampoline. 284 MIB->updateEHInfo(Instr, 285 MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second)); 286 } 287 } 288 289 if (LPTrampolines.empty()) 290 return; 291 292 // All trampoline blocks were added to the end of the function. Place them at 293 // the end of corresponding fragments. 294 std::stable_sort(BF.layout_begin(), BF.layout_end(), 295 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 296 return A->isCold() < B->isCold(); 297 }); 298 299 // Conservatively introduce branch instructions. 300 BF.fixBranches(); 301 302 // Update exception-handling CFG for the function. 303 BF.recomputeLandingPads(); 304 } 305 306 } // namespace bolt 307 } // namespace llvm 308