1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SplitFunctions pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/SplitFunctions.h" 14 #include "bolt/Core/BinaryFunction.h" 15 #include "bolt/Core/ParallelUtilities.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/FormatVariadic.h" 18 19 #include <vector> 20 21 #define DEBUG_TYPE "bolt-opts" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace { 27 class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> { 28 public: 29 explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) 30 : cl::parser<bool>(O) {} 31 32 bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { 33 if (Arg == "2" || Arg == "3") { 34 Value = true; 35 errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " 36 "for option -{1} is deprecated\n", 37 Arg, ArgName); 38 return false; 39 } 40 return cl::parser<bool>::parse(O, ArgName, Arg, Value); 41 } 42 }; 43 } // namespace 44 45 namespace opts { 46 47 extern cl::OptionCategory BoltOptCategory; 48 49 extern cl::opt<bool> SplitEH; 50 extern cl::opt<unsigned> ExecutionCountThreshold; 51 52 static cl::opt<bool> AggressiveSplitting( 53 "split-all-cold", cl::desc("outline as many cold basic blocks as possible"), 54 cl::cat(BoltOptCategory)); 55 56 static cl::opt<unsigned> SplitAlignThreshold( 57 "split-align-threshold", 58 cl::desc("when deciding to split a function, apply this alignment " 59 "while doing the size comparison (see -split-threshold). " 60 "Default value: 2."), 61 cl::init(2), 62 63 cl::Hidden, cl::cat(BoltOptCategory)); 64 65 static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser> 66 SplitFunctions("split-functions", 67 cl::desc("split functions into hot and cold regions"), 68 cl::cat(BoltOptCategory)); 69 70 static cl::opt<unsigned> SplitThreshold( 71 "split-threshold", 72 cl::desc("split function only if its main size is reduced by more than " 73 "given amount of bytes. Default value: 0, i.e. split iff the " 74 "size is reduced. Note that on some architectures the size can " 75 "increase after splitting."), 76 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); 77 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const { 84 // Apply execution count threshold 85 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) 86 return false; 87 88 return BinaryFunctionPass::shouldOptimize(BF); 89 } 90 91 void SplitFunctions::runOnFunctions(BinaryContext &BC) { 92 if (!opts::SplitFunctions) 93 return; 94 95 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 96 splitFunction(BF); 97 }; 98 99 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 100 return !shouldOptimize(BF); 101 }; 102 103 ParallelUtilities::runOnEachFunction( 104 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc, 105 "SplitFunctions"); 106 107 if (SplitBytesHot + SplitBytesCold > 0) 108 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot 109 << " hot bytes from " << SplitBytesCold << " cold bytes " 110 << format("(%.2lf%% of split functions is hot).\n", 111 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); 112 } 113 114 void SplitFunctions::splitFunction(BinaryFunction &BF) { 115 if (!BF.size()) 116 return; 117 118 if (!BF.hasValidProfile()) 119 return; 120 121 bool AllCold = true; 122 for (BinaryBasicBlock *BB : BF.layout()) { 123 const uint64_t ExecCount = BB->getExecutionCount(); 124 if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) 125 return; 126 if (ExecCount != 0) 127 AllCold = false; 128 } 129 130 if (AllCold) 131 return; 132 133 BinaryFunction::BasicBlockOrderType PreSplitLayout = BF.getLayout(); 134 135 BinaryContext &BC = BF.getBinaryContext(); 136 size_t OriginalHotSize; 137 size_t HotSize; 138 size_t ColdSize; 139 if (BC.isX86()) { 140 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF); 141 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 142 << " pre-split is <0x" 143 << Twine::utohexstr(OriginalHotSize) << ", 0x" 144 << Twine::utohexstr(ColdSize) << ">\n"); 145 } 146 147 // Never outline the first basic block. 148 BF.layout_front()->setCanOutline(false); 149 for (BinaryBasicBlock *BB : BF.layout()) { 150 if (!BB->canOutline()) 151 continue; 152 if (BB->getExecutionCount() != 0) { 153 BB->setCanOutline(false); 154 continue; 155 } 156 // Do not split extra entry points in aarch64. They can be referred by 157 // using ADRs and when this happens, these blocks cannot be placed far 158 // away due to the limited range in ADR instruction. 159 if (BC.isAArch64() && BB->isEntryPoint()) { 160 BB->setCanOutline(false); 161 continue; 162 } 163 164 if (BF.hasEHRanges() && !opts::SplitEH) { 165 // We cannot move landing pads (or rather entry points for landing pads). 166 if (BB->isLandingPad()) { 167 BB->setCanOutline(false); 168 continue; 169 } 170 // We cannot move a block that can throw since exception-handling 171 // runtime cannot deal with split functions. However, if we can guarantee 172 // that the block never throws, it is safe to move the block to 173 // decrease the size of the function. 174 for (MCInst &Instr : *BB) { 175 if (BC.MIB->isInvoke(Instr)) { 176 BB->setCanOutline(false); 177 break; 178 } 179 } 180 } 181 } 182 183 if (opts::AggressiveSplitting) { 184 // All blocks with 0 count that we can move go to the end of the function. 185 // Even if they were natural to cluster formation and were seen in-between 186 // hot basic blocks. 187 llvm::stable_sort(BF.layout(), 188 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 189 return A->canOutline() < B->canOutline(); 190 }); 191 } else if (BF.hasEHRanges() && !opts::SplitEH) { 192 // Typically functions with exception handling have landing pads at the end. 193 // We cannot move beginning of landing pads, but we can move 0-count blocks 194 // comprising landing pads to the end and thus facilitate splitting. 195 auto FirstLP = BF.layout_begin(); 196 while ((*FirstLP)->isLandingPad()) 197 ++FirstLP; 198 199 std::stable_sort(FirstLP, BF.layout_end(), 200 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 201 return A->canOutline() < B->canOutline(); 202 }); 203 } 204 205 // Separate hot from cold starting from the bottom. 206 for (auto I = BF.layout_rbegin(), E = BF.layout_rend(); I != E; ++I) { 207 BinaryBasicBlock *BB = *I; 208 if (!BB->canOutline()) 209 break; 210 BB->setIsCold(true); 211 } 212 213 // For shared objects, place invoke instructions and corresponding landing 214 // pads in the same fragment. To reduce hot code size, create trampoline 215 // landing pads that will redirect the execution to the real LP. 216 if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) 217 createEHTrampolines(BF); 218 219 // Check the new size to see if it's worth splitting the function. 220 if (BC.isX86() && BF.isSplit()) { 221 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); 222 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 223 << " post-split is <0x" << Twine::utohexstr(HotSize) 224 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); 225 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <= 226 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) { 227 LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n 0x" 228 << Twine::utohexstr(HotSize) << ", 0x" 229 << Twine::utohexstr(ColdSize) << " -> 0x" 230 << Twine::utohexstr(OriginalHotSize) << '\n'); 231 232 BF.updateBasicBlockLayout(PreSplitLayout); 233 for (BinaryBasicBlock &BB : BF) 234 BB.setIsCold(false); 235 } else { 236 SplitBytesHot += HotSize; 237 SplitBytesCold += ColdSize; 238 } 239 } 240 } 241 242 void SplitFunctions::createEHTrampolines(BinaryFunction &BF) const { 243 const auto &MIB = BF.getBinaryContext().MIB; 244 245 // Map real landing pads to the corresponding trampolines. 246 std::unordered_map<const MCSymbol *, const MCSymbol *> LPTrampolines; 247 248 // Iterate over the copy of basic blocks since we are adding new blocks to the 249 // function which will invalidate its iterators. 250 std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend()); 251 for (BinaryBasicBlock *BB : Blocks) { 252 for (MCInst &Instr : *BB) { 253 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr); 254 if (!EHInfo || !EHInfo->first) 255 continue; 256 257 const MCSymbol *LPLabel = EHInfo->first; 258 BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel); 259 if (BB->isCold() == LPBlock->isCold()) 260 continue; 261 262 const MCSymbol *TrampolineLabel = nullptr; 263 auto Iter = LPTrampolines.find(LPLabel); 264 if (Iter != LPTrampolines.end()) { 265 TrampolineLabel = Iter->second; 266 } else { 267 // Create a trampoline basic block in the same fragment as the thrower. 268 // Note: there's no need to insert the jump instruction, it will be 269 // added by fixBranches(). 270 BinaryBasicBlock *TrampolineBB = BF.addBasicBlock(); 271 TrampolineBB->setIsCold(BB->isCold()); 272 TrampolineBB->setExecutionCount(LPBlock->getExecutionCount()); 273 TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount()); 274 TrampolineBB->setCFIState(LPBlock->getCFIState()); 275 TrampolineLabel = TrampolineBB->getLabel(); 276 LPTrampolines.emplace(std::make_pair(LPLabel, TrampolineLabel)); 277 } 278 279 // Substitute the landing pad with the trampoline. 280 MIB->updateEHInfo(Instr, 281 MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second)); 282 } 283 } 284 285 if (LPTrampolines.empty()) 286 return; 287 288 // All trampoline blocks were added to the end of the function. Place them at 289 // the end of corresponding fragments. 290 std::stable_sort(BF.layout_begin(), BF.layout_end(), 291 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 292 return A->isCold() < B->isCold(); 293 }); 294 295 // Conservatively introduce branch instructions. 296 BF.fixBranches(); 297 298 // Update exception-handling CFG for the function. 299 BF.recomputeLandingPads(); 300 } 301 302 } // namespace bolt 303 } // namespace llvm 304