//===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SplitFunctions pass.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/SplitFunctions.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/ParallelUtilities.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <random>
#include <vector>

#define DEBUG_TYPE "bolt-opts"

using namespace llvm;
using namespace bolt;

namespace {
class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
public:
  explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
      : cl::parser<bool>(O) {}

  bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
    if (Arg == "2" || Arg == "3") {
      Value = true;
      errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
                        "for option -{1} is deprecated\n",
                        Arg, ArgName);
      return false;
    }
    return cl::parser<bool>::parse(O, ArgName, Arg, Value);
  }
};
} // namespace

namespace opts {

extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bool> SplitEH;
extern cl::opt<unsigned> ExecutionCountThreshold;
extern cl::opt<uint32_t> RandomSeed;

static cl::opt<bool> AggressiveSplitting(
    "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitAlignThreshold(
    "split-align-threshold",
    cl::desc("when deciding to split a function, apply this alignment "
             "while doing the size comparison (see -split-threshold). "
             "Default value: 2."),
    cl::init(2), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
    SplitFunctions("split-functions",
                   cl::desc("split functions into hot and cold regions"),
                   cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitThreshold(
    "split-threshold",
    cl::desc("split function only if its main size is reduced by more than "
             "the given amount of bytes. Default value: 0, i.e. split iff the "
             "size is reduced. Note that on some architectures the size can "
             "increase after splitting."),
    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool>
    RandomSplit("split-random",
                cl::desc("split functions randomly into hot/cold regions"),
                cl::Hidden);
} // namespace opts

namespace {
struct SplitCold {
  bool canSplit(const BinaryFunction &BF) {
    if (!BF.hasValidProfile())
      return false;

    bool AllCold = true;
    for (const BinaryBasicBlock &BB : BF) {
      const uint64_t ExecCount = BB.getExecutionCount();
      if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
        return false;
      if (ExecCount != 0)
        AllCold = false;
    }

    return !AllCold;
  }

  bool canOutline(const BinaryBasicBlock &BB) {
    return BB.getExecutionCount() == 0;
  }

  template <typename It> void partition(const It Start, const It End) const {
    for (auto I = Start; I != End; ++I) {
      BinaryBasicBlock *BB = *I;
      if (!BB->canOutline())
        break;
      BB->setIsCold(true);
    }
  }
};

struct SplitRandom {
  std::minstd_rand0 *Gen;

  explicit SplitRandom(std::minstd_rand0 &Gen) : Gen(&Gen) {}

  bool canSplit(const BinaryFunction &BF) { return true; }
  bool canOutline(const BinaryBasicBlock &BB) { return true; }

  template <typename It> void partition(It Start, It End) const {
    using DiffT = typename It::difference_type;

    const It OutlineableBegin = Start;
    const It OutlineableEnd =
        std::find_if(OutlineableBegin, End, [](const BinaryBasicBlock *BB) {
          return !BB->canOutline();
        });
    const DiffT NumOutlineableBlocks = OutlineableEnd - OutlineableBegin;

    // We want to split at least one block unless there are no blocks that can
    // be outlined.
    const auto MinimumSplit = std::min<DiffT>(NumOutlineableBlocks, 1);
    std::uniform_int_distribution<DiffT> Dist(MinimumSplit,
                                              NumOutlineableBlocks);
    const DiffT NumColdBlocks = Dist(*Gen);
    const It ColdEnd = OutlineableBegin + NumColdBlocks;

    LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
                                 "{1} possible) blocks to split\n",
                                 ColdEnd - OutlineableBegin,
                                 OutlineableEnd - OutlineableBegin));

    std::for_each(OutlineableBegin, ColdEnd,
                  [](BinaryBasicBlock *BB) { BB->setIsCold(true); });
  }
};
} // namespace

namespace llvm {
namespace bolt {

bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
  // Apply the execution count threshold.
  if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
    return false;

  return BinaryFunctionPass::shouldOptimize(BF);
}

void SplitFunctions::runOnFunctions(BinaryContext &BC) {
  if (!opts::SplitFunctions)
    return;

  ParallelUtilities::WorkFuncTy WorkFun;
  std::minstd_rand0 RandGen(opts::RandomSeed.getValue());
  if (opts::RandomSplit)
    WorkFun = [&](BinaryFunction &BF) {
      splitFunction(BF, SplitRandom(RandGen));
    };
  else
    WorkFun = [&](BinaryFunction &BF) { splitFunction<SplitCold>(BF); };

  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
    return !shouldOptimize(BF);
  };

  // If we split functions randomly, we need to ensure that across runs with
  // the same input, we generate random numbers for each function in the same
  // order.
  const bool ForceSequential = opts::RandomSplit;

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc,
      "SplitFunctions", ForceSequential);

  if (SplitBytesHot + SplitBytesCold > 0)
    outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
           << " hot bytes from " << SplitBytesCold << " cold bytes "
           << format("(%.2lf%% of split functions is hot).\n",
                     100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
}

template <typename SplitStrategy>
void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) {
  if (BF.empty())
    return;

  if (!Strategy.canSplit(BF))
    return;

  FunctionLayout &Layout = BF.getLayout();
  BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
                                                     Layout.block_end());

  BinaryContext &BC = BF.getBinaryContext();
  size_t OriginalHotSize;
  size_t HotSize;
  size_t ColdSize;
  if (BC.isX86()) {
    std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " pre-split is <0x"
                      << Twine::utohexstr(OriginalHotSize) << ", 0x"
                      << Twine::utohexstr(ColdSize) << ">\n");
  }

  BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
                                                Layout.block_end());
  // Never outline the first basic block.
  NewLayout.front()->setCanOutline(false);
  for (BinaryBasicBlock *const BB : NewLayout) {
    if (!BB->canOutline())
      continue;
    if (!Strategy.canOutline(*BB)) {
      BB->setCanOutline(false);
      continue;
    }
    // Do not split extra entry points on AArch64. They can be referenced via
    // ADR instructions, and when that happens these blocks cannot be placed
    // far away due to the limited range of the ADR instruction.
    if (BC.isAArch64() && BB->isEntryPoint()) {
      BB->setCanOutline(false);
      continue;
    }

    if (BF.hasEHRanges() && !opts::SplitEH) {
      // We cannot move landing pads (or rather entry points for landing pads).
      if (BB->isLandingPad()) {
        BB->setCanOutline(false);
        continue;
      }
      // We cannot move a block that can throw since the exception-handling
      // runtime cannot deal with split functions. However, if we can
      // guarantee that the block never throws, it is safe to move it in order
      // to decrease the size of the function.
      for (MCInst &Instr : *BB) {
        if (BC.MIB->isInvoke(Instr)) {
          BB->setCanOutline(false);
          break;
        }
      }
    }
  }

  if (opts::AggressiveSplitting) {
    // All blocks with 0 count that we can move go to the end of the function,
    // even if they were natural to cluster formation and were seen in-between
    // hot basic blocks.
    stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
      return A->canOutline() < B->canOutline();
    });
  } else if (BF.hasEHRanges() && !opts::SplitEH) {
    // Typically functions with exception handling have landing pads at the
    // end. We cannot move the beginnings of landing pads, but we can move
    // 0-count blocks comprising landing pads to the end and thus facilitate
    // splitting.
    auto FirstLP = NewLayout.begin();
    while ((*FirstLP)->isLandingPad())
      ++FirstLP;

    std::stable_sort(FirstLP, NewLayout.end(),
                     [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
                       return A->canOutline() < B->canOutline();
                     });
  }

  // Separate hot from cold starting from the bottom.
  Strategy.partition(NewLayout.rbegin(), NewLayout.rend());
  BF.getLayout().update(NewLayout);

  // For shared objects, invoke instructions and corresponding landing pads
  // have to be placed in the same fragment. When we split them, create
  // trampoline landing pads that will redirect the execution to real LPs.
  TrampolineSetType Trampolines;
  if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
    Trampolines = createEHTrampolines(BF);

  // Check the new size to see if it's worth splitting the function.
  if (BC.isX86() && BF.isSplit()) {
    std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " post-split is <0x" << Twine::utohexstr(HotSize)
                      << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
    if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
        alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
      LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n 0x"
                        << Twine::utohexstr(HotSize) << ", 0x"
                        << Twine::utohexstr(ColdSize) << " -> 0x"
                        << Twine::utohexstr(OriginalHotSize) << '\n');

      // Reverse the action of createEHTrampolines(). The trampolines will be
      // placed immediately before the matching destination, resulting in no
      // extra code.
      if (PreSplitLayout.size() != BF.size())
        PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);

      for (BinaryBasicBlock &BB : BF)
        BB.setIsCold(false);
      BF.getLayout().update(PreSplitLayout);
    } else {
      SplitBytesHot += HotSize;
      SplitBytesCold += ColdSize;
    }
  }
}

SplitFunctions::TrampolineSetType
SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
  const auto &MIB = BF.getBinaryContext().MIB;

  // Map real landing pads to the corresponding trampolines.
  TrampolineSetType LPTrampolines;

  // Iterate over a copy of the basic blocks since we are adding new blocks to
  // the function, which will invalidate its iterators.
  std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
  for (BinaryBasicBlock *BB : Blocks) {
    for (MCInst &Instr : *BB) {
      const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
      if (!EHInfo || !EHInfo->first)
        continue;

      const MCSymbol *LPLabel = EHInfo->first;
      BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
      if (BB->isCold() == LPBlock->isCold())
        continue;

      const MCSymbol *TrampolineLabel = nullptr;
      auto Iter = LPTrampolines.find(LPLabel);
      if (Iter != LPTrampolines.end()) {
        TrampolineLabel = Iter->second;
      } else {
        // Create a trampoline basic block in the same fragment as the thrower.
        // Note: there's no need to insert the jump instruction, it will be
        // added by fixBranches().
        BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
        TrampolineBB->setIsCold(BB->isCold());
        TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
        TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
        TrampolineBB->setCFIState(LPBlock->getCFIState());
        TrampolineLabel = TrampolineBB->getLabel();
        LPTrampolines.insert(std::make_pair(LPLabel, TrampolineLabel));
      }

      // Substitute the landing pad with the trampoline.
      MIB->updateEHInfo(Instr,
                        MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
    }
  }

  if (LPTrampolines.empty())
    return LPTrampolines;

  // All trampoline blocks were added to the end of the function. Place them at
  // the end of the corresponding fragments.
  BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
                                                BF.getLayout().block_end());
  stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
    return A->isCold() < B->isCold();
  });
  BF.getLayout().update(NewLayout);

  // Conservatively introduce branch instructions.
  BF.fixBranches();

  // Update the exception-handling CFG for the function.
  BF.recomputeLandingPads();

  return LPTrampolines;
}

SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
    BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
    const SplitFunctions::TrampolineSetType &Trampolines) const {
  BasicBlockOrderType MergedLayout;
  for (BinaryBasicBlock *BB : Layout) {
    auto Iter = Trampolines.find(BB->getLabel());
    if (Iter != Trampolines.end()) {
      BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Iter->second);
      assert(LPBlock && "Could not find matching landing pad block.");
      MergedLayout.push_back(LPBlock);
    }
    MergedLayout.push_back(BB);
  }

  return MergedLayout;
}

} // namespace bolt
} // namespace llvm