1 //===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SplitFunctions pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/SplitFunctions.h" 14 #include "bolt/Core/BinaryFunction.h" 15 #include "bolt/Core/ParallelUtilities.h" 16 #include "llvm/Support/CommandLine.h" 17 18 #include <vector> 19 20 #define DEBUG_TYPE "bolt-opts" 21 22 using namespace llvm; 23 using namespace bolt; 24 25 namespace opts { 26 27 extern cl::OptionCategory BoltOptCategory; 28 29 extern cl::opt<bool> SplitEH; 30 extern cl::opt<unsigned> ExecutionCountThreshold; 31 32 static cl::opt<bool> 33 AggressiveSplitting("split-all-cold", 34 cl::desc("outline as many cold basic blocks as possible"), 35 cl::ZeroOrMore, 36 cl::cat(BoltOptCategory)); 37 38 static cl::opt<unsigned> 39 SplitAlignThreshold("split-align-threshold", 40 cl::desc("when deciding to split a function, apply this alignment " 41 "while doing the size comparison (see -split-threshold). " 42 "Default value: 2."), 43 cl::init(2), 44 cl::ZeroOrMore, 45 cl::Hidden, 46 cl::cat(BoltOptCategory)); 47 48 static cl::opt<SplitFunctions::SplittingType> 49 SplitFunctions("split-functions", 50 cl::desc("split functions into hot and cold regions"), 51 cl::init(SplitFunctions::ST_NONE), 52 cl::values(clEnumValN(SplitFunctions::ST_NONE, "0", 53 "do not split any function"), 54 clEnumValN(SplitFunctions::ST_LARGE, "1", 55 "in non-relocation mode only split functions too large " 56 "to fit into original code space"), 57 clEnumValN(SplitFunctions::ST_LARGE, "2", 58 "same as 1 (backwards compatibility)"), 59 clEnumValN(SplitFunctions::ST_ALL, "3", 60 "split all functions")), 61 cl::ZeroOrMore, 62 cl::cat(BoltOptCategory)); 63 64 static cl::opt<unsigned> 65 SplitThreshold("split-threshold", 66 cl::desc("split function only if its main size is reduced by more than " 67 "given amount of bytes. Default value: 0, i.e. split iff the " 68 "size is reduced. Note that on some architectures the size can " 69 "increase after splitting."), 70 cl::init(0), 71 cl::ZeroOrMore, 72 cl::Hidden, 73 cl::cat(BoltOptCategory)); 74 75 void syncOptions(BinaryContext &BC) { 76 if (!BC.HasRelocations && opts::SplitFunctions == SplitFunctions::ST_LARGE) 77 opts::SplitFunctions = SplitFunctions::ST_ALL; 78 } 79 80 } // namespace opts 81 82 namespace llvm { 83 namespace bolt { 84 85 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const { 86 // Apply execution count threshold 87 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) 88 return false; 89 90 return BinaryFunctionPass::shouldOptimize(BF); 91 } 92 93 void SplitFunctions::runOnFunctions(BinaryContext &BC) { 94 opts::syncOptions(BC); 95 96 if (opts::SplitFunctions == SplitFunctions::ST_NONE) 97 return; 98 99 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 100 splitFunction(BF); 101 }; 102 103 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 104 return !shouldOptimize(BF); 105 }; 106 107 ParallelUtilities::runOnEachFunction( 108 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc, 109 "SplitFunctions"); 110 111 if (SplitBytesHot + SplitBytesCold > 0) { 112 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot 113 << " hot bytes from " << SplitBytesCold << " cold bytes " 114 << format("(%.2lf%% of split functions is hot).\n", 115 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); 116 } 117 } 118 119 void SplitFunctions::splitFunction(BinaryFunction &BF) { 120 if (!BF.size()) 121 return; 122 123 if (!BF.hasValidProfile()) 124 return; 125 126 bool AllCold = true; 127 for (BinaryBasicBlock *BB : BF.layout()) { 128 uint64_t ExecCount = BB->getExecutionCount(); 129 if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) 130 return; 131 if (ExecCount != 0) 132 AllCold = false; 133 } 134 135 if (AllCold) 136 return; 137 138 BinaryFunction::BasicBlockOrderType PreSplitLayout = BF.getLayout(); 139 140 BinaryContext &BC = BF.getBinaryContext(); 141 size_t OriginalHotSize; 142 size_t HotSize; 143 size_t ColdSize; 144 if (BC.isX86()) { 145 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF); 146 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 147 << " pre-split is <0x" 148 << Twine::utohexstr(OriginalHotSize) << ", 0x" 149 << Twine::utohexstr(ColdSize) << ">\n"); 150 } 151 152 if (opts::SplitFunctions == SplitFunctions::ST_LARGE && !BC.HasRelocations) { 153 // Split only if the function wouldn't fit. 154 if (OriginalHotSize <= BF.getMaxSize()) 155 return; 156 } 157 158 // Never outline the first basic block. 159 BF.layout_front()->setCanOutline(false); 160 for (BinaryBasicBlock *BB : BF.layout()) { 161 if (!BB->canOutline()) 162 continue; 163 if (BB->getExecutionCount() != 0) { 164 BB->setCanOutline(false); 165 continue; 166 } 167 // Do not split extra entry points in aarch64. They can be referred by 168 // using ADRs and when this happens, these blocks cannot be placed far 169 // away due to the limited range in ADR instruction. 170 if (BC.isAArch64() && BB->isEntryPoint()) { 171 BB->setCanOutline(false); 172 continue; 173 } 174 if (BF.hasEHRanges() && !opts::SplitEH) { 175 // We cannot move landing pads (or rather entry points for landing 176 // pads). 177 if (BB->isLandingPad()) { 178 BB->setCanOutline(false); 179 continue; 180 } 181 // We cannot move a block that can throw since exception-handling 182 // runtime cannot deal with split functions. However, if we can guarantee 183 // that the block never throws, it is safe to move the block to 184 // decrease the size of the function. 185 for (MCInst &Instr : *BB) { 186 if (BF.getBinaryContext().MIB->isInvoke(Instr)) { 187 BB->setCanOutline(false); 188 break; 189 } 190 } 191 } 192 } 193 194 if (opts::AggressiveSplitting) { 195 // All blocks with 0 count that we can move go to the end of the function. 196 // Even if they were natural to cluster formation and were seen in-between 197 // hot basic blocks. 198 std::stable_sort(BF.layout_begin(), BF.layout_end(), 199 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 200 return A->canOutline() < B->canOutline(); 201 }); 202 } else if (BF.hasEHRanges() && !opts::SplitEH) { 203 // Typically functions with exception handling have landing pads at the end. 204 // We cannot move beginning of landing pads, but we can move 0-count blocks 205 // comprising landing pads to the end and thus facilitate splitting. 206 auto FirstLP = BF.layout_begin(); 207 while ((*FirstLP)->isLandingPad()) 208 ++FirstLP; 209 210 std::stable_sort(FirstLP, BF.layout_end(), 211 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 212 return A->canOutline() < B->canOutline(); 213 }); 214 } 215 216 // Separate hot from cold starting from the bottom. 217 for (auto I = BF.layout_rbegin(), E = BF.layout_rend(); I != E; ++I) { 218 BinaryBasicBlock *BB = *I; 219 if (!BB->canOutline()) 220 break; 221 BB->setIsCold(true); 222 } 223 224 // Check the new size to see if it's worth splitting the function. 225 if (BC.isX86() && BF.isSplit()) { 226 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); 227 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 228 << " post-split is <0x" << Twine::utohexstr(HotSize) 229 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); 230 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <= 231 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) { 232 LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n 0x" 233 << Twine::utohexstr(HotSize) << ", 0x" 234 << Twine::utohexstr(ColdSize) << " -> 0x" 235 << Twine::utohexstr(OriginalHotSize) << '\n'); 236 237 BF.updateBasicBlockLayout(PreSplitLayout); 238 for (BinaryBasicBlock &BB : BF) { 239 BB.setIsCold(false); 240 } 241 } else { 242 SplitBytesHot += HotSize; 243 SplitBytesCold += ColdSize; 244 } 245 } 246 } 247 248 } // namespace bolt 249 } // namespace llvm 250