1 //===--- SplitFunctions.cpp - pass for splitting function code ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/Passes/SplitFunctions.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "bolt/Core/ParallelUtilities.h" 14 #include "llvm/Support/CommandLine.h" 15 16 #include <vector> 17 18 #define DEBUG_TYPE "bolt-opts" 19 20 using namespace llvm; 21 using namespace bolt; 22 23 namespace opts { 24 25 extern cl::OptionCategory BoltOptCategory; 26 27 extern cl::opt<bool> SplitEH; 28 extern cl::opt<unsigned> ExecutionCountThreshold; 29 30 static cl::opt<bool> 31 AggressiveSplitting("split-all-cold", 32 cl::desc("outline as many cold basic blocks as possible"), 33 cl::ZeroOrMore, 34 cl::cat(BoltOptCategory)); 35 36 static cl::opt<unsigned> 37 SplitAlignThreshold("split-align-threshold", 38 cl::desc("when deciding to split a function, apply this alignment " 39 "while doing the size comparison (see -split-threshold). " 40 "Default value: 2."), 41 cl::init(2), 42 cl::ZeroOrMore, 43 cl::Hidden, 44 cl::cat(BoltOptCategory)); 45 46 static cl::opt<SplitFunctions::SplittingType> 47 SplitFunctions("split-functions", 48 cl::desc("split functions into hot and cold regions"), 49 cl::init(SplitFunctions::ST_NONE), 50 cl::values(clEnumValN(SplitFunctions::ST_NONE, "0", 51 "do not split any function"), 52 clEnumValN(SplitFunctions::ST_LARGE, "1", 53 "in non-relocation mode only split functions too large " 54 "to fit into original code space"), 55 clEnumValN(SplitFunctions::ST_LARGE, "2", 56 "same as 1 (backwards compatibility)"), 57 clEnumValN(SplitFunctions::ST_ALL, "3", 58 "split all functions")), 59 cl::ZeroOrMore, 60 cl::cat(BoltOptCategory)); 61 62 static cl::opt<unsigned> 63 SplitThreshold("split-threshold", 64 cl::desc("split function only if its main size is reduced by more than " 65 "given amount of bytes. Default value: 0, i.e. split iff the " 66 "size is reduced. Note that on some architectures the size can " 67 "increase after splitting."), 68 cl::init(0), 69 cl::ZeroOrMore, 70 cl::Hidden, 71 cl::cat(BoltOptCategory)); 72 73 void syncOptions(BinaryContext &BC) { 74 if (!BC.HasRelocations && opts::SplitFunctions == SplitFunctions::ST_LARGE) 75 opts::SplitFunctions = SplitFunctions::ST_ALL; 76 } 77 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const { 84 // Apply execution count threshold 85 if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) 86 return false; 87 88 return BinaryFunctionPass::shouldOptimize(BF); 89 } 90 91 void SplitFunctions::runOnFunctions(BinaryContext &BC) { 92 opts::syncOptions(BC); 93 94 if (opts::SplitFunctions == SplitFunctions::ST_NONE) 95 return; 96 97 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 98 splitFunction(BF); 99 }; 100 101 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 102 return !shouldOptimize(BF); 103 }; 104 105 ParallelUtilities::runOnEachFunction( 106 BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc, 107 "SplitFunctions"); 108 109 if (SplitBytesHot + SplitBytesCold > 0) { 110 outs() << "BOLT-INFO: splitting separates " << SplitBytesHot 111 << " hot bytes from " << SplitBytesCold << " cold bytes " 112 << format("(%.2lf%% of split functions is hot).\n", 113 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); 114 } 115 } 116 117 void SplitFunctions::splitFunction(BinaryFunction &BF) { 118 if (!BF.size()) 119 return; 120 121 if (!BF.hasValidProfile()) 122 return; 123 124 bool AllCold = true; 125 for (BinaryBasicBlock *BB : BF.layout()) { 126 uint64_t ExecCount = BB->getExecutionCount(); 127 if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) 128 return; 129 if (ExecCount != 0) 130 AllCold = false; 131 } 132 133 if (AllCold) 134 return; 135 136 BinaryFunction::BasicBlockOrderType PreSplitLayout = BF.getLayout(); 137 138 BinaryContext &BC = BF.getBinaryContext(); 139 size_t OriginalHotSize; 140 size_t HotSize; 141 size_t ColdSize; 142 if (BC.isX86()) { 143 std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF); 144 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 145 << " pre-split is <0x" 146 << Twine::utohexstr(OriginalHotSize) << ", 0x" 147 << Twine::utohexstr(ColdSize) << ">\n"); 148 } 149 150 if (opts::SplitFunctions == SplitFunctions::ST_LARGE && !BC.HasRelocations) { 151 // Split only if the function wouldn't fit. 152 if (OriginalHotSize <= BF.getMaxSize()) 153 return; 154 } 155 156 // Never outline the first basic block. 157 BF.layout_front()->setCanOutline(false); 158 for (BinaryBasicBlock *BB : BF.layout()) { 159 if (!BB->canOutline()) 160 continue; 161 if (BB->getExecutionCount() != 0) { 162 BB->setCanOutline(false); 163 continue; 164 } 165 // Do not split extra entry points in aarch64. They can be referred by 166 // using ADRs and when this happens, these blocks cannot be placed far 167 // away due to the limited range in ADR instruction. 168 if (BC.isAArch64() && BB->isEntryPoint()) { 169 BB->setCanOutline(false); 170 continue; 171 } 172 if (BF.hasEHRanges() && !opts::SplitEH) { 173 // We cannot move landing pads (or rather entry points for landing 174 // pads). 175 if (BB->isLandingPad()) { 176 BB->setCanOutline(false); 177 continue; 178 } 179 // We cannot move a block that can throw since exception-handling 180 // runtime cannot deal with split functions. However, if we can guarantee 181 // that the block never throws, it is safe to move the block to 182 // decrease the size of the function. 183 for (MCInst &Instr : *BB) { 184 if (BF.getBinaryContext().MIB->isInvoke(Instr)) { 185 BB->setCanOutline(false); 186 break; 187 } 188 } 189 } 190 } 191 192 if (opts::AggressiveSplitting) { 193 // All blocks with 0 count that we can move go to the end of the function. 194 // Even if they were natural to cluster formation and were seen in-between 195 // hot basic blocks. 196 std::stable_sort(BF.layout_begin(), BF.layout_end(), 197 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 198 return A->canOutline() < B->canOutline(); 199 }); 200 } else if (BF.hasEHRanges() && !opts::SplitEH) { 201 // Typically functions with exception handling have landing pads at the end. 202 // We cannot move beginning of landing pads, but we can move 0-count blocks 203 // comprising landing pads to the end and thus facilitate splitting. 204 auto FirstLP = BF.layout_begin(); 205 while ((*FirstLP)->isLandingPad()) 206 ++FirstLP; 207 208 std::stable_sort(FirstLP, BF.layout_end(), 209 [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { 210 return A->canOutline() < B->canOutline(); 211 }); 212 } 213 214 // Separate hot from cold starting from the bottom. 215 for (auto I = BF.layout_rbegin(), E = BF.layout_rend(); I != E; ++I) { 216 BinaryBasicBlock *BB = *I; 217 if (!BB->canOutline()) 218 break; 219 BB->setIsCold(true); 220 } 221 222 // Check the new size to see if it's worth splitting the function. 223 if (BC.isX86() && BF.isSplit()) { 224 std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); 225 LLVM_DEBUG(dbgs() << "Estimated size for function " << BF 226 << " post-split is <0x" << Twine::utohexstr(HotSize) 227 << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); 228 if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <= 229 alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) { 230 LLVM_DEBUG(dbgs() << "Reversing splitting of function " << BF << ":\n 0x" 231 << Twine::utohexstr(HotSize) << ", 0x" 232 << Twine::utohexstr(ColdSize) << " -> 0x" 233 << Twine::utohexstr(OriginalHotSize) << '\n'); 234 235 BF.updateBasicBlockLayout(PreSplitLayout); 236 for (BinaryBasicBlock &BB : BF) { 237 BB.setIsCold(false); 238 } 239 } else { 240 SplitBytesHot += HotSize; 241 SplitBytesCold += ColdSize; 242 } 243 } 244 } 245 246 } // namespace bolt 247 } // namespace llvm 248