1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements ReorderFunctions class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ReorderFunctions.h" 14 #include "bolt/Passes/HFSort.h" 15 #include "llvm/Support/CommandLine.h" 16 #include <fstream> 17 18 #define DEBUG_TYPE "hfsort" 19 20 using namespace llvm; 21 22 namespace opts { 23 24 extern cl::OptionCategory BoltOptCategory; 25 extern cl::opt<unsigned> Verbosity; 26 extern cl::opt<uint32_t> RandomSeed; 27 28 extern size_t padFunction(const bolt::BinaryFunction &Function); 29 30 cl::opt<bolt::ReorderFunctions::ReorderType> 31 ReorderFunctions("reorder-functions", 32 cl::desc("reorder and cluster functions (works only with relocations)"), 33 cl::init(bolt::ReorderFunctions::RT_NONE), 34 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, 35 "none", 36 "do not reorder functions"), 37 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, 38 "exec-count", 39 "order by execution count"), 40 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, 41 "hfsort", 42 "use hfsort algorithm"), 43 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, 44 "hfsort+", 45 "use hfsort+ algorithm"), 46 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, 47 "pettis-hansen", 48 "use Pettis-Hansen algorithm"), 49 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, 50 "random", 51 "reorder functions randomly"), 52 clEnumValN(bolt::ReorderFunctions::RT_USER, 53 "user", 54 "use function order specified by -function-order")), 55 cl::ZeroOrMore, 56 cl::cat(BoltOptCategory)); 57 58 static cl::opt<bool> 59 ReorderFunctionsUseHotSize("reorder-functions-use-hot-size", 60 cl::desc("use a function's hot size when doing clustering"), 61 cl::init(true), 62 cl::ZeroOrMore, 63 cl::cat(BoltOptCategory)); 64 65 static cl::opt<std::string> 66 FunctionOrderFile("function-order", 67 cl::desc("file containing an ordered list of functions to use for function " 68 "reordering"), 69 cl::cat(BoltOptCategory)); 70 71 static cl::opt<std::string> 72 GenerateFunctionOrderFile("generate-function-order", 73 cl::desc("file to dump the ordered list of functions to use for function " 74 "reordering"), 75 cl::cat(BoltOptCategory)); 76 77 static cl::opt<std::string> 78 LinkSectionsFile("generate-link-sections", 79 cl::desc("generate a list of function sections in a format suitable for " 80 "inclusion in a linker script"), 81 cl::cat(BoltOptCategory)); 82 83 static cl::opt<bool> 84 UseEdgeCounts("use-edge-counts", 85 cl::desc("use edge count data when doing clustering"), 86 cl::init(true), 87 cl::ZeroOrMore, 88 cl::cat(BoltOptCategory)); 89 90 static cl::opt<bool> 91 CgFromPerfData("cg-from-perf-data", 92 cl::desc("use perf data directly when constructing the call graph" 93 " for stale functions"), 94 cl::init(true), 95 cl::ZeroOrMore, 96 cl::cat(BoltOptCategory)); 97 98 static cl::opt<bool> 99 CgIgnoreRecursiveCalls("cg-ignore-recursive-calls", 100 cl::desc("ignore recursive calls when constructing the call graph"), 101 cl::init(true), 102 cl::ZeroOrMore, 103 cl::cat(BoltOptCategory)); 104 105 static cl::opt<bool> 106 CgUseSplitHotSize("cg-use-split-hot-size", 107 cl::desc("use hot/cold data on basic blocks to determine hot sizes for " 108 "call graph functions"), 109 cl::init(false), 110 cl::ZeroOrMore, 111 cl::cat(BoltOptCategory)); 112 113 } // namespace opts 114 115 namespace llvm { 116 namespace bolt { 117 118 using NodeId = CallGraph::NodeId; 119 using Arc = CallGraph::Arc; 120 using Node = CallGraph::Node; 121 122 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters, 123 std::map<uint64_t, BinaryFunction> &BFs) { 124 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats 125 uint64_t TotalSize = 0; 126 uint32_t Index = 0; 127 128 // Set order of hot functions based on clusters. 129 for (const Cluster &Cluster : Clusters) { 130 for (const NodeId FuncId : Cluster.targets()) { 131 Cg.nodeIdToFunc(FuncId)->setIndex(Index++); 132 FuncAddr[FuncId] = TotalSize; 133 TotalSize += Cg.size(FuncId); 134 } 135 } 136 137 if (opts::ReorderFunctions == RT_NONE) 138 return; 139 140 if (opts::Verbosity == 0) { 141 #ifndef NDEBUG 142 if (!DebugFlag || !isCurrentDebugType("hfsort")) 143 return; 144 #else 145 return; 146 #endif 147 } 148 149 bool PrintDetailed = opts::Verbosity > 1; 150 #ifndef NDEBUG 151 PrintDetailed |= 152 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); 153 #endif 154 TotalSize = 0; 155 uint64_t CurPage = 0; 156 uint64_t Hotfuncs = 0; 157 double TotalDistance = 0; 158 double TotalCalls = 0; 159 double TotalCalls64B = 0; 160 double TotalCalls4KB = 0; 161 double TotalCalls2MB = 0; 162 if (PrintDetailed) 163 outs() << "BOLT-INFO: Function reordering page layout\n" 164 << "BOLT-INFO: ============== page 0 ==============\n"; 165 for (Cluster &Cluster : Clusters) { 166 if (PrintDetailed) 167 outs() << format( 168 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", 169 Cluster.density(), Cluster.samples(), Cluster.size()); 170 171 for (NodeId FuncId : Cluster.targets()) { 172 if (Cg.samples(FuncId) > 0) { 173 Hotfuncs++; 174 175 if (PrintDetailed) 176 outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " (" 177 << Cg.size(FuncId) << ")\n"; 178 179 uint64_t Dist = 0; 180 uint64_t Calls = 0; 181 for (NodeId Dst : Cg.successors(FuncId)) { 182 if (FuncId == Dst) // ignore recursive calls in stats 183 continue; 184 const Arc &Arc = *Cg.findArc(FuncId, Dst); 185 const auto D = std::abs(FuncAddr[Arc.dst()] - 186 (FuncAddr[FuncId] + Arc.avgCallOffset())); 187 const double W = Arc.weight(); 188 if (D < 64 && PrintDetailed && opts::Verbosity > 2) 189 outs() << "BOLT-INFO: short (" << D << "B) call:\n" 190 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n" 191 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" 192 << "BOLT-INFO: Weight = " << W << "\n" 193 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() << "\n"; 194 Calls += W; 195 if (D < 64) TotalCalls64B += W; 196 if (D < 4096) TotalCalls4KB += W; 197 if (D < (2 << 20)) TotalCalls2MB += W; 198 Dist += Arc.weight() * D; 199 if (PrintDetailed) 200 outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " 201 "weight = %.0lf, callDist = %f\n", 202 Arc.src(), 203 FuncAddr[Arc.src()], 204 Arc.avgCallOffset(), 205 Arc.dst(), 206 FuncAddr[Arc.dst()], 207 Arc.weight(), D); 208 } 209 TotalCalls += Calls; 210 TotalDistance += Dist; 211 TotalSize += Cg.size(FuncId); 212 213 if (PrintDetailed) { 214 outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", 215 TotalSize, Calls ? Dist / Calls : 0) 216 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; 217 const uint64_t NewPage = TotalSize / HugePageSize; 218 if (NewPage != CurPage) { 219 CurPage = NewPage; 220 outs() << format( 221 "BOLT-INFO: ============== page %u ==============\n", CurPage); 222 } 223 } 224 } 225 } 226 } 227 outs() << "BOLT-INFO: Function reordering stats\n" 228 << format("BOLT-INFO: Number of hot functions: %u\n" 229 "BOLT-INFO: Number of clusters: %lu\n", 230 Hotfuncs, Clusters.size()) 231 << format("BOLT-INFO: Final average call distance = %.1lf " 232 "(%.0lf / %.0lf)\n", 233 TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance, 234 TotalCalls) 235 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); 236 if (TotalCalls) 237 outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", 238 TotalCalls64B, 100 * TotalCalls64B / TotalCalls) 239 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", 240 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) 241 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", 242 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); 243 } 244 245 namespace { 246 247 std::vector<std::string> readFunctionOrderFile() { 248 std::vector<std::string> FunctionNames; 249 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); 250 if (!FuncsFile) { 251 errs() << "Ordered functions file \"" << opts::FunctionOrderFile 252 << "\" can't be opened.\n"; 253 exit(1); 254 } 255 std::string FuncName; 256 while (std::getline(FuncsFile, FuncName)) 257 FunctionNames.push_back(FuncName); 258 return FunctionNames; 259 } 260 261 } 262 263 void ReorderFunctions::runOnFunctions(BinaryContext &BC) { 264 auto &BFs = BC.getBinaryFunctions(); 265 if (opts::ReorderFunctions != RT_NONE && 266 opts::ReorderFunctions != RT_EXEC_COUNT && 267 opts::ReorderFunctions != RT_USER) { 268 Cg = buildCallGraph(BC, 269 [](const BinaryFunction &BF) { 270 if (!BF.hasProfile()) 271 return true; 272 if (BF.getState() != BinaryFunction::State::CFG) 273 return true; 274 return false; 275 }, 276 opts::CgFromPerfData, 277 false, // IncludeColdCalls 278 opts::ReorderFunctionsUseHotSize, 279 opts::CgUseSplitHotSize, 280 opts::UseEdgeCounts, 281 opts::CgIgnoreRecursiveCalls); 282 Cg.normalizeArcWeights(); 283 } 284 285 std::vector<Cluster> Clusters; 286 287 switch (opts::ReorderFunctions) { 288 case RT_NONE: 289 break; 290 case RT_EXEC_COUNT: 291 { 292 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 293 uint32_t Index = 0; 294 std::transform(BFs.begin(), 295 BFs.end(), 296 SortedFunctions.begin(), 297 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 298 return &BFI.second; 299 }); 300 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 301 [&](const BinaryFunction *A, const BinaryFunction *B) { 302 if (A->isIgnored()) 303 return false; 304 const size_t PadA = opts::padFunction(*A); 305 const size_t PadB = opts::padFunction(*B); 306 if (!PadA || !PadB) { 307 if (PadA) 308 return true; 309 if (PadB) 310 return false; 311 } 312 return !A->hasProfile() && 313 (B->hasProfile() || 314 (A->getExecutionCount() > B->getExecutionCount())); 315 }); 316 for (BinaryFunction *BF : SortedFunctions) 317 if (BF->hasProfile()) 318 BF->setIndex(Index++); 319 } 320 break; 321 case RT_HFSORT: 322 Clusters = clusterize(Cg); 323 break; 324 case RT_HFSORT_PLUS: 325 Clusters = hfsortPlus(Cg); 326 break; 327 case RT_PETTIS_HANSEN: 328 Clusters = pettisAndHansen(Cg); 329 break; 330 case RT_RANDOM: 331 std::srand(opts::RandomSeed); 332 Clusters = randomClusters(Cg); 333 break; 334 case RT_USER: 335 { 336 uint32_t Index = 0; 337 for (const std::string &Function : readFunctionOrderFile()) { 338 std::vector<uint64_t> FuncAddrs; 339 340 BinaryData *BD = BC.getBinaryDataByName(Function); 341 if (!BD) { 342 uint32_t LocalID = 1; 343 while(1) { 344 // If we can't find the main symbol name, look for alternates. 345 const std::string FuncName = 346 Function + "/" + std::to_string(LocalID); 347 BD = BC.getBinaryDataByName(FuncName); 348 if (BD) 349 FuncAddrs.push_back(BD->getAddress()); 350 else 351 break; 352 LocalID++; 353 } 354 } else { 355 FuncAddrs.push_back(BD->getAddress()); 356 } 357 358 if (FuncAddrs.empty()) { 359 errs() << "BOLT-WARNING: Reorder functions: can't find function for " 360 << Function << ".\n"; 361 continue; 362 } 363 364 for (const uint64_t FuncAddr : FuncAddrs) { 365 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); 366 assert(FuncBD); 367 368 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); 369 if (!BF) { 370 errs() << "BOLT-WARNING: Reorder functions: can't find function for " 371 << Function << ".\n"; 372 break; 373 } 374 if (!BF->hasValidIndex()) 375 BF->setIndex(Index++); 376 else if (opts::Verbosity > 0) 377 errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function 378 << ".\n"; 379 } 380 } 381 } 382 break; 383 } 384 385 reorder(std::move(Clusters), BFs); 386 387 std::unique_ptr<std::ofstream> FuncsFile; 388 if (!opts::GenerateFunctionOrderFile.empty()) { 389 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, 390 std::ios::out); 391 if (!FuncsFile) { 392 errs() << "BOLT-ERROR: ordered functions file " 393 << opts::GenerateFunctionOrderFile << " cannot be opened\n"; 394 exit(1); 395 } 396 } 397 398 std::unique_ptr<std::ofstream> LinkSectionsFile; 399 if (!opts::LinkSectionsFile.empty()) { 400 LinkSectionsFile = 401 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out); 402 if (!LinkSectionsFile) { 403 errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile 404 << " cannot be opened\n"; 405 exit(1); 406 } 407 } 408 409 if (FuncsFile || LinkSectionsFile) { 410 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 411 std::transform(BFs.begin(), BFs.end(), SortedFunctions.begin(), 412 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 413 return &BFI.second; 414 }); 415 416 // Sort functions by index. 417 std::stable_sort( 418 SortedFunctions.begin(), 419 SortedFunctions.end(), 420 [](const BinaryFunction *A, const BinaryFunction *B) { 421 if (A->hasValidIndex() && B->hasValidIndex()) 422 return A->getIndex() < B->getIndex(); 423 if (A->hasValidIndex() && !B->hasValidIndex()) 424 return true; 425 if (!A->hasValidIndex() && B->hasValidIndex()) 426 return false; 427 return A->getAddress() < B->getAddress(); 428 }); 429 430 for (const BinaryFunction *Func : SortedFunctions) { 431 if (!Func->hasValidIndex()) 432 break; 433 if (Func->isPLTFunction()) 434 continue; 435 436 if (FuncsFile) 437 *FuncsFile << Func->getOneName().str() << '\n'; 438 439 if (LinkSectionsFile) { 440 const char *Indent = ""; 441 std::vector<StringRef> AllNames = Func->getNames(); 442 std::sort(AllNames.begin(), AllNames.end()); 443 for (StringRef Name : AllNames) { 444 const size_t SlashPos = Name.find('/'); 445 if (SlashPos != std::string::npos) { 446 // Avoid duplicates for local functions. 447 if (Name.find('/', SlashPos + 1) != std::string::npos) 448 continue; 449 Name = Name.substr(0, SlashPos); 450 } 451 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; 452 Indent = " "; 453 } 454 } 455 } 456 457 if (FuncsFile) { 458 FuncsFile->close(); 459 outs() << "BOLT-INFO: dumped function order to " 460 << opts::GenerateFunctionOrderFile << '\n'; 461 } 462 463 if (LinkSectionsFile) { 464 LinkSectionsFile->close(); 465 outs() << "BOLT-INFO: dumped linker section order to " 466 << opts::LinkSectionsFile << '\n'; 467 } 468 } 469 } 470 471 } // namespace bolt 472 } // namespace llvm 473