1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements ReorderFunctions class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ReorderFunctions.h" 14 #include "bolt/Passes/HFSort.h" 15 #include "llvm/Support/CommandLine.h" 16 #include <fstream> 17 18 #define DEBUG_TYPE "hfsort" 19 20 using namespace llvm; 21 22 namespace opts { 23 24 extern cl::OptionCategory BoltOptCategory; 25 extern cl::opt<unsigned> Verbosity; 26 extern cl::opt<uint32_t> RandomSeed; 27 28 extern size_t padFunction(const bolt::BinaryFunction &Function); 29 30 cl::opt<bolt::ReorderFunctions::ReorderType> 31 ReorderFunctions("reorder-functions", 32 cl::desc("reorder and cluster functions (works only with relocations)"), 33 cl::init(bolt::ReorderFunctions::RT_NONE), 34 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, 35 "none", 36 "do not reorder functions"), 37 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, 38 "exec-count", 39 "order by execution count"), 40 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, 41 "hfsort", 42 "use hfsort algorithm"), 43 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, 44 "hfsort+", 45 "use hfsort+ algorithm"), 46 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, 47 "pettis-hansen", 48 "use Pettis-Hansen algorithm"), 49 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, 50 "random", 51 "reorder functions randomly"), 52 clEnumValN(bolt::ReorderFunctions::RT_USER, 53 "user", 54 "use function order specified by -function-order")), 55 cl::ZeroOrMore, 56 cl::cat(BoltOptCategory)); 57 58 static cl::opt<bool> ReorderFunctionsUseHotSize( 59 "reorder-functions-use-hot-size", 60 cl::desc("use a function's hot size when doing clustering"), cl::init(true), 61 cl::cat(BoltOptCategory)); 62 63 static cl::opt<std::string> 64 FunctionOrderFile("function-order", 65 cl::desc("file containing an ordered list of functions to use for function " 66 "reordering"), 67 cl::cat(BoltOptCategory)); 68 69 static cl::opt<std::string> 70 GenerateFunctionOrderFile("generate-function-order", 71 cl::desc("file to dump the ordered list of functions to use for function " 72 "reordering"), 73 cl::cat(BoltOptCategory)); 74 75 static cl::opt<std::string> 76 LinkSectionsFile("generate-link-sections", 77 cl::desc("generate a list of function sections in a format suitable for " 78 "inclusion in a linker script"), 79 cl::cat(BoltOptCategory)); 80 81 static cl::opt<bool> 82 UseEdgeCounts("use-edge-counts", 83 cl::desc("use edge count data when doing clustering"), 84 cl::init(true), cl::cat(BoltOptCategory)); 85 86 static cl::opt<bool> 87 CgFromPerfData("cg-from-perf-data", 88 cl::desc("use perf data directly when constructing the call graph" 89 " for stale functions"), 90 cl::init(true), 91 cl::ZeroOrMore, 92 cl::cat(BoltOptCategory)); 93 94 static cl::opt<bool> CgIgnoreRecursiveCalls( 95 "cg-ignore-recursive-calls", 96 cl::desc("ignore recursive calls when constructing the call graph"), 97 cl::init(true), cl::cat(BoltOptCategory)); 98 99 static cl::opt<bool> 100 CgUseSplitHotSize("cg-use-split-hot-size", 101 cl::desc("use hot/cold data on basic blocks to determine hot sizes for " 102 "call graph functions"), 103 cl::init(false), 104 cl::ZeroOrMore, 105 cl::cat(BoltOptCategory)); 106 107 } // namespace opts 108 109 namespace llvm { 110 namespace bolt { 111 112 using NodeId = CallGraph::NodeId; 113 using Arc = CallGraph::Arc; 114 using Node = CallGraph::Node; 115 116 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters, 117 std::map<uint64_t, BinaryFunction> &BFs) { 118 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats 119 uint64_t TotalSize = 0; 120 uint32_t Index = 0; 121 122 // Set order of hot functions based on clusters. 123 for (const Cluster &Cluster : Clusters) { 124 for (const NodeId FuncId : Cluster.targets()) { 125 Cg.nodeIdToFunc(FuncId)->setIndex(Index++); 126 FuncAddr[FuncId] = TotalSize; 127 TotalSize += Cg.size(FuncId); 128 } 129 } 130 131 // Assign valid index for functions with valid profile. 132 for (auto &It : BFs) { 133 BinaryFunction &BF = It.second; 134 if (!BF.hasValidIndex() && BF.hasValidProfile()) 135 BF.setIndex(Index++); 136 } 137 138 if (opts::ReorderFunctions == RT_NONE) 139 return; 140 141 if (opts::Verbosity == 0) { 142 #ifndef NDEBUG 143 if (!DebugFlag || !isCurrentDebugType("hfsort")) 144 return; 145 #else 146 return; 147 #endif 148 } 149 150 bool PrintDetailed = opts::Verbosity > 1; 151 #ifndef NDEBUG 152 PrintDetailed |= 153 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); 154 #endif 155 TotalSize = 0; 156 uint64_t CurPage = 0; 157 uint64_t Hotfuncs = 0; 158 double TotalDistance = 0; 159 double TotalCalls = 0; 160 double TotalCalls64B = 0; 161 double TotalCalls4KB = 0; 162 double TotalCalls2MB = 0; 163 if (PrintDetailed) 164 outs() << "BOLT-INFO: Function reordering page layout\n" 165 << "BOLT-INFO: ============== page 0 ==============\n"; 166 for (Cluster &Cluster : Clusters) { 167 if (PrintDetailed) 168 outs() << format( 169 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n", 170 Cluster.density(), Cluster.samples(), Cluster.size()); 171 172 for (NodeId FuncId : Cluster.targets()) { 173 if (Cg.samples(FuncId) > 0) { 174 Hotfuncs++; 175 176 if (PrintDetailed) 177 outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " (" 178 << Cg.size(FuncId) << ")\n"; 179 180 uint64_t Dist = 0; 181 uint64_t Calls = 0; 182 for (NodeId Dst : Cg.successors(FuncId)) { 183 if (FuncId == Dst) // ignore recursive calls in stats 184 continue; 185 const Arc &Arc = *Cg.findArc(FuncId, Dst); 186 const auto D = std::abs(FuncAddr[Arc.dst()] - 187 (FuncAddr[FuncId] + Arc.avgCallOffset())); 188 const double W = Arc.weight(); 189 if (D < 64 && PrintDetailed && opts::Verbosity > 2) 190 outs() << "BOLT-INFO: short (" << D << "B) call:\n" 191 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n" 192 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" 193 << "BOLT-INFO: Weight = " << W << "\n" 194 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() << "\n"; 195 Calls += W; 196 if (D < 64) TotalCalls64B += W; 197 if (D < 4096) TotalCalls4KB += W; 198 if (D < (2 << 20)) TotalCalls2MB += W; 199 Dist += Arc.weight() * D; 200 if (PrintDetailed) 201 outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " 202 "weight = %.0lf, callDist = %f\n", 203 Arc.src(), 204 FuncAddr[Arc.src()], 205 Arc.avgCallOffset(), 206 Arc.dst(), 207 FuncAddr[Arc.dst()], 208 Arc.weight(), D); 209 } 210 TotalCalls += Calls; 211 TotalDistance += Dist; 212 TotalSize += Cg.size(FuncId); 213 214 if (PrintDetailed) { 215 outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ", 216 TotalSize, Calls ? Dist / Calls : 0) 217 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n'; 218 const uint64_t NewPage = TotalSize / HugePageSize; 219 if (NewPage != CurPage) { 220 CurPage = NewPage; 221 outs() << format( 222 "BOLT-INFO: ============== page %u ==============\n", CurPage); 223 } 224 } 225 } 226 } 227 } 228 outs() << "BOLT-INFO: Function reordering stats\n" 229 << format("BOLT-INFO: Number of hot functions: %u\n" 230 "BOLT-INFO: Number of clusters: %lu\n", 231 Hotfuncs, Clusters.size()) 232 << format("BOLT-INFO: Final average call distance = %.1lf " 233 "(%.0lf / %.0lf)\n", 234 TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance, 235 TotalCalls) 236 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls); 237 if (TotalCalls) 238 outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n", 239 TotalCalls64B, 100 * TotalCalls64B / TotalCalls) 240 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n", 241 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls) 242 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n", 243 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls); 244 } 245 246 namespace { 247 248 std::vector<std::string> readFunctionOrderFile() { 249 std::vector<std::string> FunctionNames; 250 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in); 251 if (!FuncsFile) { 252 errs() << "Ordered functions file \"" << opts::FunctionOrderFile 253 << "\" can't be opened.\n"; 254 exit(1); 255 } 256 std::string FuncName; 257 while (std::getline(FuncsFile, FuncName)) 258 FunctionNames.push_back(FuncName); 259 return FunctionNames; 260 } 261 262 } 263 264 void ReorderFunctions::runOnFunctions(BinaryContext &BC) { 265 auto &BFs = BC.getBinaryFunctions(); 266 if (opts::ReorderFunctions != RT_NONE && 267 opts::ReorderFunctions != RT_EXEC_COUNT && 268 opts::ReorderFunctions != RT_USER) { 269 Cg = buildCallGraph(BC, 270 [](const BinaryFunction &BF) { 271 if (!BF.hasProfile()) 272 return true; 273 if (BF.getState() != BinaryFunction::State::CFG) 274 return true; 275 return false; 276 }, 277 opts::CgFromPerfData, 278 false, // IncludeColdCalls 279 opts::ReorderFunctionsUseHotSize, 280 opts::CgUseSplitHotSize, 281 opts::UseEdgeCounts, 282 opts::CgIgnoreRecursiveCalls); 283 Cg.normalizeArcWeights(); 284 } 285 286 std::vector<Cluster> Clusters; 287 288 switch (opts::ReorderFunctions) { 289 case RT_NONE: 290 break; 291 case RT_EXEC_COUNT: 292 { 293 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 294 uint32_t Index = 0; 295 std::transform(BFs.begin(), 296 BFs.end(), 297 SortedFunctions.begin(), 298 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 299 return &BFI.second; 300 }); 301 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 302 [&](const BinaryFunction *A, const BinaryFunction *B) { 303 if (A->isIgnored()) 304 return false; 305 const size_t PadA = opts::padFunction(*A); 306 const size_t PadB = opts::padFunction(*B); 307 if (!PadA || !PadB) { 308 if (PadA) 309 return true; 310 if (PadB) 311 return false; 312 } 313 return !A->hasProfile() && 314 (B->hasProfile() || 315 (A->getExecutionCount() > B->getExecutionCount())); 316 }); 317 for (BinaryFunction *BF : SortedFunctions) 318 if (BF->hasProfile()) 319 BF->setIndex(Index++); 320 } 321 break; 322 case RT_HFSORT: 323 Clusters = clusterize(Cg); 324 break; 325 case RT_HFSORT_PLUS: 326 Clusters = hfsortPlus(Cg); 327 break; 328 case RT_PETTIS_HANSEN: 329 Clusters = pettisAndHansen(Cg); 330 break; 331 case RT_RANDOM: 332 std::srand(opts::RandomSeed); 333 Clusters = randomClusters(Cg); 334 break; 335 case RT_USER: 336 { 337 uint32_t Index = 0; 338 for (const std::string &Function : readFunctionOrderFile()) { 339 std::vector<uint64_t> FuncAddrs; 340 341 BinaryData *BD = BC.getBinaryDataByName(Function); 342 if (!BD) { 343 uint32_t LocalID = 1; 344 while(1) { 345 // If we can't find the main symbol name, look for alternates. 346 const std::string FuncName = 347 Function + "/" + std::to_string(LocalID); 348 BD = BC.getBinaryDataByName(FuncName); 349 if (BD) 350 FuncAddrs.push_back(BD->getAddress()); 351 else 352 break; 353 LocalID++; 354 } 355 } else { 356 FuncAddrs.push_back(BD->getAddress()); 357 } 358 359 if (FuncAddrs.empty()) { 360 errs() << "BOLT-WARNING: Reorder functions: can't find function for " 361 << Function << ".\n"; 362 continue; 363 } 364 365 for (const uint64_t FuncAddr : FuncAddrs) { 366 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); 367 assert(FuncBD); 368 369 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); 370 if (!BF) { 371 errs() << "BOLT-WARNING: Reorder functions: can't find function for " 372 << Function << ".\n"; 373 break; 374 } 375 if (!BF->hasValidIndex()) 376 BF->setIndex(Index++); 377 else if (opts::Verbosity > 0) 378 errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function 379 << ".\n"; 380 } 381 } 382 } 383 break; 384 } 385 386 reorder(std::move(Clusters), BFs); 387 388 std::unique_ptr<std::ofstream> FuncsFile; 389 if (!opts::GenerateFunctionOrderFile.empty()) { 390 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, 391 std::ios::out); 392 if (!FuncsFile) { 393 errs() << "BOLT-ERROR: ordered functions file " 394 << opts::GenerateFunctionOrderFile << " cannot be opened\n"; 395 exit(1); 396 } 397 } 398 399 std::unique_ptr<std::ofstream> LinkSectionsFile; 400 if (!opts::LinkSectionsFile.empty()) { 401 LinkSectionsFile = 402 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out); 403 if (!LinkSectionsFile) { 404 errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile 405 << " cannot be opened\n"; 406 exit(1); 407 } 408 } 409 410 if (FuncsFile || LinkSectionsFile) { 411 std::vector<BinaryFunction *> SortedFunctions(BFs.size()); 412 std::transform(BFs.begin(), BFs.end(), SortedFunctions.begin(), 413 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 414 return &BFI.second; 415 }); 416 417 // Sort functions by index. 418 std::stable_sort( 419 SortedFunctions.begin(), 420 SortedFunctions.end(), 421 [](const BinaryFunction *A, const BinaryFunction *B) { 422 if (A->hasValidIndex() && B->hasValidIndex()) 423 return A->getIndex() < B->getIndex(); 424 if (A->hasValidIndex() && !B->hasValidIndex()) 425 return true; 426 if (!A->hasValidIndex() && B->hasValidIndex()) 427 return false; 428 return A->getAddress() < B->getAddress(); 429 }); 430 431 for (const BinaryFunction *Func : SortedFunctions) { 432 if (!Func->hasValidIndex()) 433 break; 434 if (Func->isPLTFunction()) 435 continue; 436 437 if (FuncsFile) 438 *FuncsFile << Func->getOneName().str() << '\n'; 439 440 if (LinkSectionsFile) { 441 const char *Indent = ""; 442 std::vector<StringRef> AllNames = Func->getNames(); 443 std::sort(AllNames.begin(), AllNames.end()); 444 for (StringRef Name : AllNames) { 445 const size_t SlashPos = Name.find('/'); 446 if (SlashPos != std::string::npos) { 447 // Avoid duplicates for local functions. 448 if (Name.find('/', SlashPos + 1) != std::string::npos) 449 continue; 450 Name = Name.substr(0, SlashPos); 451 } 452 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n'; 453 Indent = " "; 454 } 455 } 456 } 457 458 if (FuncsFile) { 459 FuncsFile->close(); 460 outs() << "BOLT-INFO: dumped function order to " 461 << opts::GenerateFunctionOrderFile << '\n'; 462 } 463 464 if (LinkSectionsFile) { 465 LinkSectionsFile->close(); 466 outs() << "BOLT-INFO: dumped linker section order to " 467 << opts::LinkSectionsFile << '\n'; 468 } 469 } 470 } 471 472 } // namespace bolt 473 } // namespace llvm 474