1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "llvm/Support/CommandLine.h"
16 #include <fstream>
17
18 #define DEBUG_TYPE "hfsort"
19
20 using namespace llvm;
21
22 namespace opts {
23
24 extern cl::OptionCategory BoltOptCategory;
25 extern cl::opt<unsigned> Verbosity;
26 extern cl::opt<uint32_t> RandomSeed;
27
28 extern size_t padFunction(const bolt::BinaryFunction &Function);
29
30 cl::opt<bolt::ReorderFunctions::ReorderType>
31 ReorderFunctions("reorder-functions",
32 cl::desc("reorder and cluster functions (works only with relocations)"),
33 cl::init(bolt::ReorderFunctions::RT_NONE),
34 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE,
35 "none",
36 "do not reorder functions"),
37 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT,
38 "exec-count",
39 "order by execution count"),
40 clEnumValN(bolt::ReorderFunctions::RT_HFSORT,
41 "hfsort",
42 "use hfsort algorithm"),
43 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS,
44 "hfsort+",
45 "use hfsort+ algorithm"),
46 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
47 "pettis-hansen",
48 "use Pettis-Hansen algorithm"),
49 clEnumValN(bolt::ReorderFunctions::RT_RANDOM,
50 "random",
51 "reorder functions randomly"),
52 clEnumValN(bolt::ReorderFunctions::RT_USER,
53 "user",
54 "use function order specified by -function-order")),
55 cl::ZeroOrMore,
56 cl::cat(BoltOptCategory));
57
58 static cl::opt<bool> ReorderFunctionsUseHotSize(
59 "reorder-functions-use-hot-size",
60 cl::desc("use a function's hot size when doing clustering"), cl::init(true),
61 cl::cat(BoltOptCategory));
62
63 static cl::opt<std::string>
64 FunctionOrderFile("function-order",
65 cl::desc("file containing an ordered list of functions to use for function "
66 "reordering"),
67 cl::cat(BoltOptCategory));
68
69 static cl::opt<std::string>
70 GenerateFunctionOrderFile("generate-function-order",
71 cl::desc("file to dump the ordered list of functions to use for function "
72 "reordering"),
73 cl::cat(BoltOptCategory));
74
75 static cl::opt<std::string>
76 LinkSectionsFile("generate-link-sections",
77 cl::desc("generate a list of function sections in a format suitable for "
78 "inclusion in a linker script"),
79 cl::cat(BoltOptCategory));
80
81 static cl::opt<bool>
82 UseEdgeCounts("use-edge-counts",
83 cl::desc("use edge count data when doing clustering"),
84 cl::init(true), cl::cat(BoltOptCategory));
85
86 static cl::opt<bool>
87 CgFromPerfData("cg-from-perf-data",
88 cl::desc("use perf data directly when constructing the call graph"
89 " for stale functions"),
90 cl::init(true),
91 cl::ZeroOrMore,
92 cl::cat(BoltOptCategory));
93
94 static cl::opt<bool> CgIgnoreRecursiveCalls(
95 "cg-ignore-recursive-calls",
96 cl::desc("ignore recursive calls when constructing the call graph"),
97 cl::init(true), cl::cat(BoltOptCategory));
98
99 static cl::opt<bool>
100 CgUseSplitHotSize("cg-use-split-hot-size",
101 cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
102 "call graph functions"),
103 cl::init(false),
104 cl::ZeroOrMore,
105 cl::cat(BoltOptCategory));
106
107 } // namespace opts
108
109 namespace llvm {
110 namespace bolt {
111
112 using NodeId = CallGraph::NodeId;
113 using Arc = CallGraph::Arc;
114 using Node = CallGraph::Node;
115
reorder(std::vector<Cluster> && Clusters,std::map<uint64_t,BinaryFunction> & BFs)116 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
117 std::map<uint64_t, BinaryFunction> &BFs) {
118 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
119 uint64_t TotalSize = 0;
120 uint32_t Index = 0;
121
122 // Set order of hot functions based on clusters.
123 for (const Cluster &Cluster : Clusters) {
124 for (const NodeId FuncId : Cluster.targets()) {
125 Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
126 FuncAddr[FuncId] = TotalSize;
127 TotalSize += Cg.size(FuncId);
128 }
129 }
130
131 // Assign valid index for functions with valid profile.
132 for (auto &It : BFs) {
133 BinaryFunction &BF = It.second;
134 if (!BF.hasValidIndex() && BF.hasValidProfile())
135 BF.setIndex(Index++);
136 }
137
138 if (opts::ReorderFunctions == RT_NONE)
139 return;
140
141 if (opts::Verbosity == 0) {
142 #ifndef NDEBUG
143 if (!DebugFlag || !isCurrentDebugType("hfsort"))
144 return;
145 #else
146 return;
147 #endif
148 }
149
150 bool PrintDetailed = opts::Verbosity > 1;
151 #ifndef NDEBUG
152 PrintDetailed |=
153 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
154 #endif
155 TotalSize = 0;
156 uint64_t CurPage = 0;
157 uint64_t Hotfuncs = 0;
158 double TotalDistance = 0;
159 double TotalCalls = 0;
160 double TotalCalls64B = 0;
161 double TotalCalls4KB = 0;
162 double TotalCalls2MB = 0;
163 if (PrintDetailed)
164 outs() << "BOLT-INFO: Function reordering page layout\n"
165 << "BOLT-INFO: ============== page 0 ==============\n";
166 for (Cluster &Cluster : Clusters) {
167 if (PrintDetailed)
168 outs() << format(
169 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
170 Cluster.density(), Cluster.samples(), Cluster.size());
171
172 for (NodeId FuncId : Cluster.targets()) {
173 if (Cg.samples(FuncId) > 0) {
174 Hotfuncs++;
175
176 if (PrintDetailed)
177 outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
178 << Cg.size(FuncId) << ")\n";
179
180 uint64_t Dist = 0;
181 uint64_t Calls = 0;
182 for (NodeId Dst : Cg.successors(FuncId)) {
183 if (FuncId == Dst) // ignore recursive calls in stats
184 continue;
185 const Arc &Arc = *Cg.findArc(FuncId, Dst);
186 const auto D = std::abs(FuncAddr[Arc.dst()] -
187 (FuncAddr[FuncId] + Arc.avgCallOffset()));
188 const double W = Arc.weight();
189 if (D < 64 && PrintDetailed && opts::Verbosity > 2)
190 outs() << "BOLT-INFO: short (" << D << "B) call:\n"
191 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
192 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
193 << "BOLT-INFO: Weight = " << W << "\n"
194 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() << "\n";
195 Calls += W;
196 if (D < 64) TotalCalls64B += W;
197 if (D < 4096) TotalCalls4KB += W;
198 if (D < (2 << 20)) TotalCalls2MB += W;
199 Dist += Arc.weight() * D;
200 if (PrintDetailed)
201 outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
202 "weight = %.0lf, callDist = %f\n",
203 Arc.src(),
204 FuncAddr[Arc.src()],
205 Arc.avgCallOffset(),
206 Arc.dst(),
207 FuncAddr[Arc.dst()],
208 Arc.weight(), D);
209 }
210 TotalCalls += Calls;
211 TotalDistance += Dist;
212 TotalSize += Cg.size(FuncId);
213
214 if (PrintDetailed) {
215 outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
216 TotalSize, Calls ? Dist / Calls : 0)
217 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
218 const uint64_t NewPage = TotalSize / HugePageSize;
219 if (NewPage != CurPage) {
220 CurPage = NewPage;
221 outs() << format(
222 "BOLT-INFO: ============== page %u ==============\n", CurPage);
223 }
224 }
225 }
226 }
227 }
228 outs() << "BOLT-INFO: Function reordering stats\n"
229 << format("BOLT-INFO: Number of hot functions: %u\n"
230 "BOLT-INFO: Number of clusters: %lu\n",
231 Hotfuncs, Clusters.size())
232 << format("BOLT-INFO: Final average call distance = %.1lf "
233 "(%.0lf / %.0lf)\n",
234 TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
235 TotalCalls)
236 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls);
237 if (TotalCalls)
238 outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
239 TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
240 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
241 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
242 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
243 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
244 }
245
246 namespace {
247
readFunctionOrderFile()248 std::vector<std::string> readFunctionOrderFile() {
249 std::vector<std::string> FunctionNames;
250 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
251 if (!FuncsFile) {
252 errs() << "Ordered functions file \"" << opts::FunctionOrderFile
253 << "\" can't be opened.\n";
254 exit(1);
255 }
256 std::string FuncName;
257 while (std::getline(FuncsFile, FuncName))
258 FunctionNames.push_back(FuncName);
259 return FunctionNames;
260 }
261
262 }
263
runOnFunctions(BinaryContext & BC)264 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
265 auto &BFs = BC.getBinaryFunctions();
266 if (opts::ReorderFunctions != RT_NONE &&
267 opts::ReorderFunctions != RT_EXEC_COUNT &&
268 opts::ReorderFunctions != RT_USER) {
269 Cg = buildCallGraph(BC,
270 [](const BinaryFunction &BF) {
271 if (!BF.hasProfile())
272 return true;
273 if (BF.getState() != BinaryFunction::State::CFG)
274 return true;
275 return false;
276 },
277 opts::CgFromPerfData,
278 false, // IncludeColdCalls
279 opts::ReorderFunctionsUseHotSize,
280 opts::CgUseSplitHotSize,
281 opts::UseEdgeCounts,
282 opts::CgIgnoreRecursiveCalls);
283 Cg.normalizeArcWeights();
284 }
285
286 std::vector<Cluster> Clusters;
287
288 switch (opts::ReorderFunctions) {
289 case RT_NONE:
290 break;
291 case RT_EXEC_COUNT:
292 {
293 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
294 uint32_t Index = 0;
295 llvm::transform(BFs, SortedFunctions.begin(),
296 [](std::pair<const uint64_t, BinaryFunction> &BFI) {
297 return &BFI.second;
298 });
299 llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A,
300 const BinaryFunction *B) {
301 if (A->isIgnored())
302 return false;
303 const size_t PadA = opts::padFunction(*A);
304 const size_t PadB = opts::padFunction(*B);
305 if (!PadA || !PadB) {
306 if (PadA)
307 return true;
308 if (PadB)
309 return false;
310 }
311 return !A->hasProfile() &&
312 (B->hasProfile() ||
313 (A->getExecutionCount() > B->getExecutionCount()));
314 });
315 for (BinaryFunction *BF : SortedFunctions)
316 if (BF->hasProfile())
317 BF->setIndex(Index++);
318 }
319 break;
320 case RT_HFSORT:
321 Clusters = clusterize(Cg);
322 break;
323 case RT_HFSORT_PLUS:
324 Clusters = hfsortPlus(Cg);
325 break;
326 case RT_PETTIS_HANSEN:
327 Clusters = pettisAndHansen(Cg);
328 break;
329 case RT_RANDOM:
330 std::srand(opts::RandomSeed);
331 Clusters = randomClusters(Cg);
332 break;
333 case RT_USER:
334 {
335 uint32_t Index = 0;
336 for (const std::string &Function : readFunctionOrderFile()) {
337 std::vector<uint64_t> FuncAddrs;
338
339 BinaryData *BD = BC.getBinaryDataByName(Function);
340 if (!BD) {
341 uint32_t LocalID = 1;
342 while(1) {
343 // If we can't find the main symbol name, look for alternates.
344 const std::string FuncName =
345 Function + "/" + std::to_string(LocalID);
346 BD = BC.getBinaryDataByName(FuncName);
347 if (BD)
348 FuncAddrs.push_back(BD->getAddress());
349 else
350 break;
351 LocalID++;
352 }
353 } else {
354 FuncAddrs.push_back(BD->getAddress());
355 }
356
357 if (FuncAddrs.empty()) {
358 errs() << "BOLT-WARNING: Reorder functions: can't find function for "
359 << Function << ".\n";
360 continue;
361 }
362
363 for (const uint64_t FuncAddr : FuncAddrs) {
364 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
365 assert(FuncBD);
366
367 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
368 if (!BF) {
369 errs() << "BOLT-WARNING: Reorder functions: can't find function for "
370 << Function << ".\n";
371 break;
372 }
373 if (!BF->hasValidIndex())
374 BF->setIndex(Index++);
375 else if (opts::Verbosity > 0)
376 errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
377 << ".\n";
378 }
379 }
380 }
381 break;
382 }
383
384 reorder(std::move(Clusters), BFs);
385
386 std::unique_ptr<std::ofstream> FuncsFile;
387 if (!opts::GenerateFunctionOrderFile.empty()) {
388 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
389 std::ios::out);
390 if (!FuncsFile) {
391 errs() << "BOLT-ERROR: ordered functions file "
392 << opts::GenerateFunctionOrderFile << " cannot be opened\n";
393 exit(1);
394 }
395 }
396
397 std::unique_ptr<std::ofstream> LinkSectionsFile;
398 if (!opts::LinkSectionsFile.empty()) {
399 LinkSectionsFile =
400 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
401 if (!LinkSectionsFile) {
402 errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
403 << " cannot be opened\n";
404 exit(1);
405 }
406 }
407
408 if (FuncsFile || LinkSectionsFile) {
409 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
410 llvm::transform(BFs, SortedFunctions.begin(),
411 [](std::pair<const uint64_t, BinaryFunction> &BFI) {
412 return &BFI.second;
413 });
414
415 // Sort functions by index.
416 llvm::stable_sort(SortedFunctions,
417 [](const BinaryFunction *A, const BinaryFunction *B) {
418 if (A->hasValidIndex() && B->hasValidIndex())
419 return A->getIndex() < B->getIndex();
420 if (A->hasValidIndex() && !B->hasValidIndex())
421 return true;
422 if (!A->hasValidIndex() && B->hasValidIndex())
423 return false;
424 return A->getAddress() < B->getAddress();
425 });
426
427 for (const BinaryFunction *Func : SortedFunctions) {
428 if (!Func->hasValidIndex())
429 break;
430 if (Func->isPLTFunction())
431 continue;
432
433 if (FuncsFile)
434 *FuncsFile << Func->getOneName().str() << '\n';
435
436 if (LinkSectionsFile) {
437 const char *Indent = "";
438 std::vector<StringRef> AllNames = Func->getNames();
439 llvm::sort(AllNames);
440 for (StringRef Name : AllNames) {
441 const size_t SlashPos = Name.find('/');
442 if (SlashPos != std::string::npos) {
443 // Avoid duplicates for local functions.
444 if (Name.find('/', SlashPos + 1) != std::string::npos)
445 continue;
446 Name = Name.substr(0, SlashPos);
447 }
448 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
449 Indent = " ";
450 }
451 }
452 }
453
454 if (FuncsFile) {
455 FuncsFile->close();
456 outs() << "BOLT-INFO: dumped function order to "
457 << opts::GenerateFunctionOrderFile << '\n';
458 }
459
460 if (LinkSectionsFile) {
461 LinkSectionsFile->close();
462 outs() << "BOLT-INFO: dumped linker section order to "
463 << opts::LinkSectionsFile << '\n';
464 }
465 }
466 }
467
468 } // namespace bolt
469 } // namespace llvm
470