1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "llvm/Support/CommandLine.h"
16 #include <fstream>
17 
18 #define DEBUG_TYPE "hfsort"
19 
20 using namespace llvm;
21 
22 namespace opts {
23 
24 extern cl::OptionCategory BoltOptCategory;
25 extern cl::opt<unsigned> Verbosity;
26 extern cl::opt<uint32_t> RandomSeed;
27 
28 extern size_t padFunction(const bolt::BinaryFunction &Function);
29 
30 cl::opt<bolt::ReorderFunctions::ReorderType>
31 ReorderFunctions("reorder-functions",
32   cl::desc("reorder and cluster functions (works only with relocations)"),
33   cl::init(bolt::ReorderFunctions::RT_NONE),
34   cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE,
35       "none",
36       "do not reorder functions"),
37     clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT,
38       "exec-count",
39       "order by execution count"),
40     clEnumValN(bolt::ReorderFunctions::RT_HFSORT,
41       "hfsort",
42       "use hfsort algorithm"),
43     clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS,
44       "hfsort+",
45       "use hfsort+ algorithm"),
46     clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
47       "pettis-hansen",
48       "use Pettis-Hansen algorithm"),
49     clEnumValN(bolt::ReorderFunctions::RT_RANDOM,
50       "random",
51       "reorder functions randomly"),
52     clEnumValN(bolt::ReorderFunctions::RT_USER,
53       "user",
54       "use function order specified by -function-order")),
55   cl::ZeroOrMore,
56   cl::cat(BoltOptCategory));
57 
58 static cl::opt<bool> ReorderFunctionsUseHotSize(
59     "reorder-functions-use-hot-size",
60     cl::desc("use a function's hot size when doing clustering"), cl::init(true),
61     cl::cat(BoltOptCategory));
62 
63 static cl::opt<std::string>
64 FunctionOrderFile("function-order",
65   cl::desc("file containing an ordered list of functions to use for function "
66            "reordering"),
67   cl::cat(BoltOptCategory));
68 
69 static cl::opt<std::string>
70 GenerateFunctionOrderFile("generate-function-order",
71   cl::desc("file to dump the ordered list of functions to use for function "
72            "reordering"),
73   cl::cat(BoltOptCategory));
74 
75 static cl::opt<std::string>
76 LinkSectionsFile("generate-link-sections",
77   cl::desc("generate a list of function sections in a format suitable for "
78            "inclusion in a linker script"),
79   cl::cat(BoltOptCategory));
80 
81 static cl::opt<bool>
82     UseEdgeCounts("use-edge-counts",
83                   cl::desc("use edge count data when doing clustering"),
84                   cl::init(true), cl::cat(BoltOptCategory));
85 
86 static cl::opt<bool>
87 CgFromPerfData("cg-from-perf-data",
88   cl::desc("use perf data directly when constructing the call graph"
89            " for stale functions"),
90   cl::init(true),
91   cl::ZeroOrMore,
92   cl::cat(BoltOptCategory));
93 
94 static cl::opt<bool> CgIgnoreRecursiveCalls(
95     "cg-ignore-recursive-calls",
96     cl::desc("ignore recursive calls when constructing the call graph"),
97     cl::init(true), cl::cat(BoltOptCategory));
98 
99 static cl::opt<bool>
100 CgUseSplitHotSize("cg-use-split-hot-size",
101   cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
102            "call graph functions"),
103   cl::init(false),
104   cl::ZeroOrMore,
105   cl::cat(BoltOptCategory));
106 
107 } // namespace opts
108 
109 namespace llvm {
110 namespace bolt {
111 
112 using NodeId = CallGraph::NodeId;
113 using Arc = CallGraph::Arc;
114 using Node = CallGraph::Node;
115 
reorder(std::vector<Cluster> && Clusters,std::map<uint64_t,BinaryFunction> & BFs)116 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
117                                std::map<uint64_t, BinaryFunction> &BFs) {
118   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
119   uint64_t TotalSize = 0;
120   uint32_t Index = 0;
121 
122   // Set order of hot functions based on clusters.
123   for (const Cluster &Cluster : Clusters) {
124     for (const NodeId FuncId : Cluster.targets()) {
125       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
126       FuncAddr[FuncId] = TotalSize;
127       TotalSize += Cg.size(FuncId);
128     }
129   }
130 
131   // Assign valid index for functions with valid profile.
132   for (auto &It : BFs) {
133     BinaryFunction &BF = It.second;
134     if (!BF.hasValidIndex() && BF.hasValidProfile())
135       BF.setIndex(Index++);
136   }
137 
138   if (opts::ReorderFunctions == RT_NONE)
139     return;
140 
141   if (opts::Verbosity == 0) {
142 #ifndef NDEBUG
143     if (!DebugFlag || !isCurrentDebugType("hfsort"))
144       return;
145 #else
146     return;
147 #endif
148   }
149 
150   bool PrintDetailed = opts::Verbosity > 1;
151 #ifndef NDEBUG
152   PrintDetailed |=
153     (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
154 #endif
155   TotalSize   = 0;
156   uint64_t CurPage     = 0;
157   uint64_t Hotfuncs    = 0;
158   double TotalDistance = 0;
159   double TotalCalls    = 0;
160   double TotalCalls64B = 0;
161   double TotalCalls4KB = 0;
162   double TotalCalls2MB = 0;
163   if (PrintDetailed)
164     outs() << "BOLT-INFO: Function reordering page layout\n"
165            << "BOLT-INFO: ============== page 0 ==============\n";
166   for (Cluster &Cluster : Clusters) {
167     if (PrintDetailed)
168       outs() << format(
169           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
170           Cluster.density(), Cluster.samples(), Cluster.size());
171 
172     for (NodeId FuncId : Cluster.targets()) {
173       if (Cg.samples(FuncId) > 0) {
174         Hotfuncs++;
175 
176         if (PrintDetailed)
177           outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
178                  << Cg.size(FuncId) << ")\n";
179 
180         uint64_t Dist = 0;
181         uint64_t Calls = 0;
182         for (NodeId Dst : Cg.successors(FuncId)) {
183           if (FuncId == Dst) // ignore recursive calls in stats
184             continue;
185           const Arc &Arc = *Cg.findArc(FuncId, Dst);
186           const auto D = std::abs(FuncAddr[Arc.dst()] -
187                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
188           const double W = Arc.weight();
189           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
190             outs() << "BOLT-INFO: short (" << D << "B) call:\n"
191                    << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
192                    << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
193                    << "BOLT-INFO:   Weight = " << W << "\n"
194                    << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset() << "\n";
195           Calls += W;
196           if (D < 64)        TotalCalls64B += W;
197           if (D < 4096)      TotalCalls4KB += W;
198           if (D < (2 << 20)) TotalCalls2MB += W;
199           Dist += Arc.weight() * D;
200           if (PrintDetailed)
201             outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
202                              "weight = %.0lf, callDist = %f\n",
203                              Arc.src(),
204                              FuncAddr[Arc.src()],
205                              Arc.avgCallOffset(),
206                              Arc.dst(),
207                              FuncAddr[Arc.dst()],
208                              Arc.weight(), D);
209         }
210         TotalCalls += Calls;
211         TotalDistance += Dist;
212         TotalSize += Cg.size(FuncId);
213 
214         if (PrintDetailed) {
215           outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
216                            TotalSize, Calls ? Dist / Calls : 0)
217                  << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
218           const uint64_t NewPage = TotalSize / HugePageSize;
219           if (NewPage != CurPage) {
220             CurPage = NewPage;
221             outs() << format(
222                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
223           }
224         }
225       }
226     }
227   }
228   outs() << "BOLT-INFO: Function reordering stats\n"
229          << format("BOLT-INFO:  Number of hot functions: %u\n"
230                    "BOLT-INFO:  Number of clusters: %lu\n",
231                    Hotfuncs, Clusters.size())
232          << format("BOLT-INFO:  Final average call distance = %.1lf "
233                    "(%.0lf / %.0lf)\n",
234                    TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
235                    TotalCalls)
236          << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
237   if (TotalCalls)
238     outs() << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
239                      TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
240            << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
241                      TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
242            << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
243                      TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
244 }
245 
246 namespace {
247 
readFunctionOrderFile()248 std::vector<std::string> readFunctionOrderFile() {
249   std::vector<std::string> FunctionNames;
250   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
251   if (!FuncsFile) {
252     errs() << "Ordered functions file \"" << opts::FunctionOrderFile
253            << "\" can't be opened.\n";
254     exit(1);
255   }
256   std::string FuncName;
257   while (std::getline(FuncsFile, FuncName))
258     FunctionNames.push_back(FuncName);
259   return FunctionNames;
260 }
261 
262 }
263 
runOnFunctions(BinaryContext & BC)264 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
265   auto &BFs = BC.getBinaryFunctions();
266   if (opts::ReorderFunctions != RT_NONE &&
267       opts::ReorderFunctions != RT_EXEC_COUNT &&
268       opts::ReorderFunctions != RT_USER) {
269     Cg = buildCallGraph(BC,
270                         [](const BinaryFunction &BF) {
271                           if (!BF.hasProfile())
272                             return true;
273                           if (BF.getState() != BinaryFunction::State::CFG)
274                             return true;
275                           return false;
276                         },
277                         opts::CgFromPerfData,
278                         false, // IncludeColdCalls
279                         opts::ReorderFunctionsUseHotSize,
280                         opts::CgUseSplitHotSize,
281                         opts::UseEdgeCounts,
282                         opts::CgIgnoreRecursiveCalls);
283     Cg.normalizeArcWeights();
284   }
285 
286   std::vector<Cluster> Clusters;
287 
288   switch (opts::ReorderFunctions) {
289   case RT_NONE:
290     break;
291   case RT_EXEC_COUNT:
292     {
293       std::vector<BinaryFunction *> SortedFunctions(BFs.size());
294       uint32_t Index = 0;
295       llvm::transform(BFs, SortedFunctions.begin(),
296                       [](std::pair<const uint64_t, BinaryFunction> &BFI) {
297                         return &BFI.second;
298                       });
299       llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A,
300                                              const BinaryFunction *B) {
301         if (A->isIgnored())
302           return false;
303         const size_t PadA = opts::padFunction(*A);
304         const size_t PadB = opts::padFunction(*B);
305         if (!PadA || !PadB) {
306           if (PadA)
307             return true;
308           if (PadB)
309             return false;
310         }
311         return !A->hasProfile() &&
312                (B->hasProfile() ||
313                 (A->getExecutionCount() > B->getExecutionCount()));
314       });
315       for (BinaryFunction *BF : SortedFunctions)
316         if (BF->hasProfile())
317           BF->setIndex(Index++);
318     }
319     break;
320   case RT_HFSORT:
321     Clusters = clusterize(Cg);
322     break;
323   case RT_HFSORT_PLUS:
324     Clusters = hfsortPlus(Cg);
325     break;
326   case RT_PETTIS_HANSEN:
327     Clusters = pettisAndHansen(Cg);
328     break;
329   case RT_RANDOM:
330     std::srand(opts::RandomSeed);
331     Clusters = randomClusters(Cg);
332     break;
333   case RT_USER:
334     {
335       uint32_t Index = 0;
336       for (const std::string &Function : readFunctionOrderFile()) {
337         std::vector<uint64_t> FuncAddrs;
338 
339         BinaryData *BD = BC.getBinaryDataByName(Function);
340         if (!BD) {
341           uint32_t LocalID = 1;
342           while(1) {
343             // If we can't find the main symbol name, look for alternates.
344             const std::string FuncName =
345                 Function + "/" + std::to_string(LocalID);
346             BD = BC.getBinaryDataByName(FuncName);
347             if (BD)
348               FuncAddrs.push_back(BD->getAddress());
349             else
350               break;
351             LocalID++;
352           }
353         } else {
354           FuncAddrs.push_back(BD->getAddress());
355         }
356 
357         if (FuncAddrs.empty()) {
358           errs() << "BOLT-WARNING: Reorder functions: can't find function for "
359                  << Function << ".\n";
360           continue;
361         }
362 
363         for (const uint64_t FuncAddr : FuncAddrs) {
364           const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
365           assert(FuncBD);
366 
367           BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
368           if (!BF) {
369             errs() << "BOLT-WARNING: Reorder functions: can't find function for "
370                    << Function << ".\n";
371             break;
372           }
373           if (!BF->hasValidIndex())
374             BF->setIndex(Index++);
375           else if (opts::Verbosity > 0)
376             errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
377                    << ".\n";
378         }
379       }
380     }
381     break;
382   }
383 
384   reorder(std::move(Clusters), BFs);
385 
386   std::unique_ptr<std::ofstream> FuncsFile;
387   if (!opts::GenerateFunctionOrderFile.empty()) {
388     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
389                                                 std::ios::out);
390     if (!FuncsFile) {
391       errs() << "BOLT-ERROR: ordered functions file "
392              << opts::GenerateFunctionOrderFile << " cannot be opened\n";
393       exit(1);
394     }
395   }
396 
397   std::unique_ptr<std::ofstream> LinkSectionsFile;
398   if (!opts::LinkSectionsFile.empty()) {
399     LinkSectionsFile =
400         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
401     if (!LinkSectionsFile) {
402       errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
403              << " cannot be opened\n";
404       exit(1);
405     }
406   }
407 
408   if (FuncsFile || LinkSectionsFile) {
409     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
410     llvm::transform(BFs, SortedFunctions.begin(),
411                     [](std::pair<const uint64_t, BinaryFunction> &BFI) {
412                       return &BFI.second;
413                     });
414 
415     // Sort functions by index.
416     llvm::stable_sort(SortedFunctions,
417                       [](const BinaryFunction *A, const BinaryFunction *B) {
418                         if (A->hasValidIndex() && B->hasValidIndex())
419                           return A->getIndex() < B->getIndex();
420                         if (A->hasValidIndex() && !B->hasValidIndex())
421                           return true;
422                         if (!A->hasValidIndex() && B->hasValidIndex())
423                           return false;
424                         return A->getAddress() < B->getAddress();
425                       });
426 
427     for (const BinaryFunction *Func : SortedFunctions) {
428       if (!Func->hasValidIndex())
429         break;
430       if (Func->isPLTFunction())
431         continue;
432 
433       if (FuncsFile)
434         *FuncsFile << Func->getOneName().str() << '\n';
435 
436       if (LinkSectionsFile) {
437         const char *Indent = "";
438         std::vector<StringRef> AllNames = Func->getNames();
439         llvm::sort(AllNames);
440         for (StringRef Name : AllNames) {
441           const size_t SlashPos = Name.find('/');
442           if (SlashPos != std::string::npos) {
443             // Avoid duplicates for local functions.
444             if (Name.find('/', SlashPos + 1) != std::string::npos)
445               continue;
446             Name = Name.substr(0, SlashPos);
447           }
448           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
449           Indent = " ";
450         }
451       }
452     }
453 
454     if (FuncsFile) {
455       FuncsFile->close();
456       outs() << "BOLT-INFO: dumped function order to "
457              << opts::GenerateFunctionOrderFile << '\n';
458     }
459 
460     if (LinkSectionsFile) {
461       LinkSectionsFile->close();
462       outs() << "BOLT-INFO: dumped linker section order to "
463              << opts::LinkSectionsFile << '\n';
464     }
465   }
466 }
467 
468 } // namespace bolt
469 } // namespace llvm
470