1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "llvm/Support/CommandLine.h"
16 #include <fstream>
17 
18 #define DEBUG_TYPE "hfsort"
19 
20 using namespace llvm;
21 
22 namespace opts {
23 
24 extern cl::OptionCategory BoltOptCategory;
25 extern cl::opt<unsigned> Verbosity;
26 extern cl::opt<uint32_t> RandomSeed;
27 
28 extern size_t padFunction(const bolt::BinaryFunction &Function);
29 
30 cl::opt<bolt::ReorderFunctions::ReorderType>
31 ReorderFunctions("reorder-functions",
32   cl::desc("reorder and cluster functions (works only with relocations)"),
33   cl::init(bolt::ReorderFunctions::RT_NONE),
34   cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE,
35       "none",
36       "do not reorder functions"),
37     clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT,
38       "exec-count",
39       "order by execution count"),
40     clEnumValN(bolt::ReorderFunctions::RT_HFSORT,
41       "hfsort",
42       "use hfsort algorithm"),
43     clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS,
44       "hfsort+",
45       "use hfsort+ algorithm"),
46     clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
47       "pettis-hansen",
48       "use Pettis-Hansen algorithm"),
49     clEnumValN(bolt::ReorderFunctions::RT_RANDOM,
50       "random",
51       "reorder functions randomly"),
52     clEnumValN(bolt::ReorderFunctions::RT_USER,
53       "user",
54       "use function order specified by -function-order")),
55   cl::ZeroOrMore,
56   cl::cat(BoltOptCategory));
57 
58 static cl::opt<bool>
59 ReorderFunctionsUseHotSize("reorder-functions-use-hot-size",
60   cl::desc("use a function's hot size when doing clustering"),
61   cl::init(true),
62   cl::ZeroOrMore,
63   cl::cat(BoltOptCategory));
64 
65 static cl::opt<std::string>
66 FunctionOrderFile("function-order",
67   cl::desc("file containing an ordered list of functions to use for function "
68            "reordering"),
69   cl::cat(BoltOptCategory));
70 
71 static cl::opt<std::string>
72 GenerateFunctionOrderFile("generate-function-order",
73   cl::desc("file to dump the ordered list of functions to use for function "
74            "reordering"),
75   cl::cat(BoltOptCategory));
76 
77 static cl::opt<std::string>
78 LinkSectionsFile("generate-link-sections",
79   cl::desc("generate a list of function sections in a format suitable for "
80            "inclusion in a linker script"),
81   cl::cat(BoltOptCategory));
82 
83 static cl::opt<bool>
84 UseEdgeCounts("use-edge-counts",
85   cl::desc("use edge count data when doing clustering"),
86   cl::init(true),
87   cl::ZeroOrMore,
88   cl::cat(BoltOptCategory));
89 
90 static cl::opt<bool>
91 CgFromPerfData("cg-from-perf-data",
92   cl::desc("use perf data directly when constructing the call graph"
93            " for stale functions"),
94   cl::init(true),
95   cl::ZeroOrMore,
96   cl::cat(BoltOptCategory));
97 
98 static cl::opt<bool>
99 CgIgnoreRecursiveCalls("cg-ignore-recursive-calls",
100   cl::desc("ignore recursive calls when constructing the call graph"),
101   cl::init(true),
102   cl::ZeroOrMore,
103   cl::cat(BoltOptCategory));
104 
105 static cl::opt<bool>
106 CgUseSplitHotSize("cg-use-split-hot-size",
107   cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
108            "call graph functions"),
109   cl::init(false),
110   cl::ZeroOrMore,
111   cl::cat(BoltOptCategory));
112 
113 } // namespace opts
114 
115 namespace llvm {
116 namespace bolt {
117 
118 using NodeId = CallGraph::NodeId;
119 using Arc = CallGraph::Arc;
120 using Node = CallGraph::Node;
121 
122 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
123                                std::map<uint64_t, BinaryFunction> &BFs) {
124   std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
125   uint64_t TotalSize = 0;
126   uint32_t Index = 0;
127 
128   // Set order of hot functions based on clusters.
129   for (const Cluster &Cluster : Clusters) {
130     for (const NodeId FuncId : Cluster.targets()) {
131       Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
132       FuncAddr[FuncId] = TotalSize;
133       TotalSize += Cg.size(FuncId);
134     }
135   }
136 
137   if (opts::ReorderFunctions == RT_NONE)
138     return;
139 
140   if (opts::Verbosity == 0) {
141 #ifndef NDEBUG
142     if (!DebugFlag || !isCurrentDebugType("hfsort"))
143       return;
144 #else
145     return;
146 #endif
147   }
148 
149   bool PrintDetailed = opts::Verbosity > 1;
150 #ifndef NDEBUG
151   PrintDetailed |=
152     (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
153 #endif
154   TotalSize   = 0;
155   uint64_t CurPage     = 0;
156   uint64_t Hotfuncs    = 0;
157   double TotalDistance = 0;
158   double TotalCalls    = 0;
159   double TotalCalls64B = 0;
160   double TotalCalls4KB = 0;
161   double TotalCalls2MB = 0;
162   if (PrintDetailed)
163     outs() << "BOLT-INFO: Function reordering page layout\n"
164            << "BOLT-INFO: ============== page 0 ==============\n";
165   for (Cluster &Cluster : Clusters) {
166     if (PrintDetailed)
167       outs() << format(
168           "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
169           Cluster.density(), Cluster.samples(), Cluster.size());
170 
171     for (NodeId FuncId : Cluster.targets()) {
172       if (Cg.samples(FuncId) > 0) {
173         Hotfuncs++;
174 
175         if (PrintDetailed)
176           outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
177                  << Cg.size(FuncId) << ")\n";
178 
179         uint64_t Dist = 0;
180         uint64_t Calls = 0;
181         for (NodeId Dst : Cg.successors(FuncId)) {
182           if (FuncId == Dst) // ignore recursive calls in stats
183             continue;
184           const Arc &Arc = *Cg.findArc(FuncId, Dst);
185           const auto D = std::abs(FuncAddr[Arc.dst()] -
186                                   (FuncAddr[FuncId] + Arc.avgCallOffset()));
187           const double W = Arc.weight();
188           if (D < 64 && PrintDetailed && opts::Verbosity > 2)
189             outs() << "BOLT-INFO: short (" << D << "B) call:\n"
190                    << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
191                    << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
192                    << "BOLT-INFO:   Weight = " << W << "\n"
193                    << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset() << "\n";
194           Calls += W;
195           if (D < 64)        TotalCalls64B += W;
196           if (D < 4096)      TotalCalls4KB += W;
197           if (D < (2 << 20)) TotalCalls2MB += W;
198           Dist += Arc.weight() * D;
199           if (PrintDetailed)
200             outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
201                              "weight = %.0lf, callDist = %f\n",
202                              Arc.src(),
203                              FuncAddr[Arc.src()],
204                              Arc.avgCallOffset(),
205                              Arc.dst(),
206                              FuncAddr[Arc.dst()],
207                              Arc.weight(), D);
208         }
209         TotalCalls += Calls;
210         TotalDistance += Dist;
211         TotalSize += Cg.size(FuncId);
212 
213         if (PrintDetailed) {
214           outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
215                            TotalSize, Calls ? Dist / Calls : 0)
216                  << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
217           const uint64_t NewPage = TotalSize / HugePageSize;
218           if (NewPage != CurPage) {
219             CurPage = NewPage;
220             outs() << format(
221                 "BOLT-INFO: ============== page %u ==============\n", CurPage);
222           }
223         }
224       }
225     }
226   }
227   outs() << "BOLT-INFO: Function reordering stats\n"
228          << format("BOLT-INFO:  Number of hot functions: %u\n"
229                    "BOLT-INFO:  Number of clusters: %lu\n",
230                    Hotfuncs, Clusters.size())
231          << format("BOLT-INFO:  Final average call distance = %.1lf "
232                    "(%.0lf / %.0lf)\n",
233                    TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
234                    TotalCalls)
235          << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
236   if (TotalCalls)
237     outs() << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
238                      TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
239            << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
240                      TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
241            << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
242                      TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
243 }
244 
245 namespace {
246 
247 std::vector<std::string> readFunctionOrderFile() {
248   std::vector<std::string> FunctionNames;
249   std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
250   if (!FuncsFile) {
251     errs() << "Ordered functions file \"" << opts::FunctionOrderFile
252            << "\" can't be opened.\n";
253     exit(1);
254   }
255   std::string FuncName;
256   while (std::getline(FuncsFile, FuncName))
257     FunctionNames.push_back(FuncName);
258   return FunctionNames;
259 }
260 
261 }
262 
263 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
264   auto &BFs = BC.getBinaryFunctions();
265   if (opts::ReorderFunctions != RT_NONE &&
266       opts::ReorderFunctions != RT_EXEC_COUNT &&
267       opts::ReorderFunctions != RT_USER) {
268     Cg = buildCallGraph(BC,
269                         [](const BinaryFunction &BF) {
270                           if (!BF.hasProfile())
271                             return true;
272                           if (BF.getState() != BinaryFunction::State::CFG)
273                             return true;
274                           return false;
275                         },
276                         opts::CgFromPerfData,
277                         false, // IncludeColdCalls
278                         opts::ReorderFunctionsUseHotSize,
279                         opts::CgUseSplitHotSize,
280                         opts::UseEdgeCounts,
281                         opts::CgIgnoreRecursiveCalls);
282     Cg.normalizeArcWeights();
283   }
284 
285   std::vector<Cluster> Clusters;
286 
287   switch (opts::ReorderFunctions) {
288   case RT_NONE:
289     break;
290   case RT_EXEC_COUNT:
291     {
292       std::vector<BinaryFunction *> SortedFunctions(BFs.size());
293       uint32_t Index = 0;
294       std::transform(BFs.begin(),
295                      BFs.end(),
296                      SortedFunctions.begin(),
297                      [](std::pair<const uint64_t, BinaryFunction> &BFI) {
298                        return &BFI.second;
299                      });
300       std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
301                        [&](const BinaryFunction *A, const BinaryFunction *B) {
302                          if (A->isIgnored())
303                            return false;
304                          const size_t PadA = opts::padFunction(*A);
305                          const size_t PadB = opts::padFunction(*B);
306                          if (!PadA || !PadB) {
307                            if (PadA)
308                              return true;
309                            if (PadB)
310                              return false;
311                          }
312                          return !A->hasProfile() &&
313                            (B->hasProfile() ||
314                             (A->getExecutionCount() > B->getExecutionCount()));
315                        });
316       for (BinaryFunction *BF : SortedFunctions)
317         if (BF->hasProfile())
318           BF->setIndex(Index++);
319     }
320     break;
321   case RT_HFSORT:
322     Clusters = clusterize(Cg);
323     break;
324   case RT_HFSORT_PLUS:
325     Clusters = hfsortPlus(Cg);
326     break;
327   case RT_PETTIS_HANSEN:
328     Clusters = pettisAndHansen(Cg);
329     break;
330   case RT_RANDOM:
331     std::srand(opts::RandomSeed);
332     Clusters = randomClusters(Cg);
333     break;
334   case RT_USER:
335     {
336       uint32_t Index = 0;
337       for (const std::string &Function : readFunctionOrderFile()) {
338         std::vector<uint64_t> FuncAddrs;
339 
340         BinaryData *BD = BC.getBinaryDataByName(Function);
341         if (!BD) {
342           uint32_t LocalID = 1;
343           while(1) {
344             // If we can't find the main symbol name, look for alternates.
345             const std::string FuncName =
346                 Function + "/" + std::to_string(LocalID);
347             BD = BC.getBinaryDataByName(FuncName);
348             if (BD)
349               FuncAddrs.push_back(BD->getAddress());
350             else
351               break;
352             LocalID++;
353           }
354         } else {
355           FuncAddrs.push_back(BD->getAddress());
356         }
357 
358         if (FuncAddrs.empty()) {
359           errs() << "BOLT-WARNING: Reorder functions: can't find function for "
360                  << Function << ".\n";
361           continue;
362         }
363 
364         for (const uint64_t FuncAddr : FuncAddrs) {
365           const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
366           assert(FuncBD);
367 
368           BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
369           if (!BF) {
370             errs() << "BOLT-WARNING: Reorder functions: can't find function for "
371                    << Function << ".\n";
372             break;
373           }
374           if (!BF->hasValidIndex())
375             BF->setIndex(Index++);
376           else if (opts::Verbosity > 0)
377             errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
378                    << ".\n";
379         }
380       }
381     }
382     break;
383   }
384 
385   reorder(std::move(Clusters), BFs);
386 
387   std::unique_ptr<std::ofstream> FuncsFile;
388   if (!opts::GenerateFunctionOrderFile.empty()) {
389     FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
390                                                 std::ios::out);
391     if (!FuncsFile) {
392       errs() << "BOLT-ERROR: ordered functions file "
393              << opts::GenerateFunctionOrderFile << " cannot be opened\n";
394       exit(1);
395     }
396   }
397 
398   std::unique_ptr<std::ofstream> LinkSectionsFile;
399   if (!opts::LinkSectionsFile.empty()) {
400     LinkSectionsFile =
401         std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
402     if (!LinkSectionsFile) {
403       errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
404              << " cannot be opened\n";
405       exit(1);
406     }
407   }
408 
409   if (FuncsFile || LinkSectionsFile) {
410     std::vector<BinaryFunction *> SortedFunctions(BFs.size());
411     std::transform(BFs.begin(), BFs.end(), SortedFunctions.begin(),
412                    [](std::pair<const uint64_t, BinaryFunction> &BFI) {
413                      return &BFI.second;
414                    });
415 
416     // Sort functions by index.
417     std::stable_sort(
418       SortedFunctions.begin(),
419       SortedFunctions.end(),
420       [](const BinaryFunction *A, const BinaryFunction *B) {
421         if (A->hasValidIndex() && B->hasValidIndex())
422           return A->getIndex() < B->getIndex();
423         if (A->hasValidIndex() && !B->hasValidIndex())
424           return true;
425         if (!A->hasValidIndex() && B->hasValidIndex())
426           return false;
427         return A->getAddress() < B->getAddress();
428       });
429 
430     for (const BinaryFunction *Func : SortedFunctions) {
431       if (!Func->hasValidIndex())
432         break;
433       if (Func->isPLTFunction())
434         continue;
435 
436       if (FuncsFile)
437         *FuncsFile << Func->getOneName().str() << '\n';
438 
439       if (LinkSectionsFile) {
440         const char *Indent = "";
441         std::vector<StringRef> AllNames = Func->getNames();
442         std::sort(AllNames.begin(), AllNames.end());
443         for (StringRef Name : AllNames) {
444           const size_t SlashPos = Name.find('/');
445           if (SlashPos != std::string::npos) {
446             // Avoid duplicates for local functions.
447             if (Name.find('/', SlashPos + 1) != std::string::npos)
448               continue;
449             Name = Name.substr(0, SlashPos);
450           }
451           *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
452           Indent = " ";
453         }
454       }
455     }
456 
457     if (FuncsFile) {
458       FuncsFile->close();
459       outs() << "BOLT-INFO: dumped function order to "
460              << opts::GenerateFunctionOrderFile << '\n';
461     }
462 
463     if (LinkSectionsFile) {
464       LinkSectionsFile->close();
465       outs() << "BOLT-INFO: dumped linker section order to "
466              << opts::LinkSectionsFile << '\n';
467     }
468   }
469 }
470 
471 } // namespace bolt
472 } // namespace llvm
473