1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it, and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Process.h"
27 #include "llvm/Support/Program.h"
28 #include "llvm/Support/Regex.h"
29 #include "llvm/Support/Timer.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <map>
32 #include <unordered_map>
33 #include <utility>
34 
35 #define DEBUG_TYPE "aggregator"
36 
37 using namespace llvm;
38 using namespace bolt;
39 
40 namespace opts {
41 
42 static cl::opt<bool>
43     BasicAggregation("nl",
44                      cl::desc("aggregate basic samples (without LBR info)"),
45                      cl::cat(AggregatorCategory));
46 
47 static cl::opt<bool>
48 FilterMemProfile("filter-mem-profile",
49   cl::desc("if processing a memory profile, filter out stack or heap accesses "
50            "that won't be useful for BOLT to reduce profile file size"),
51   cl::init(true),
52   cl::cat(AggregatorCategory));
53 
54 static cl::opt<unsigned long long>
55 FilterPID("pid",
56   cl::desc("only use samples from process with specified PID"),
57   cl::init(0),
58   cl::Optional,
59   cl::cat(AggregatorCategory));
60 
61 static cl::opt<bool>
62 IgnoreBuildID("ignore-build-id",
63   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
64   cl::init(false),
65   cl::cat(AggregatorCategory));
66 
67 static cl::opt<bool> IgnoreInterruptLBR(
68     "ignore-interrupt-lbr",
69     cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
70     cl::init(true), cl::cat(AggregatorCategory));
71 
72 static cl::opt<unsigned long long>
73 MaxSamples("max-samples",
74   cl::init(-1ULL),
75   cl::desc("maximum number of samples to read from LBR profile"),
76   cl::Optional,
77   cl::Hidden,
78   cl::cat(AggregatorCategory));
79 
80 cl::opt<bool> ReadPreAggregated(
81     "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
82     cl::cat(AggregatorCategory));
83 
84 static cl::opt<bool>
85 TimeAggregator("time-aggr",
86   cl::desc("time BOLT aggregator"),
87   cl::init(false),
88   cl::ZeroOrMore,
89   cl::cat(AggregatorCategory));
90 
91 static cl::opt<bool>
92     UseEventPC("use-event-pc",
93                cl::desc("use event PC in combination with LBR sampling"),
94                cl::cat(AggregatorCategory));
95 
96 static cl::opt<bool> WriteAutoFDOData(
97     "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
98     cl::cat(AggregatorCategory));
99 
100 } // namespace opts
101 
102 namespace {
103 
104 const char TimerGroupName[] = "aggregator";
105 const char TimerGroupDesc[] = "Aggregator";
106 
getTextSections(const BinaryContext * BC)107 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
108   std::vector<SectionNameAndRange> sections;
109   for (BinarySection &Section : BC->sections()) {
110     if (!Section.isText())
111       continue;
112     if (Section.getSize() == 0)
113       continue;
114     sections.push_back(
115         {Section.getName(), Section.getAddress(), Section.getEndAddress()});
116   }
117   llvm::sort(sections,
118              [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
119                return A.BeginAddress < B.BeginAddress;
120              });
121   return sections;
122 }
123 }
124 
// Out-of-class definition of the static constexpr data member
// KernelBaseAddr. No initializer is given here; presumably the value is
// supplied on the in-class declaration in the header (not visible in this
// file). Before C++17 such a definition is needed if the member is odr-used.
constexpr uint64_t DataAggregator::KernelBaseAddr;
126 
~DataAggregator()127 DataAggregator::~DataAggregator() { deleteTempFiles(); }
128 
129 namespace {
deleteTempFile(const std::string & FileName)130 void deleteTempFile(const std::string &FileName) {
131   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
132     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
133            << " with error " << Errc.message() << "\n";
134 }
135 }
136 
deleteTempFiles()137 void DataAggregator::deleteTempFiles() {
138   for (std::string &FileName : TempFiles)
139     deleteTempFile(FileName);
140   TempFiles.clear();
141 }
142 
findPerfExecutable()143 void DataAggregator::findPerfExecutable() {
144   Optional<std::string> PerfExecutable =
145       sys::Process::FindInEnvPath("PATH", "perf");
146   if (!PerfExecutable) {
147     outs() << "PERF2BOLT: No perf executable found!\n";
148     exit(1);
149   }
150   PerfPath = *PerfExecutable;
151 }
152 
start()153 void DataAggregator::start() {
154   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
155 
156   // Don't launch perf for pre-aggregated files
157   if (opts::ReadPreAggregated)
158     return;
159 
160   findPerfExecutable();
161 
162   if (opts::BasicAggregation)
163     launchPerfProcess("events without LBR",
164                       MainEventsPPI,
165                       "script -F pid,event,ip",
166                       /*Wait = */false);
167   else
168     launchPerfProcess("branch events",
169                       MainEventsPPI,
170                       "script -F pid,ip,brstack",
171                       /*Wait = */false);
172 
173   // Note: we launch script for mem events regardless of the option, as the
174   //       command fails fairly fast if mem events were not collected.
175   launchPerfProcess("mem events",
176                     MemEventsPPI,
177                     "script -F pid,event,addr,ip",
178                     /*Wait = */false);
179 
180   launchPerfProcess("process events",
181                     MMapEventsPPI,
182                     "script --show-mmap-events",
183                     /*Wait = */false);
184 
185   launchPerfProcess("task events",
186                     TaskEventsPPI,
187                     "script --show-task-events",
188                     /*Wait = */false);
189 }
190 
abort()191 void DataAggregator::abort() {
192   if (opts::ReadPreAggregated)
193     return;
194 
195   std::string Error;
196 
197   // Kill subprocesses in case they are not finished
198   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
199   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
200   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
201   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
202 
203   deleteTempFiles();
204 
205   exit(1);
206 }
207 
launchPerfProcess(StringRef Name,PerfProcessInfo & PPI,const char * ArgsString,bool Wait)208 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
209                                        const char *ArgsString, bool Wait) {
210   SmallVector<StringRef, 4> Argv;
211 
212   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
213   Argv.push_back(PerfPath.data());
214 
215   char *WritableArgsString = strdup(ArgsString);
216   char *Str = WritableArgsString;
217   do {
218     Argv.push_back(Str);
219     while (*Str && *Str != ' ')
220       ++Str;
221     if (!*Str)
222       break;
223     *Str++ = 0;
224   } while (true);
225 
226   Argv.push_back("-f");
227   Argv.push_back("-i");
228   Argv.push_back(Filename.c_str());
229 
230   if (std::error_code Errc =
231           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
232     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
233            << " with error " << Errc.message() << "\n";
234     exit(1);
235   }
236   TempFiles.push_back(PPI.StdoutPath.data());
237 
238   if (std::error_code Errc =
239           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
240     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
241            << " with error " << Errc.message() << "\n";
242     exit(1);
243   }
244   TempFiles.push_back(PPI.StderrPath.data());
245 
246   Optional<StringRef> Redirects[] = {
247       llvm::None,                        // Stdin
248       StringRef(PPI.StdoutPath.data()),  // Stdout
249       StringRef(PPI.StderrPath.data())}; // Stderr
250 
251   LLVM_DEBUG({
252     dbgs() << "Launching perf: ";
253     for (StringRef Arg : Argv)
254       dbgs() << Arg << " ";
255     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
256            << "\n";
257   });
258 
259   if (Wait)
260     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
261                                             /*envp*/ llvm::None, Redirects);
262   else
263     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
264                                 Redirects);
265 
266   free(WritableArgsString);
267 }
268 
processFileBuildID(StringRef FileBuildID)269 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
270   PerfProcessInfo BuildIDProcessInfo;
271   launchPerfProcess("buildid list",
272                     BuildIDProcessInfo,
273                     "buildid-list",
274                     /*Wait = */true);
275 
276   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
277     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
278         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
279     StringRef ErrBuf = (*MB)->getBuffer();
280 
281     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
282            << '\n';
283     errs() << ErrBuf;
284     return;
285   }
286 
287   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
288       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
289   if (std::error_code EC = MB.getError()) {
290     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
291            << EC.message() << "\n";
292     return;
293   }
294 
295   FileBuf = std::move(*MB);
296   ParsingBuf = FileBuf->getBuffer();
297 
298   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
299   if (!FileName) {
300     if (hasAllBuildIDs()) {
301       errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
302                 "This indicates the input binary supplied for data aggregation "
303                 "is not the same recorded by perf when collecting profiling "
304                 "data, or there were no samples recorded for the binary. "
305                 "Use -ignore-build-id option to override.\n";
306       if (!opts::IgnoreBuildID)
307         abort();
308     } else {
309       errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
310                 "data was recorded without it\n";
311       return;
312     }
313   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
314     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
315     BuildIDBinaryName = std::string(*FileName);
316   } else {
317     outs() << "PERF2BOLT: matched build-id and file name\n";
318   }
319 
320   return;
321 }
322 
checkPerfDataMagic(StringRef FileName)323 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
324   if (opts::ReadPreAggregated)
325     return true;
326 
327   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
328   if (!FD)
329     return false;
330 
331   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
332 
333   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
334   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
335       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
336   if (!BytesRead || *BytesRead != 7)
337     return false;
338 
339   if (strncmp(Buf, "PERFILE", 7) == 0)
340     return true;
341   return false;
342 }
343 
parsePreAggregated()344 void DataAggregator::parsePreAggregated() {
345   std::string Error;
346 
347   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
348       MemoryBuffer::getFileOrSTDIN(Filename);
349   if (std::error_code EC = MB.getError()) {
350     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
351            << EC.message() << "\n";
352     exit(1);
353   }
354 
355   FileBuf = std::move(*MB);
356   ParsingBuf = FileBuf->getBuffer();
357   Col = 0;
358   Line = 1;
359   if (parsePreAggregatedLBRSamples()) {
360     errs() << "PERF2BOLT: failed to parse samples\n";
361     exit(1);
362   }
363 }
364 
writeAutoFDOData(StringRef OutputFilename)365 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
366   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
367   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
368                      TimerGroupDesc, opts::TimeAggregator);
369 
370   std::error_code EC;
371   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
372   if (EC)
373     return EC;
374 
375   // Format:
376   // number of unique traces
377   // from_1-to_1:count_1
378   // from_2-to_2:count_2
379   // ......
380   // from_n-to_n:count_n
381   // number of unique sample addresses
382   // addr_1:count_1
383   // addr_2:count_2
384   // ......
385   // addr_n:count_n
386   // number of unique LBR entries
387   // src_1->dst_1:count_1
388   // src_2->dst_2:count_2
389   // ......
390   // src_n->dst_n:count_n
391 
392   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
393 
394   // AutoFDO addresses are relative to the first allocated loadable program
395   // segment
396   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
397     if (Address < FirstAllocAddress)
398       return 0;
399     return Address - FirstAllocAddress;
400   };
401 
402   OutFile << FallthroughLBRs.size() << "\n";
403   for (const auto &AggrLBR : FallthroughLBRs) {
404     const Trace &Trace = AggrLBR.first;
405     const FTInfo &Info = AggrLBR.second;
406     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
407             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
408             << (Info.InternCount + Info.ExternCount) << "\n";
409   }
410 
411   OutFile << BasicSamples.size() << "\n";
412   for (const auto &Sample : BasicSamples) {
413     uint64_t PC = Sample.first;
414     uint64_t HitCount = Sample.second;
415     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
416   }
417 
418   OutFile << BranchLBRs.size() << "\n";
419   for (const auto &AggrLBR : BranchLBRs) {
420     const Trace &Trace = AggrLBR.first;
421     const BranchInfo &Info = AggrLBR.second;
422     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
423             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
424             << Info.TakenCount << "\n";
425   }
426 
427   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
428          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
429          << " unique branches to " << OutputFilename << "\n";
430 
431   return std::error_code();
432 }
433 
filterBinaryMMapInfo()434 void DataAggregator::filterBinaryMMapInfo() {
435   if (opts::FilterPID) {
436     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
437     if (MMapInfoIter != BinaryMMapInfo.end()) {
438       MMapInfo MMap = MMapInfoIter->second;
439       BinaryMMapInfo.clear();
440       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
441     } else {
442       if (errs().has_colors())
443         errs().changeColor(raw_ostream::RED);
444       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
445              << opts::FilterPID << "\""
446              << " for binary \"" << BC->getFilename() << "\".";
447       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
448       errs() << " Profile for the following process is available:\n";
449       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
450         outs() << "  " << MMI.second.PID
451                << (MMI.second.Forked ? " (forked)\n" : "\n");
452 
453       if (errs().has_colors())
454         errs().resetColor();
455 
456       exit(1);
457     }
458   }
459 }
460 
preprocessProfile(BinaryContext & BC)461 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
462   this->BC = &BC;
463 
464   if (opts::ReadPreAggregated) {
465     parsePreAggregated();
466     return Error::success();
467   }
468 
469   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
470     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
471     processFileBuildID(*FileBuildID);
472   } else {
473     errs() << "BOLT-WARNING: build-id will not be checked because we could "
474               "not read one from input binary\n";
475   }
476 
477   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
478     std::string Error;
479     outs() << "PERF2BOLT: waiting for perf " << Name
480            << " collection to finish...\n";
481     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
482 
483     if (!Error.empty()) {
484       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
485       deleteTempFiles();
486       exit(1);
487     }
488 
489     if (PI.ReturnCode != 0) {
490       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
491           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
492       StringRef ErrBuf = (*ErrorMB)->getBuffer();
493 
494       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
495       errs() << ErrBuf;
496       deleteTempFiles();
497       exit(1);
498     }
499 
500     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
501         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
502     if (std::error_code EC = MB.getError()) {
503       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
504              << EC.message() << "\n";
505       deleteTempFiles();
506       exit(1);
507     }
508 
509     FileBuf = std::move(*MB);
510     ParsingBuf = FileBuf->getBuffer();
511     Col = 0;
512     Line = 1;
513   };
514 
515   if (opts::LinuxKernelMode) {
516     // Current MMap parsing logic does not work with linux kernel.
517     // MMap entries for linux kernel uses PERF_RECORD_MMAP
518     // format instead of typical PERF_RECORD_MMAP2 format.
519     // Since linux kernel address mapping is absolute (same as
520     // in the ELF file), we avoid parsing MMap in linux kernel mode.
521     // While generating optimized linux kernel binary, we may need
522     // to parse MMap entries.
523 
524     // In linux kernel mode, we analyze and optimize
525     // all linux kernel binary instructions, irrespective
526     // of whether they are due to system calls or due to
527     // interrupts. Therefore, we cannot ignore interrupt
528     // in Linux kernel mode.
529     opts::IgnoreInterruptLBR = false;
530   } else {
531     prepareToParse("mmap events", MMapEventsPPI);
532     if (parseMMapEvents())
533       errs() << "PERF2BOLT: failed to parse mmap events\n";
534   }
535 
536   prepareToParse("task events", TaskEventsPPI);
537   if (parseTaskEvents())
538     errs() << "PERF2BOLT: failed to parse task events\n";
539 
540   filterBinaryMMapInfo();
541   prepareToParse("events", MainEventsPPI);
542 
543   if (opts::HeatmapMode) {
544     if (std::error_code EC = printLBRHeatMap()) {
545       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
546       exit(1);
547     }
548     exit(0);
549   }
550 
551   if ((!opts::BasicAggregation && parseBranchEvents()) ||
552       (opts::BasicAggregation && parseBasicEvents()))
553     errs() << "PERF2BOLT: failed to parse samples\n";
554 
555   // We can finish early if the goal is just to generate data for autofdo
556   if (opts::WriteAutoFDOData) {
557     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
558       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
559 
560     deleteTempFiles();
561     exit(0);
562   }
563 
564   // Special handling for memory events
565   std::string Error;
566   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
567   if (PI.ReturnCode != 0) {
568     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
569         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
570     StringRef ErrBuf = (*MB)->getBuffer();
571 
572     deleteTempFiles();
573 
574     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
575                  "Cannot print 'addr' field.");
576     if (!NoData.match(ErrBuf)) {
577       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
578       errs() << ErrBuf;
579       exit(1);
580     }
581     return Error::success();
582   }
583 
584   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
585       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
586   if (std::error_code EC = MB.getError()) {
587     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
588            << EC.message() << "\n";
589     deleteTempFiles();
590     exit(1);
591   }
592 
593   FileBuf = std::move(*MB);
594   ParsingBuf = FileBuf->getBuffer();
595   Col = 0;
596   Line = 1;
597   if (const std::error_code EC = parseMemEvents())
598     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
599            << '\n';
600 
601   deleteTempFiles();
602 
603   return Error::success();
604 }
605 
readProfile(BinaryContext & BC)606 Error DataAggregator::readProfile(BinaryContext &BC) {
607   processProfile(BC);
608 
609   for (auto &BFI : BC.getBinaryFunctions()) {
610     BinaryFunction &Function = BFI.second;
611     convertBranchData(Function);
612   }
613 
614   if (opts::AggregateOnly) {
615     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
616       report_error("cannot create output data file", EC);
617   }
618 
619   return Error::success();
620 }
621 
mayHaveProfileData(const BinaryFunction & Function)622 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
623   return Function.hasProfileAvailable();
624 }
625 
processProfile(BinaryContext & BC)626 void DataAggregator::processProfile(BinaryContext &BC) {
627   if (opts::ReadPreAggregated)
628     processPreAggregated();
629   else if (opts::BasicAggregation)
630     processBasicEvents();
631   else
632     processBranchEvents();
633 
634   processMemEvents();
635 
636   // Mark all functions with registered events as having a valid profile.
637   for (auto &BFI : BC.getBinaryFunctions()) {
638     BinaryFunction &BF = BFI.second;
639     if (getBranchData(BF)) {
640       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
641                                                 : BinaryFunction::PF_LBR;
642       BF.markProfiled(Flags);
643     }
644   }
645 
646   // Release intermediate storage.
647   clear(BranchLBRs);
648   clear(FallthroughLBRs);
649   clear(AggregatedLBRs);
650   clear(BasicSamples);
651   clear(MemSamples);
652 }
653 
654 BinaryFunction *
getBinaryFunctionContainingAddress(uint64_t Address) const655 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
656   if (!BC->containsAddress(Address))
657     return nullptr;
658 
659   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
660                                                 /*UseMaxSize=*/true);
661 }
662 
getLocationName(BinaryFunction & Func,uint64_t Count)663 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
664                                           uint64_t Count) {
665   if (!BAT)
666     return Func.getOneName();
667 
668   const BinaryFunction *OrigFunc = &Func;
669   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
670     NumColdSamples += Count;
671     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
672     if (HotFunc)
673       OrigFunc = HotFunc;
674   }
675   // If it is a local function, prefer the name containing the file name where
676   // the local function was declared
677   for (StringRef AlternativeName : OrigFunc->getNames()) {
678     size_t FileNameIdx = AlternativeName.find('/');
679     // Confirm the alternative name has the pattern Symbol/FileName/1 before
680     // using it
681     if (FileNameIdx == StringRef::npos ||
682         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
683       continue;
684     return AlternativeName;
685   }
686   return OrigFunc->getOneName();
687 }
688 
doSample(BinaryFunction & Func,uint64_t Address,uint64_t Count)689 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
690                               uint64_t Count) {
691   auto I = NamesToSamples.find(Func.getOneName());
692   if (I == NamesToSamples.end()) {
693     bool Success;
694     StringRef LocName = getLocationName(Func, Count);
695     std::tie(I, Success) = NamesToSamples.insert(
696         std::make_pair(Func.getOneName(),
697                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
698   }
699 
700   Address -= Func.getAddress();
701   if (BAT)
702     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
703 
704   I->second.bumpCount(Address, Count);
705   return true;
706 }
707 
doIntraBranch(BinaryFunction & Func,uint64_t From,uint64_t To,uint64_t Count,uint64_t Mispreds)708 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
709                                    uint64_t To, uint64_t Count,
710                                    uint64_t Mispreds) {
711   FuncBranchData *AggrData = getBranchData(Func);
712   if (!AggrData) {
713     AggrData = &NamesToBranches[Func.getOneName()];
714     AggrData->Name = getLocationName(Func, Count);
715     setBranchData(Func, AggrData);
716   }
717 
718   From -= Func.getAddress();
719   To -= Func.getAddress();
720   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
721                     << " @ " << Twine::utohexstr(From) << " -> "
722                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
723                     << '\n');
724   if (BAT) {
725     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
726     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
727     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
728                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
729                       << " -> " << Func.getPrintName() << " @ "
730                       << Twine::utohexstr(To) << '\n');
731   }
732 
733   AggrData->bumpBranchCount(From, To, Count, Mispreds);
734   return true;
735 }
736 
doInterBranch(BinaryFunction * FromFunc,BinaryFunction * ToFunc,uint64_t From,uint64_t To,uint64_t Count,uint64_t Mispreds)737 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
738                                    BinaryFunction *ToFunc, uint64_t From,
739                                    uint64_t To, uint64_t Count,
740                                    uint64_t Mispreds) {
741   FuncBranchData *FromAggrData = nullptr;
742   FuncBranchData *ToAggrData = nullptr;
743   StringRef SrcFunc;
744   StringRef DstFunc;
745   if (FromFunc) {
746     SrcFunc = getLocationName(*FromFunc, Count);
747     FromAggrData = getBranchData(*FromFunc);
748     if (!FromAggrData) {
749       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
750       FromAggrData->Name = SrcFunc;
751       setBranchData(*FromFunc, FromAggrData);
752     }
753     From -= FromFunc->getAddress();
754     if (BAT)
755       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
756 
757     recordExit(*FromFunc, From, Mispreds, Count);
758   }
759   if (ToFunc) {
760     DstFunc = getLocationName(*ToFunc, 0);
761     ToAggrData = getBranchData(*ToFunc);
762     if (!ToAggrData) {
763       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
764       ToAggrData->Name = DstFunc;
765       setBranchData(*ToFunc, ToAggrData);
766     }
767     To -= ToFunc->getAddress();
768     if (BAT)
769       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
770 
771     recordEntry(*ToFunc, To, Mispreds, Count);
772   }
773 
774   if (FromAggrData)
775     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
776                                 Count, Mispreds);
777   if (ToAggrData)
778     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
779                                Count, Mispreds);
780   return true;
781 }
782 
doBranch(uint64_t From,uint64_t To,uint64_t Count,uint64_t Mispreds)783 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
784                               uint64_t Mispreds) {
785   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
786   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
787   if (!FromFunc && !ToFunc)
788     return false;
789 
790   if (FromFunc == ToFunc) {
791     recordBranch(*FromFunc, From - FromFunc->getAddress(),
792                  To - FromFunc->getAddress(), Count, Mispreds);
793     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
794   }
795 
796   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
797 }
798 
doTrace(const LBREntry & First,const LBREntry & Second,uint64_t Count)799 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
800                              uint64_t Count) {
801   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
802   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
803   if (!FromFunc || !ToFunc) {
804     LLVM_DEBUG(
805         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
806                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
807                << " and ending in " << ToFunc->getPrintName() << " @ "
808                << ToFunc->getPrintName() << " @ "
809                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
810     NumLongRangeTraces += Count;
811     return false;
812   }
813   if (FromFunc != ToFunc) {
814     NumInvalidTraces += Count;
815     LLVM_DEBUG(
816         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
817                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
818                << " and ending in " << ToFunc->getPrintName() << " @ "
819                << ToFunc->getPrintName() << " @ "
820                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
821     return false;
822   }
823 
824   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
825       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
826           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
827   if (!FTs) {
828     LLVM_DEBUG(
829         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
830                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
831                << " and ending in " << ToFunc->getPrintName() << " @ "
832                << ToFunc->getPrintName() << " @ "
833                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
834     NumInvalidTraces += Count;
835     return false;
836   }
837 
838   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
839                     << FromFunc->getPrintName() << ":"
840                     << Twine::utohexstr(First.To) << " to "
841                     << Twine::utohexstr(Second.From) << ".\n");
842   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
843     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
844                   Pair.second + FromFunc->getAddress(), Count, false);
845 
846   return true;
847 }
848 
recordTrace(BinaryFunction & BF,const LBREntry & FirstLBR,const LBREntry & SecondLBR,uint64_t Count,SmallVector<std::pair<uint64_t,uint64_t>,16> * Branches) const849 bool DataAggregator::recordTrace(
850     BinaryFunction &BF,
851     const LBREntry &FirstLBR,
852     const LBREntry &SecondLBR,
853     uint64_t Count,
854     SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
855   BinaryContext &BC = BF.getBinaryContext();
856 
857   if (!BF.isSimple())
858     return false;
859 
860   assert(BF.hasCFG() && "can only record traces in CFG state");
861 
862   // Offsets of the trace within this function.
863   const uint64_t From = FirstLBR.To - BF.getAddress();
864   const uint64_t To = SecondLBR.From - BF.getAddress();
865 
866   if (From > To)
867     return false;
868 
869   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
870   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
871 
872   if (!FromBB || !ToBB)
873     return false;
874 
875   // Adjust FromBB if the first LBR is a return from the last instruction in
876   // the previous block (that instruction should be a call).
877   if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
878       !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
879     BinaryBasicBlock *PrevBB = BF.getLayout().getBlock(FromBB->getIndex() - 1);
880     if (PrevBB->getSuccessor(FromBB->getLabel())) {
881       const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
882       if (Instr && BC.MIB->isCall(*Instr))
883         FromBB = PrevBB;
884       else
885         LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
886                           << '\n');
887     } else {
888       LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
889     }
890   }
891 
892   // Fill out information for fall-through edges. The From and To could be
893   // within the same basic block, e.g. when two call instructions are in the
894   // same block. In this case we skip the processing.
895   if (FromBB == ToBB)
896     return true;
897 
898   // Process blocks in the original layout order.
899   BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
900   assert(BB == FromBB && "index mismatch");
901   while (BB != ToBB) {
902     BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
903     assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
904 
905     // Check for bad LBRs.
906     if (!BB->getSuccessor(NextBB->getLabel())) {
907       LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
908                         << "  " << FirstLBR << '\n'
909                         << "  " << SecondLBR << '\n');
910       return false;
911     }
912 
913     // Record fall-through jumps
914     BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
915     BI.Count += Count;
916 
917     if (Branches) {
918       const MCInst *Instr = BB->getLastNonPseudoInstr();
919       uint64_t Offset = 0;
920       if (Instr)
921         Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
922       else
923         Offset = BB->getOffset();
924 
925       Branches->emplace_back(Offset, NextBB->getOffset());
926     }
927 
928     BB = NextBB;
929   }
930 
931   return true;
932 }
933 
934 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
getFallthroughsInTrace(BinaryFunction & BF,const LBREntry & FirstLBR,const LBREntry & SecondLBR,uint64_t Count) const935 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
936                                        const LBREntry &FirstLBR,
937                                        const LBREntry &SecondLBR,
938                                        uint64_t Count) const {
939   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
940 
941   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
942     return NoneType();
943 
944   return Res;
945 }
946 
recordEntry(BinaryFunction & BF,uint64_t To,bool Mispred,uint64_t Count) const947 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
948                                  uint64_t Count) const {
949   if (To > BF.getSize())
950     return false;
951 
952   if (!BF.hasProfile())
953     BF.ExecutionCount = 0;
954 
955   BinaryBasicBlock *EntryBB = nullptr;
956   if (To == 0) {
957     BF.ExecutionCount += Count;
958     if (!BF.empty())
959       EntryBB = &BF.front();
960   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
961     if (BB->isEntryPoint())
962       EntryBB = BB;
963   }
964 
965   if (EntryBB)
966     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
967 
968   return true;
969 }
970 
recordExit(BinaryFunction & BF,uint64_t From,bool Mispred,uint64_t Count) const971 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
972                                 uint64_t Count) const {
973   if (!BF.isSimple() || From > BF.getSize())
974     return false;
975 
976   if (!BF.hasProfile())
977     BF.ExecutionCount = 0;
978 
979   return true;
980 }
981 
parseLBREntry()982 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
983   LBREntry Res;
984   ErrorOr<StringRef> FromStrRes = parseString('/');
985   if (std::error_code EC = FromStrRes.getError())
986     return EC;
987   StringRef OffsetStr = FromStrRes.get();
988   if (OffsetStr.getAsInteger(0, Res.From)) {
989     reportError("expected hexadecimal number with From address");
990     Diag << "Found: " << OffsetStr << "\n";
991     return make_error_code(llvm::errc::io_error);
992   }
993 
994   ErrorOr<StringRef> ToStrRes = parseString('/');
995   if (std::error_code EC = ToStrRes.getError())
996     return EC;
997   OffsetStr = ToStrRes.get();
998   if (OffsetStr.getAsInteger(0, Res.To)) {
999     reportError("expected hexadecimal number with To address");
1000     Diag << "Found: " << OffsetStr << "\n";
1001     return make_error_code(llvm::errc::io_error);
1002   }
1003 
1004   ErrorOr<StringRef> MispredStrRes = parseString('/');
1005   if (std::error_code EC = MispredStrRes.getError())
1006     return EC;
1007   StringRef MispredStr = MispredStrRes.get();
1008   if (MispredStr.size() != 1 ||
1009       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1010     reportError("expected single char for mispred bit");
1011     Diag << "Found: " << MispredStr << "\n";
1012     return make_error_code(llvm::errc::io_error);
1013   }
1014   Res.Mispred = MispredStr[0] == 'M';
1015 
1016   static bool MispredWarning = true;
1017   if (MispredStr[0] == '-' && MispredWarning) {
1018     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1019     MispredWarning = false;
1020   }
1021 
1022   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1023   if (std::error_code EC = Rest.getError())
1024     return EC;
1025   if (Rest.get().size() < 5) {
1026     reportError("expected rest of LBR entry");
1027     Diag << "Found: " << Rest.get() << "\n";
1028     return make_error_code(llvm::errc::io_error);
1029   }
1030   return Res;
1031 }
1032 
checkAndConsumeFS()1033 bool DataAggregator::checkAndConsumeFS() {
1034   if (ParsingBuf[0] != FieldSeparator)
1035     return false;
1036 
1037   ParsingBuf = ParsingBuf.drop_front(1);
1038   Col += 1;
1039   return true;
1040 }
1041 
consumeRestOfLine()1042 void DataAggregator::consumeRestOfLine() {
1043   size_t LineEnd = ParsingBuf.find_first_of('\n');
1044   if (LineEnd == StringRef::npos) {
1045     ParsingBuf = StringRef();
1046     Col = 0;
1047     Line += 1;
1048     return;
1049   }
1050   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1051   Col = 0;
1052   Line += 1;
1053 }
1054 
parseBranchSample()1055 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1056   PerfBranchSample Res;
1057 
1058   while (checkAndConsumeFS()) {
1059   }
1060 
1061   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1062   if (std::error_code EC = PIDRes.getError())
1063     return EC;
1064   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1065   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1066     consumeRestOfLine();
1067     return make_error_code(errc::no_such_process);
1068   }
1069 
1070   while (checkAndConsumeFS()) {
1071   }
1072 
1073   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1074   if (std::error_code EC = PCRes.getError())
1075     return EC;
1076   Res.PC = PCRes.get();
1077 
1078   if (checkAndConsumeNewLine())
1079     return Res;
1080 
1081   while (!checkAndConsumeNewLine()) {
1082     checkAndConsumeFS();
1083 
1084     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1085     if (std::error_code EC = LBRRes.getError())
1086       return EC;
1087     LBREntry LBR = LBRRes.get();
1088     if (ignoreKernelInterrupt(LBR))
1089       continue;
1090     if (!BC->HasFixedLoadAddress)
1091       adjustLBR(LBR, MMapInfoIter->second);
1092     Res.LBR.push_back(LBR);
1093   }
1094 
1095   return Res;
1096 }
1097 
parseBasicSample()1098 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1099   while (checkAndConsumeFS()) {
1100   }
1101 
1102   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1103   if (std::error_code EC = PIDRes.getError())
1104     return EC;
1105 
1106   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1107   if (MMapInfoIter == BinaryMMapInfo.end()) {
1108     consumeRestOfLine();
1109     return PerfBasicSample{StringRef(), 0};
1110   }
1111 
1112   while (checkAndConsumeFS()) {
1113   }
1114 
1115   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1116   if (std::error_code EC = Event.getError())
1117     return EC;
1118 
1119   while (checkAndConsumeFS()) {
1120   }
1121 
1122   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1123   if (std::error_code EC = AddrRes.getError())
1124     return EC;
1125 
1126   if (!checkAndConsumeNewLine()) {
1127     reportError("expected end of line");
1128     return make_error_code(llvm::errc::io_error);
1129   }
1130 
1131   uint64_t Address = *AddrRes;
1132   if (!BC->HasFixedLoadAddress)
1133     adjustAddress(Address, MMapInfoIter->second);
1134 
1135   return PerfBasicSample{Event.get(), Address};
1136 }
1137 
parseMemSample()1138 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1139   PerfMemSample Res{0, 0};
1140 
1141   while (checkAndConsumeFS()) {
1142   }
1143 
1144   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1145   if (std::error_code EC = PIDRes.getError())
1146     return EC;
1147 
1148   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1149   if (MMapInfoIter == BinaryMMapInfo.end()) {
1150     consumeRestOfLine();
1151     return Res;
1152   }
1153 
1154   while (checkAndConsumeFS()) {
1155   }
1156 
1157   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1158   if (std::error_code EC = Event.getError())
1159     return EC;
1160   if (Event.get().find("mem-loads") == StringRef::npos) {
1161     consumeRestOfLine();
1162     return Res;
1163   }
1164 
1165   while (checkAndConsumeFS()) {
1166   }
1167 
1168   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1169   if (std::error_code EC = AddrRes.getError())
1170     return EC;
1171 
1172   while (checkAndConsumeFS()) {
1173   }
1174 
1175   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1176   if (std::error_code EC = PCRes.getError()) {
1177     consumeRestOfLine();
1178     return EC;
1179   }
1180 
1181   if (!checkAndConsumeNewLine()) {
1182     reportError("expected end of line");
1183     return make_error_code(llvm::errc::io_error);
1184   }
1185 
1186   uint64_t Address = *AddrRes;
1187   if (!BC->HasFixedLoadAddress)
1188     adjustAddress(Address, MMapInfoIter->second);
1189 
1190   return PerfMemSample{PCRes.get(), Address};
1191 }
1192 
parseLocationOrOffset()1193 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1194   auto parseOffset = [this]() -> ErrorOr<Location> {
1195     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1196     if (std::error_code EC = Res.getError())
1197       return EC;
1198     return Location(Res.get());
1199   };
1200 
1201   size_t Sep = ParsingBuf.find_first_of(" \n");
1202   if (Sep == StringRef::npos)
1203     return parseOffset();
1204   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1205   if (LookAhead.find_first_of(":") == StringRef::npos)
1206     return parseOffset();
1207 
1208   ErrorOr<StringRef> BuildID = parseString(':');
1209   if (std::error_code EC = BuildID.getError())
1210     return EC;
1211   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1212   if (std::error_code EC = Offset.getError())
1213     return EC;
1214   return Location(true, BuildID.get(), Offset.get());
1215 }
1216 
1217 ErrorOr<DataAggregator::AggregatedLBREntry>
parseAggregatedLBREntry()1218 DataAggregator::parseAggregatedLBREntry() {
1219   while (checkAndConsumeFS()) {
1220   }
1221 
1222   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1223   if (std::error_code EC = TypeOrErr.getError())
1224     return EC;
1225   auto Type = AggregatedLBREntry::BRANCH;
1226   if (TypeOrErr.get() == "B") {
1227     Type = AggregatedLBREntry::BRANCH;
1228   } else if (TypeOrErr.get() == "F") {
1229     Type = AggregatedLBREntry::FT;
1230   } else if (TypeOrErr.get() == "f") {
1231     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1232   } else {
1233     reportError("expected B, F or f");
1234     return make_error_code(llvm::errc::io_error);
1235   }
1236 
1237   while (checkAndConsumeFS()) {
1238   }
1239   ErrorOr<Location> From = parseLocationOrOffset();
1240   if (std::error_code EC = From.getError())
1241     return EC;
1242 
1243   while (checkAndConsumeFS()) {
1244   }
1245   ErrorOr<Location> To = parseLocationOrOffset();
1246   if (std::error_code EC = To.getError())
1247     return EC;
1248 
1249   while (checkAndConsumeFS()) {
1250   }
1251   ErrorOr<int64_t> Frequency =
1252       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1253   if (std::error_code EC = Frequency.getError())
1254     return EC;
1255 
1256   uint64_t Mispreds = 0;
1257   if (Type == AggregatedLBREntry::BRANCH) {
1258     while (checkAndConsumeFS()) {
1259     }
1260     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1261     if (std::error_code EC = MispredsOrErr.getError())
1262       return EC;
1263     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1264   }
1265 
1266   if (!checkAndConsumeNewLine()) {
1267     reportError("expected end of line");
1268     return make_error_code(llvm::errc::io_error);
1269   }
1270 
1271   return AggregatedLBREntry{From.get(), To.get(),
1272                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1273                             Type};
1274 }
1275 
ignoreKernelInterrupt(LBREntry & LBR) const1276 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1277   return opts::IgnoreInterruptLBR &&
1278          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1279 }
1280 
printLBRHeatMap()1281 std::error_code DataAggregator::printLBRHeatMap() {
1282   outs() << "PERF2BOLT: parse branch events...\n";
1283   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1284                      TimerGroupDesc, opts::TimeAggregator);
1285 
1286   if (opts::LinuxKernelMode) {
1287     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1288     opts::HeatmapMinAddress = KernelBaseAddr;
1289   }
1290   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1291              opts::HeatmapMaxAddress, getTextSections(BC));
1292   uint64_t NumTotalSamples = 0;
1293 
1294   if (opts::BasicAggregation) {
1295     while (hasData()) {
1296       ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1297       if (std::error_code EC = SampleRes.getError()) {
1298         if (EC == errc::no_such_process)
1299           continue;
1300         return EC;
1301       }
1302       PerfBasicSample &Sample = SampleRes.get();
1303       HM.registerAddress(Sample.PC);
1304       NumTotalSamples++;
1305     }
1306     outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1307   } else {
1308     while (hasData()) {
1309       ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1310       if (std::error_code EC = SampleRes.getError()) {
1311         if (EC == errc::no_such_process)
1312           continue;
1313         return EC;
1314       }
1315 
1316       PerfBranchSample &Sample = SampleRes.get();
1317 
1318       // LBRs are stored in reverse execution order. NextLBR refers to the next
1319       // executed branch record.
1320       const LBREntry *NextLBR = nullptr;
1321       for (const LBREntry &LBR : Sample.LBR) {
1322         if (NextLBR) {
1323           // Record fall-through trace.
1324           const uint64_t TraceFrom = LBR.To;
1325           const uint64_t TraceTo = NextLBR->From;
1326           ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1327         }
1328         NextLBR = &LBR;
1329       }
1330       if (!Sample.LBR.empty()) {
1331         HM.registerAddress(Sample.LBR.front().To);
1332         HM.registerAddress(Sample.LBR.back().From);
1333       }
1334       NumTotalSamples += Sample.LBR.size();
1335     }
1336     outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1337     outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1338   }
1339 
1340   if (!NumTotalSamples) {
1341     if (opts::BasicAggregation) {
1342       errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1343                 "Cannot build heatmap.";
1344     } else {
1345       errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1346                 "Cannot build heatmap. Use -nl for building heatmap from "
1347                 "basic events.\n";
1348     }
1349     exit(1);
1350   }
1351 
1352   outs() << "HEATMAP: building heat map...\n";
1353 
1354   for (const auto &LBR : FallthroughLBRs) {
1355     const Trace &Trace = LBR.first;
1356     const FTInfo &Info = LBR.second;
1357     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1358   }
1359 
1360   if (HM.getNumInvalidRanges())
1361     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1362 
1363   if (!HM.size()) {
1364     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1365     exit(1);
1366   }
1367 
1368   HM.print(opts::OutputFilename);
1369   if (opts::OutputFilename == "-")
1370     HM.printCDF(opts::OutputFilename);
1371   else
1372     HM.printCDF(opts::OutputFilename + ".csv");
1373   if (opts::OutputFilename == "-")
1374     HM.printSectionHotness(opts::OutputFilename);
1375   else
1376     HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1377 
1378   return std::error_code();
1379 }
1380 
parseBranchEvents()1381 std::error_code DataAggregator::parseBranchEvents() {
1382   outs() << "PERF2BOLT: parse branch events...\n";
1383   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1384                      TimerGroupDesc, opts::TimeAggregator);
1385 
1386   uint64_t NumTotalSamples = 0;
1387   uint64_t NumEntries = 0;
1388   uint64_t NumSamples = 0;
1389   uint64_t NumSamplesNoLBR = 0;
1390   uint64_t NumTraces = 0;
1391   bool NeedsSkylakeFix = false;
1392 
1393   while (hasData() && NumTotalSamples < opts::MaxSamples) {
1394     ++NumTotalSamples;
1395 
1396     ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1397     if (std::error_code EC = SampleRes.getError()) {
1398       if (EC == errc::no_such_process)
1399         continue;
1400       return EC;
1401     }
1402     ++NumSamples;
1403 
1404     PerfBranchSample &Sample = SampleRes.get();
1405     if (opts::WriteAutoFDOData)
1406       ++BasicSamples[Sample.PC];
1407 
1408     if (Sample.LBR.empty()) {
1409       ++NumSamplesNoLBR;
1410       continue;
1411     }
1412 
1413     NumEntries += Sample.LBR.size();
1414     if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1415       errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1416       NeedsSkylakeFix = true;
1417     }
1418 
1419     // LBRs are stored in reverse execution order. NextPC refers to the next
1420     // recorded executed PC.
1421     uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1422     uint32_t NumEntry = 0;
1423     for (const LBREntry &LBR : Sample.LBR) {
1424       ++NumEntry;
1425       // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1426       // sometimes record entry 32 as an exact copy of entry 31. This will cause
1427       // us to likely record an invalid trace and generate a stale function for
1428       // BAT mode (non BAT disassembles the function and is able to ignore this
1429       // trace at aggregation time). Drop first 2 entries (last two, in
1430       // chronological order)
1431       if (NeedsSkylakeFix && NumEntry <= 2)
1432         continue;
1433       if (NextPC) {
1434         // Record fall-through trace.
1435         const uint64_t TraceFrom = LBR.To;
1436         const uint64_t TraceTo = NextPC;
1437         const BinaryFunction *TraceBF =
1438             getBinaryFunctionContainingAddress(TraceFrom);
1439         if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1440           FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1441           if (TraceBF->containsAddress(LBR.From))
1442             ++Info.InternCount;
1443           else
1444             ++Info.ExternCount;
1445         } else {
1446           if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
1447             LLVM_DEBUG(dbgs()
1448                        << "Invalid trace starting in "
1449                        << TraceBF->getPrintName() << " @ "
1450                        << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
1451                        << " and ending @ " << Twine::utohexstr(TraceTo)
1452                        << '\n');
1453             ++NumInvalidTraces;
1454           } else {
1455             LLVM_DEBUG(dbgs()
1456                        << "Out of range trace starting in "
1457                        << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
1458                        << Twine::utohexstr(
1459                               TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1460                        << " and ending in "
1461                        << (getBinaryFunctionContainingAddress(TraceTo)
1462                                ? getBinaryFunctionContainingAddress(TraceTo)
1463                                      ->getPrintName()
1464                                : "None")
1465                        << " @ "
1466                        << Twine::utohexstr(
1467                               TraceTo -
1468                               (getBinaryFunctionContainingAddress(TraceTo)
1469                                    ? getBinaryFunctionContainingAddress(TraceTo)
1470                                          ->getAddress()
1471                                    : 0))
1472                        << '\n');
1473             ++NumLongRangeTraces;
1474           }
1475         }
1476         ++NumTraces;
1477       }
1478       NextPC = LBR.From;
1479 
1480       uint64_t From = LBR.From;
1481       if (!getBinaryFunctionContainingAddress(From))
1482         From = 0;
1483       uint64_t To = LBR.To;
1484       if (!getBinaryFunctionContainingAddress(To))
1485         To = 0;
1486       if (!From && !To)
1487         continue;
1488       BranchInfo &Info = BranchLBRs[Trace(From, To)];
1489       ++Info.TakenCount;
1490       Info.MispredCount += LBR.Mispred;
1491     }
1492   }
1493 
1494   for (const auto &LBR : BranchLBRs) {
1495     const Trace &Trace = LBR.first;
1496     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
1497       BF->setHasProfileAvailable();
1498     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
1499       BF->setHasProfileAvailable();
1500   }
1501 
1502   auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1503     OS << " (";
1504     if (OS.has_colors()) {
1505       if (Percent > T2)
1506         OS.changeColor(raw_ostream::RED);
1507       else if (Percent > T1)
1508         OS.changeColor(raw_ostream::YELLOW);
1509       else
1510         OS.changeColor(raw_ostream::GREEN);
1511     }
1512     OS << format("%.1f%%", Percent);
1513     if (OS.has_colors())
1514       OS.resetColor();
1515     OS << ")";
1516   };
1517 
1518   outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1519          << " LBR entries\n";
1520   if (NumTotalSamples) {
1521     if (NumSamples && NumSamplesNoLBR == NumSamples) {
1522       // Note: we don't know if perf2bolt is being used to parse memory samples
1523       // at this point. In this case, it is OK to parse zero LBRs.
1524       errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1525                 "LBR. Record profile with perf record -j any or run perf2bolt "
1526                 "in no-LBR mode with -nl (the performance improvement in -nl "
1527                 "mode may be limited)\n";
1528     } else {
1529       const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1530       const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1531       outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1532       printColored(outs(), PercentIgnored, 20, 50);
1533       outs() << " were ignored\n";
1534       if (PercentIgnored > 50.0f)
1535         errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1536                   "were attributed to the input binary\n";
1537     }
1538   }
1539   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1540          << NumInvalidTraces;
1541   float Perc = 0.0f;
1542   if (NumTraces > 0) {
1543     Perc = NumInvalidTraces * 100.0f / NumTraces;
1544     printColored(outs(), Perc, 5, 10);
1545   }
1546   outs() << "\n";
1547   if (Perc > 10.0f)
1548     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1549               "binary is probably not the same binary used during profiling "
1550               "collection. The generated data may be ineffective for improving "
1551               "performance.\n\n";
1552 
1553   outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1554          << NumLongRangeTraces;
1555   if (NumTraces > 0)
1556     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1557   outs() << "\n";
1558 
1559   if (NumColdSamples > 0) {
1560     const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1561     outs() << "PERF2BOLT: " << NumColdSamples
1562            << format(" (%.1f%%)", ColdSamples)
1563            << " samples recorded in cold regions of split functions.\n";
1564     if (ColdSamples > 5.0f)
1565       outs()
1566           << "WARNING: The BOLT-processed binary where samples were collected "
1567              "likely used bad data or your service observed a large shift in "
1568              "profile. You may want to audit this.\n";
1569   }
1570 
1571   return std::error_code();
1572 }
1573 
processBranchEvents()1574 void DataAggregator::processBranchEvents() {
1575   outs() << "PERF2BOLT: processing branch events...\n";
1576   NamedRegionTimer T("processBranch", "Processing branch events",
1577                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1578 
1579   for (const auto &AggrLBR : FallthroughLBRs) {
1580     const Trace &Loc = AggrLBR.first;
1581     const FTInfo &Info = AggrLBR.second;
1582     LBREntry First{Loc.From, Loc.From, false};
1583     LBREntry Second{Loc.To, Loc.To, false};
1584     if (Info.InternCount)
1585       doTrace(First, Second, Info.InternCount);
1586     if (Info.ExternCount) {
1587       First.From = 0;
1588       doTrace(First, Second, Info.ExternCount);
1589     }
1590   }
1591 
1592   for (const auto &AggrLBR : BranchLBRs) {
1593     const Trace &Loc = AggrLBR.first;
1594     const BranchInfo &Info = AggrLBR.second;
1595     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1596   }
1597 }
1598 
parseBasicEvents()1599 std::error_code DataAggregator::parseBasicEvents() {
1600   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1601   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1602                      TimerGroupDesc, opts::TimeAggregator);
1603   while (hasData()) {
1604     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1605     if (std::error_code EC = Sample.getError())
1606       return EC;
1607 
1608     if (!Sample->PC)
1609       continue;
1610 
1611     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1612       BF->setHasProfileAvailable();
1613 
1614     ++BasicSamples[Sample->PC];
1615     EventNames.insert(Sample->EventName);
1616   }
1617 
1618   return std::error_code();
1619 }
1620 
processBasicEvents()1621 void DataAggregator::processBasicEvents() {
1622   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1623   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1624                      TimerGroupDesc, opts::TimeAggregator);
1625   uint64_t OutOfRangeSamples = 0;
1626   uint64_t NumSamples = 0;
1627   for (auto &Sample : BasicSamples) {
1628     const uint64_t PC = Sample.first;
1629     const uint64_t HitCount = Sample.second;
1630     NumSamples += HitCount;
1631     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1632     if (!Func) {
1633       OutOfRangeSamples += HitCount;
1634       continue;
1635     }
1636 
1637     doSample(*Func, PC, HitCount);
1638   }
1639   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1640 
1641   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1642          << OutOfRangeSamples;
1643   float Perc = 0.0f;
1644   if (NumSamples > 0) {
1645     outs() << " (";
1646     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1647     if (outs().has_colors()) {
1648       if (Perc > 60.0f)
1649         outs().changeColor(raw_ostream::RED);
1650       else if (Perc > 40.0f)
1651         outs().changeColor(raw_ostream::YELLOW);
1652       else
1653         outs().changeColor(raw_ostream::GREEN);
1654     }
1655     outs() << format("%.1f%%", Perc);
1656     if (outs().has_colors())
1657       outs().resetColor();
1658     outs() << ")";
1659   }
1660   outs() << "\n";
1661   if (Perc > 80.0f)
1662     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1663               "binary is probably not the same binary used during profiling "
1664               "collection. The generated data may be ineffective for improving "
1665               "performance.\n\n";
1666 }
1667 
parseMemEvents()1668 std::error_code DataAggregator::parseMemEvents() {
1669   outs() << "PERF2BOLT: parsing memory events...\n";
1670   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1671                      TimerGroupDesc, opts::TimeAggregator);
1672   while (hasData()) {
1673     ErrorOr<PerfMemSample> Sample = parseMemSample();
1674     if (std::error_code EC = Sample.getError())
1675       return EC;
1676 
1677     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1678       BF->setHasProfileAvailable();
1679 
1680     MemSamples.emplace_back(std::move(Sample.get()));
1681   }
1682 
1683   return std::error_code();
1684 }
1685 
processMemEvents()1686 void DataAggregator::processMemEvents() {
1687   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1688                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1689   for (const PerfMemSample &Sample : MemSamples) {
1690     uint64_t PC = Sample.PC;
1691     uint64_t Addr = Sample.Addr;
1692     StringRef FuncName;
1693     StringRef MemName;
1694 
1695     // Try to resolve symbol for PC
1696     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1697     if (!Func) {
1698       LLVM_DEBUG(if (PC != 0) {
1699         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1700                << Twine::utohexstr(Addr) << "\n";
1701       });
1702       continue;
1703     }
1704 
1705     FuncName = Func->getOneName();
1706     PC -= Func->getAddress();
1707 
1708     // Try to resolve symbol for memory load
1709     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1710       MemName = BD->getName();
1711       Addr -= BD->getAddress();
1712     } else if (opts::FilterMemProfile) {
1713       // Filter out heap/stack accesses
1714       continue;
1715     }
1716 
1717     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1718     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1719 
1720     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1721     setMemData(*Func, MemData);
1722     MemData->update(FuncLoc, AddrLoc);
1723     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1724   }
1725 }
1726 
parsePreAggregatedLBRSamples()1727 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1728   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1729   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1730                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1731   while (hasData()) {
1732     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1733     if (std::error_code EC = AggrEntry.getError())
1734       return EC;
1735 
1736     if (BinaryFunction *BF =
1737             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1738       BF->setHasProfileAvailable();
1739     if (BinaryFunction *BF =
1740             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1741       BF->setHasProfileAvailable();
1742 
1743     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1744   }
1745 
1746   return std::error_code();
1747 }
1748 
processPreAggregated()1749 void DataAggregator::processPreAggregated() {
1750   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1751   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1752                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1753 
1754   uint64_t NumTraces = 0;
1755   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1756     switch (AggrEntry.EntryType) {
1757     case AggregatedLBREntry::BRANCH:
1758       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1759                AggrEntry.Mispreds);
1760       break;
1761     case AggregatedLBREntry::FT:
1762     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1763       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1764                          ? AggrEntry.From.Offset
1765                          : 0,
1766                      AggrEntry.From.Offset, false};
1767       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1768       doTrace(First, Second, AggrEntry.Count);
1769       NumTraces += AggrEntry.Count;
1770       break;
1771     }
1772     }
1773   }
1774 
1775   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1776          << " aggregated LBR entries\n";
1777   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1778          << NumInvalidTraces;
1779   float Perc = 0.0f;
1780   if (NumTraces > 0) {
1781     outs() << " (";
1782     Perc = NumInvalidTraces * 100.0f / NumTraces;
1783     if (outs().has_colors()) {
1784       if (Perc > 10.0f)
1785         outs().changeColor(raw_ostream::RED);
1786       else if (Perc > 5.0f)
1787         outs().changeColor(raw_ostream::YELLOW);
1788       else
1789         outs().changeColor(raw_ostream::GREEN);
1790     }
1791     outs() << format("%.1f%%", Perc);
1792     if (outs().has_colors())
1793       outs().resetColor();
1794     outs() << ")";
1795   }
1796   outs() << "\n";
1797   if (Perc > 10.0f)
1798     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1799               "binary is probably not the same binary used during profiling "
1800               "collection. The generated data may be ineffective for improving "
1801               "performance.\n\n";
1802 
1803   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1804          << NumLongRangeTraces;
1805   if (NumTraces > 0)
1806     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1807   outs() << "\n";
1808 }
1809 
parseCommExecEvent()1810 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1811   size_t LineEnd = ParsingBuf.find_first_of("\n");
1812   if (LineEnd == StringRef::npos) {
1813     reportError("expected rest of line");
1814     Diag << "Found: " << ParsingBuf << "\n";
1815     return NoneType();
1816   }
1817   StringRef Line = ParsingBuf.substr(0, LineEnd);
1818 
1819   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1820   if (Pos == StringRef::npos)
1821     return NoneType();
1822   Line = Line.drop_front(Pos);
1823 
1824   // Line:
1825   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1826   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1827   int32_t PID;
1828   if (PIDStr.getAsInteger(10, PID)) {
1829     reportError("expected PID");
1830     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1831     return NoneType();
1832   }
1833 
1834   return PID;
1835 }
1836 
1837 namespace {
parsePerfTime(const StringRef TimeStr)1838 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1839   const StringRef SecTimeStr = TimeStr.split('.').first;
1840   const StringRef USecTimeStr = TimeStr.split('.').second;
1841   uint64_t SecTime;
1842   uint64_t USecTime;
1843   if (SecTimeStr.getAsInteger(10, SecTime) ||
1844       USecTimeStr.getAsInteger(10, USecTime))
1845     return NoneType();
1846   return SecTime * 1000000ULL + USecTime;
1847 }
1848 }
1849 
parseForkEvent()1850 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1851   while (checkAndConsumeFS()) {
1852   }
1853 
1854   size_t LineEnd = ParsingBuf.find_first_of("\n");
1855   if (LineEnd == StringRef::npos) {
1856     reportError("expected rest of line");
1857     Diag << "Found: " << ParsingBuf << "\n";
1858     return NoneType();
1859   }
1860   StringRef Line = ParsingBuf.substr(0, LineEnd);
1861 
1862   size_t Pos = Line.find("PERF_RECORD_FORK");
1863   if (Pos == StringRef::npos) {
1864     consumeRestOfLine();
1865     return NoneType();
1866   }
1867 
1868   ForkInfo FI;
1869 
1870   const StringRef TimeStr =
1871       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1872   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1873     FI.Time = *TimeRes;
1874   }
1875 
1876   Line = Line.drop_front(Pos);
1877 
1878   // Line:
1879   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1880   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1881   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1882     reportError("expected PID");
1883     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1884     return NoneType();
1885   }
1886 
1887   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1888   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1889     reportError("expected PID");
1890     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1891     return NoneType();
1892   }
1893 
1894   consumeRestOfLine();
1895 
1896   return FI;
1897 }
1898 
1899 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
parseMMapEvent()1900 DataAggregator::parseMMapEvent() {
1901   while (checkAndConsumeFS()) {
1902   }
1903 
1904   MMapInfo ParsedInfo;
1905 
1906   size_t LineEnd = ParsingBuf.find_first_of("\n");
1907   if (LineEnd == StringRef::npos) {
1908     reportError("expected rest of line");
1909     Diag << "Found: " << ParsingBuf << "\n";
1910     return make_error_code(llvm::errc::io_error);
1911   }
1912   StringRef Line = ParsingBuf.substr(0, LineEnd);
1913 
1914   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1915   if (Pos == StringRef::npos) {
1916     consumeRestOfLine();
1917     return std::make_pair(StringRef(), ParsedInfo);
1918   }
1919 
1920   // Line:
1921   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1922 
1923   const StringRef TimeStr =
1924       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1925   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1926     ParsedInfo.Time = *TimeRes;
1927 
1928   Line = Line.drop_front(Pos);
1929 
1930   // Line:
1931   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1932 
1933   StringRef FileName = Line.rsplit(FieldSeparator).second;
1934   if (FileName.startswith("//") || FileName.startswith("[")) {
1935     consumeRestOfLine();
1936     return std::make_pair(StringRef(), ParsedInfo);
1937   }
1938   FileName = sys::path::filename(FileName);
1939 
1940   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1941   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1942     reportError("expected PID");
1943     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1944     return make_error_code(llvm::errc::io_error);
1945   }
1946 
1947   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1948   if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1949     reportError("expected base address");
1950     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1951     return make_error_code(llvm::errc::io_error);
1952   }
1953 
1954   const StringRef SizeStr = Line.split('(').second.split(')').first;
1955   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1956     reportError("expected mmaped size");
1957     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1958     return make_error_code(llvm::errc::io_error);
1959   }
1960 
1961   const StringRef OffsetStr =
1962       Line.split('@').second.ltrim().split(FieldSeparator).first;
1963   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1964     reportError("expected mmaped page-aligned offset");
1965     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1966     return make_error_code(llvm::errc::io_error);
1967   }
1968 
1969   consumeRestOfLine();
1970 
1971   return std::make_pair(FileName, ParsedInfo);
1972 }
1973 
parseMMapEvents()1974 std::error_code DataAggregator::parseMMapEvents() {
1975   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1976   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1977                      TimerGroupDesc, opts::TimeAggregator);
1978 
1979   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1980   while (hasData()) {
1981     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1982     if (std::error_code EC = FileMMapInfoRes.getError())
1983       return EC;
1984 
1985     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1986     if (FileMMapInfo.second.PID == -1)
1987       continue;
1988 
1989     // Consider only the first mapping of the file for any given PID
1990     bool PIDExists = false;
1991     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1992     for (auto MI = Range.first; MI != Range.second; ++MI) {
1993       if (MI->second.PID == FileMMapInfo.second.PID) {
1994         PIDExists = true;
1995         break;
1996       }
1997     }
1998     if (PIDExists)
1999       continue;
2000 
2001     GlobalMMapInfo.insert(FileMMapInfo);
2002   }
2003 
2004   LLVM_DEBUG({
2005     dbgs() << "FileName -> mmap info:\n";
2006     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
2007       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
2008              << Twine::utohexstr(Pair.second.MMapAddress) << ", "
2009              << Twine::utohexstr(Pair.second.Size) << " @ "
2010              << Twine::utohexstr(Pair.second.Offset) << "]\n";
2011   });
2012 
2013   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2014   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2015     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2016            << "\" for profile matching\n";
2017     NameToUse = BuildIDBinaryName;
2018   }
2019 
2020   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2021   for (auto I = Range.first; I != Range.second; ++I) {
2022     MMapInfo &MMapInfo = I->second;
2023     if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2024       // Check that the binary mapping matches one of the segments.
2025       bool MatchFound = false;
2026       for (auto &KV : BC->SegmentMapInfo) {
2027         SegmentInfo &SegInfo = KV.second;
2028         // The mapping is page-aligned and hence the MMapAddress could be
2029         // different from the segment start address. We cannot know the page
2030         // size of the mapping, but we know it should not exceed the segment
2031         // alignment value. Hence we are performing an approximate check.
2032         if (SegInfo.Address >= MMapInfo.MMapAddress &&
2033             SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
2034           MatchFound = true;
2035           break;
2036         }
2037       }
2038       if (!MatchFound) {
2039         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2040                << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2041         continue;
2042       }
2043     }
2044 
2045     // Set base address for shared objects.
2046     if (!BC->HasFixedLoadAddress) {
2047       Optional<uint64_t> BaseAddress =
2048           BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2049       if (!BaseAddress) {
2050         errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2051                   "binary when memory mapped at 0x"
2052                << Twine::utohexstr(MMapInfo.MMapAddress)
2053                << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2054                << ". Ignoring profile data for this mapping\n";
2055         continue;
2056       } else {
2057         MMapInfo.BaseAddress = *BaseAddress;
2058       }
2059     }
2060 
2061     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2062   }
2063 
2064   if (BinaryMMapInfo.empty()) {
2065     if (errs().has_colors())
2066       errs().changeColor(raw_ostream::RED);
2067     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2068            << BC->getFilename() << "\".";
2069     if (!GlobalMMapInfo.empty()) {
2070       errs() << " Profile for the following binary name(s) is available:\n";
2071       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2072            I = GlobalMMapInfo.upper_bound(I->first))
2073         errs() << "  " << I->first << '\n';
2074       errs() << "Please rename the input binary.\n";
2075     } else {
2076       errs() << " Failed to extract any binary name from a profile.\n";
2077     }
2078     if (errs().has_colors())
2079       errs().resetColor();
2080 
2081     exit(1);
2082   }
2083 
2084   return std::error_code();
2085 }
2086 
parseTaskEvents()2087 std::error_code DataAggregator::parseTaskEvents() {
2088   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2089   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2090                      TimerGroupDesc, opts::TimeAggregator);
2091 
2092   while (hasData()) {
2093     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2094       // Remove forked child that ran execve
2095       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2096       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2097         BinaryMMapInfo.erase(MMapInfoIter);
2098       consumeRestOfLine();
2099       continue;
2100     }
2101 
2102     Optional<ForkInfo> ForkInfo = parseForkEvent();
2103     if (!ForkInfo)
2104       continue;
2105 
2106     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2107       continue;
2108 
2109     if (ForkInfo->Time == 0) {
2110       // Process was forked and mmaped before perf ran. In this case the child
2111       // should have its own mmap entry unless it was execve'd.
2112       continue;
2113     }
2114 
2115     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2116     if (MMapInfoIter == BinaryMMapInfo.end())
2117       continue;
2118 
2119     MMapInfo MMapInfo = MMapInfoIter->second;
2120     MMapInfo.PID = ForkInfo->ChildPID;
2121     MMapInfo.Forked = true;
2122     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2123   }
2124 
2125   outs() << "PERF2BOLT: input binary is associated with "
2126          << BinaryMMapInfo.size() << " PID(s)\n";
2127 
2128   LLVM_DEBUG({
2129     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2130       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2131              << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
2132              << Twine::utohexstr(MMI.second.Size) << ")\n";
2133   });
2134 
2135   return std::error_code();
2136 }
2137 
2138 Optional<std::pair<StringRef, StringRef>>
parseNameBuildIDPair()2139 DataAggregator::parseNameBuildIDPair() {
2140   while (checkAndConsumeFS()) {
2141   }
2142 
2143   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2144   if (std::error_code EC = BuildIDStr.getError())
2145     return NoneType();
2146 
2147   // If one of the strings is missing, don't issue a parsing error, but still
2148   // do not return a value.
2149   if (ParsingBuf[0] == '\n')
2150     return NoneType();
2151 
2152   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2153   if (std::error_code EC = NameStr.getError())
2154     return NoneType();
2155 
2156   consumeRestOfLine();
2157   return std::make_pair(NameStr.get(), BuildIDStr.get());
2158 }
2159 
hasAllBuildIDs()2160 bool DataAggregator::hasAllBuildIDs() {
2161   const StringRef SavedParsingBuf = ParsingBuf;
2162 
2163   if (!hasData())
2164     return false;
2165 
2166   bool HasInvalidEntries = false;
2167   while (hasData()) {
2168     if (!parseNameBuildIDPair()) {
2169       HasInvalidEntries = true;
2170       break;
2171     }
2172   }
2173 
2174   ParsingBuf = SavedParsingBuf;
2175 
2176   return !HasInvalidEntries;
2177 }
2178 
2179 Optional<StringRef>
getFileNameForBuildID(StringRef FileBuildID)2180 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2181   const StringRef SavedParsingBuf = ParsingBuf;
2182 
2183   StringRef FileName;
2184   while (hasData()) {
2185     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2186     if (!IDPair) {
2187       consumeRestOfLine();
2188       continue;
2189     }
2190 
2191     if (IDPair->second.startswith(FileBuildID)) {
2192       FileName = sys::path::filename(IDPair->first);
2193       break;
2194     }
2195   }
2196 
2197   ParsingBuf = SavedParsingBuf;
2198 
2199   if (!FileName.empty())
2200     return FileName;
2201 
2202   return NoneType();
2203 }
2204 
2205 std::error_code
writeAggregatedFile(StringRef OutputFilename) const2206 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2207   std::error_code EC;
2208   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2209   if (EC)
2210     return EC;
2211 
2212   bool WriteMemLocs = false;
2213 
2214   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2215     if (WriteMemLocs)
2216       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2217     else
2218       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2219     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2220             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2221   };
2222 
2223   uint64_t BranchValues = 0;
2224   uint64_t MemValues = 0;
2225 
2226   if (BAT)
2227     OutFile << "boltedcollection\n";
2228   if (opts::BasicAggregation) {
2229     OutFile << "no_lbr";
2230     for (const StringMapEntry<NoneType> &Entry : EventNames)
2231       OutFile << " " << Entry.getKey();
2232     OutFile << "\n";
2233 
2234     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2235       for (const SampleInfo &SI : Func.getValue().Data) {
2236         writeLocation(SI.Loc);
2237         OutFile << SI.Hits << "\n";
2238         ++BranchValues;
2239       }
2240     }
2241   } else {
2242     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2243       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2244         writeLocation(BI.From);
2245         writeLocation(BI.To);
2246         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2247         ++BranchValues;
2248       }
2249       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2250         // Do not output if source is a known symbol, since this was already
2251         // accounted for in the source function
2252         if (BI.From.IsSymbol)
2253           continue;
2254         writeLocation(BI.From);
2255         writeLocation(BI.To);
2256         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2257         ++BranchValues;
2258       }
2259     }
2260 
2261     WriteMemLocs = true;
2262     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2263       for (const MemInfo &MemEvent : Func.getValue().Data) {
2264         writeLocation(MemEvent.Offset);
2265         writeLocation(MemEvent.Addr);
2266         OutFile << MemEvent.Count << "\n";
2267         ++MemValues;
2268       }
2269     }
2270   }
2271 
2272   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2273          << " memory objects to " << OutputFilename << "\n";
2274 
2275   return std::error_code();
2276 }
2277 
dump() const2278 void DataAggregator::dump() const { DataReader::dump(); }
2279 
dump(const LBREntry & LBR) const2280 void DataAggregator::dump(const LBREntry &LBR) const {
2281   Diag << "From: " << Twine::utohexstr(LBR.From)
2282        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2283        << "\n";
2284 }
2285 
dump(const PerfBranchSample & Sample) const2286 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2287   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2288   for (const LBREntry &LBR : Sample.LBR)
2289     dump(LBR);
2290 }
2291 
dump(const PerfMemSample & Sample) const2292 void DataAggregator::dump(const PerfMemSample &Sample) const {
2293   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2294 }
2295