1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Process.h"
27 #include "llvm/Support/Program.h"
28 #include "llvm/Support/Regex.h"
29 #include "llvm/Support/Timer.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <map>
32 #include <unordered_map>
33 #include <utility>
34 
35 #define DEBUG_TYPE "aggregator"
36 
37 using namespace llvm;
38 using namespace bolt;
39 
40 namespace opts {
41 
42 static cl::opt<bool>
43     BasicAggregation("nl",
44                      cl::desc("aggregate basic samples (without LBR info)"),
45                      cl::cat(AggregatorCategory));
46 
47 static cl::opt<bool>
48 FilterMemProfile("filter-mem-profile",
49   cl::desc("if processing a memory profile, filter out stack or heap accesses "
50            "that won't be useful for BOLT to reduce profile file size"),
51   cl::init(true),
52   cl::cat(AggregatorCategory));
53 
54 static cl::opt<unsigned long long>
55 FilterPID("pid",
56   cl::desc("only use samples from process with specified PID"),
57   cl::init(0),
58   cl::Optional,
59   cl::cat(AggregatorCategory));
60 
61 static cl::opt<bool>
62 IgnoreBuildID("ignore-build-id",
63   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
64   cl::init(false),
65   cl::cat(AggregatorCategory));
66 
67 static cl::opt<bool> IgnoreInterruptLBR(
68     "ignore-interrupt-lbr",
69     cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
70     cl::init(true), cl::cat(AggregatorCategory));
71 
72 static cl::opt<unsigned long long>
73 MaxSamples("max-samples",
74   cl::init(-1ULL),
75   cl::desc("maximum number of samples to read from LBR profile"),
76   cl::Optional,
77   cl::Hidden,
78   cl::cat(AggregatorCategory));
79 
80 static cl::opt<bool> ReadPreAggregated(
81     "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
82     cl::cat(AggregatorCategory));
83 
84 static cl::opt<bool>
85 TimeAggregator("time-aggr",
86   cl::desc("time BOLT aggregator"),
87   cl::init(false),
88   cl::ZeroOrMore,
89   cl::cat(AggregatorCategory));
90 
91 static cl::opt<bool>
92     UseEventPC("use-event-pc",
93                cl::desc("use event PC in combination with LBR sampling"),
94                cl::cat(AggregatorCategory));
95 
96 static cl::opt<bool> WriteAutoFDOData(
97     "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
98     cl::cat(AggregatorCategory));
99 
100 } // namespace opts
101 
102 namespace {
103 
104 const char TimerGroupName[] = "aggregator";
105 const char TimerGroupDesc[] = "Aggregator";
106 
107 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
108   std::vector<SectionNameAndRange> sections;
109   for (BinarySection &Section : BC->sections()) {
110     if (!Section.isText())
111       continue;
112     if (Section.getSize() == 0)
113       continue;
114     sections.push_back(
115         {Section.getName(), Section.getAddress(), Section.getEndAddress()});
116   }
117   llvm::sort(sections,
118              [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
119                return A.BeginAddress < B.BeginAddress;
120              });
121   return sections;
122 }
123 }
124 
125 constexpr uint64_t DataAggregator::KernelBaseAddr;
126 
127 DataAggregator::~DataAggregator() { deleteTempFiles(); }
128 
129 namespace {
130 void deleteTempFile(const std::string &FileName) {
131   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
132     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
133            << " with error " << Errc.message() << "\n";
134 }
135 }
136 
137 void DataAggregator::deleteTempFiles() {
138   for (std::string &FileName : TempFiles)
139     deleteTempFile(FileName);
140   TempFiles.clear();
141 }
142 
143 void DataAggregator::findPerfExecutable() {
144   Optional<std::string> PerfExecutable =
145       sys::Process::FindInEnvPath("PATH", "perf");
146   if (!PerfExecutable) {
147     outs() << "PERF2BOLT: No perf executable found!\n";
148     exit(1);
149   }
150   PerfPath = *PerfExecutable;
151 }
152 
153 void DataAggregator::start() {
154   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
155 
156   // Don't launch perf for pre-aggregated files
157   if (opts::ReadPreAggregated)
158     return;
159 
160   findPerfExecutable();
161 
162   if (opts::BasicAggregation)
163     launchPerfProcess("events without LBR",
164                       MainEventsPPI,
165                       "script -F pid,event,ip",
166                       /*Wait = */false);
167   else
168     launchPerfProcess("branch events",
169                       MainEventsPPI,
170                       "script -F pid,ip,brstack",
171                       /*Wait = */false);
172 
173   // Note: we launch script for mem events regardless of the option, as the
174   //       command fails fairly fast if mem events were not collected.
175   launchPerfProcess("mem events",
176                     MemEventsPPI,
177                     "script -F pid,event,addr,ip",
178                     /*Wait = */false);
179 
180   launchPerfProcess("process events",
181                     MMapEventsPPI,
182                     "script --show-mmap-events",
183                     /*Wait = */false);
184 
185   launchPerfProcess("task events",
186                     TaskEventsPPI,
187                     "script --show-task-events",
188                     /*Wait = */false);
189 }
190 
191 void DataAggregator::abort() {
192   if (opts::ReadPreAggregated)
193     return;
194 
195   std::string Error;
196 
197   // Kill subprocesses in case they are not finished
198   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
199   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
200   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
201   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
202 
203   deleteTempFiles();
204 
205   exit(1);
206 }
207 
208 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
209                                        const char *ArgsString, bool Wait) {
210   SmallVector<StringRef, 4> Argv;
211 
212   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
213   Argv.push_back(PerfPath.data());
214 
215   char *WritableArgsString = strdup(ArgsString);
216   char *Str = WritableArgsString;
217   do {
218     Argv.push_back(Str);
219     while (*Str && *Str != ' ')
220       ++Str;
221     if (!*Str)
222       break;
223     *Str++ = 0;
224   } while (true);
225 
226   Argv.push_back("-f");
227   Argv.push_back("-i");
228   Argv.push_back(Filename.c_str());
229 
230   if (std::error_code Errc =
231           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
232     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
233            << " with error " << Errc.message() << "\n";
234     exit(1);
235   }
236   TempFiles.push_back(PPI.StdoutPath.data());
237 
238   if (std::error_code Errc =
239           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
240     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
241            << " with error " << Errc.message() << "\n";
242     exit(1);
243   }
244   TempFiles.push_back(PPI.StderrPath.data());
245 
246   Optional<StringRef> Redirects[] = {
247       llvm::None,                        // Stdin
248       StringRef(PPI.StdoutPath.data()),  // Stdout
249       StringRef(PPI.StderrPath.data())}; // Stderr
250 
251   LLVM_DEBUG({
252     dbgs() << "Launching perf: ";
253     for (StringRef Arg : Argv)
254       dbgs() << Arg << " ";
255     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
256            << "\n";
257   });
258 
259   if (Wait)
260     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
261                                             /*envp*/ llvm::None, Redirects);
262   else
263     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
264                                 Redirects);
265 
266   free(WritableArgsString);
267 }
268 
269 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
270   PerfProcessInfo BuildIDProcessInfo;
271   launchPerfProcess("buildid list",
272                     BuildIDProcessInfo,
273                     "buildid-list",
274                     /*Wait = */true);
275 
276   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
277     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
278         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
279     StringRef ErrBuf = (*MB)->getBuffer();
280 
281     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
282            << '\n';
283     errs() << ErrBuf;
284     return;
285   }
286 
287   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
288       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
289   if (std::error_code EC = MB.getError()) {
290     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
291            << EC.message() << "\n";
292     return;
293   }
294 
295   FileBuf = std::move(*MB);
296   ParsingBuf = FileBuf->getBuffer();
297   if (ParsingBuf.empty()) {
298     errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
299               "data was recorded without it\n";
300     return;
301   }
302 
303   Col = 0;
304   Line = 1;
305   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
306   if (!FileName) {
307     errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
308               "This indicates the input binary supplied for data aggregation "
309               "is not the same recorded by perf when collecting profiling "
310               "data, or there were no samples recorded for the binary. "
311               "Use -ignore-build-id option to override.\n";
312     if (!opts::IgnoreBuildID)
313       abort();
314   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
315     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
316     BuildIDBinaryName = std::string(*FileName);
317   } else {
318     outs() << "PERF2BOLT: matched build-id and file name\n";
319   }
320 
321   return;
322 }
323 
324 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
325   if (opts::ReadPreAggregated)
326     return true;
327 
328   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
329   if (!FD)
330     return false;
331 
332   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
333 
334   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
335   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
336       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
337   if (!BytesRead || *BytesRead != 7)
338     return false;
339 
340   if (strncmp(Buf, "PERFILE", 7) == 0)
341     return true;
342   return false;
343 }
344 
345 void DataAggregator::parsePreAggregated() {
346   std::string Error;
347 
348   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
349       MemoryBuffer::getFileOrSTDIN(Filename);
350   if (std::error_code EC = MB.getError()) {
351     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
352            << EC.message() << "\n";
353     exit(1);
354   }
355 
356   FileBuf = std::move(*MB);
357   ParsingBuf = FileBuf->getBuffer();
358   Col = 0;
359   Line = 1;
360   if (parsePreAggregatedLBRSamples()) {
361     errs() << "PERF2BOLT: failed to parse samples\n";
362     exit(1);
363   }
364 }
365 
366 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
367   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
368   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
369                      TimerGroupDesc, opts::TimeAggregator);
370 
371   std::error_code EC;
372   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
373   if (EC)
374     return EC;
375 
376   // Format:
377   // number of unique traces
378   // from_1-to_1:count_1
379   // from_2-to_2:count_2
380   // ......
381   // from_n-to_n:count_n
382   // number of unique sample addresses
383   // addr_1:count_1
384   // addr_2:count_2
385   // ......
386   // addr_n:count_n
387   // number of unique LBR entries
388   // src_1->dst_1:count_1
389   // src_2->dst_2:count_2
390   // ......
391   // src_n->dst_n:count_n
392 
393   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
394 
395   // AutoFDO addresses are relative to the first allocated loadable program
396   // segment
397   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
398     if (Address < FirstAllocAddress)
399       return 0;
400     return Address - FirstAllocAddress;
401   };
402 
403   OutFile << FallthroughLBRs.size() << "\n";
404   for (const auto &AggrLBR : FallthroughLBRs) {
405     const Trace &Trace = AggrLBR.first;
406     const FTInfo &Info = AggrLBR.second;
407     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
408             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
409             << (Info.InternCount + Info.ExternCount) << "\n";
410   }
411 
412   OutFile << BasicSamples.size() << "\n";
413   for (const auto &Sample : BasicSamples) {
414     uint64_t PC = Sample.first;
415     uint64_t HitCount = Sample.second;
416     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
417   }
418 
419   OutFile << BranchLBRs.size() << "\n";
420   for (const auto &AggrLBR : BranchLBRs) {
421     const Trace &Trace = AggrLBR.first;
422     const BranchInfo &Info = AggrLBR.second;
423     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
424             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
425             << Info.TakenCount << "\n";
426   }
427 
428   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
429          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
430          << " unique branches to " << OutputFilename << "\n";
431 
432   return std::error_code();
433 }
434 
435 void DataAggregator::filterBinaryMMapInfo() {
436   if (opts::FilterPID) {
437     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
438     if (MMapInfoIter != BinaryMMapInfo.end()) {
439       MMapInfo MMap = MMapInfoIter->second;
440       BinaryMMapInfo.clear();
441       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
442     } else {
443       if (errs().has_colors())
444         errs().changeColor(raw_ostream::RED);
445       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
446              << opts::FilterPID << "\""
447              << " for binary \"" << BC->getFilename() << "\".";
448       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
449       errs() << " Profile for the following process is available:\n";
450       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
451         outs() << "  " << MMI.second.PID
452                << (MMI.second.Forked ? " (forked)\n" : "\n");
453 
454       if (errs().has_colors())
455         errs().resetColor();
456 
457       exit(1);
458     }
459   }
460 }
461 
462 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
463   this->BC = &BC;
464 
465   if (opts::ReadPreAggregated) {
466     parsePreAggregated();
467     return Error::success();
468   }
469 
470   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
471     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
472     processFileBuildID(*FileBuildID);
473   } else {
474     errs() << "BOLT-WARNING: build-id will not be checked because we could "
475               "not read one from input binary\n";
476   }
477 
478   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
479     std::string Error;
480     outs() << "PERF2BOLT: waiting for perf " << Name
481            << " collection to finish...\n";
482     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
483 
484     if (!Error.empty()) {
485       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
486       deleteTempFiles();
487       exit(1);
488     }
489 
490     if (PI.ReturnCode != 0) {
491       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
492           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
493       StringRef ErrBuf = (*ErrorMB)->getBuffer();
494 
495       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
496       errs() << ErrBuf;
497       deleteTempFiles();
498       exit(1);
499     }
500 
501     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
502         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
503     if (std::error_code EC = MB.getError()) {
504       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
505              << EC.message() << "\n";
506       deleteTempFiles();
507       exit(1);
508     }
509 
510     FileBuf = std::move(*MB);
511     ParsingBuf = FileBuf->getBuffer();
512     Col = 0;
513     Line = 1;
514   };
515 
516   if (opts::LinuxKernelMode) {
517     // Current MMap parsing logic does not work with linux kernel.
518     // MMap entries for linux kernel uses PERF_RECORD_MMAP
519     // format instead of typical PERF_RECORD_MMAP2 format.
520     // Since linux kernel address mapping is absolute (same as
521     // in the ELF file), we avoid parsing MMap in linux kernel mode.
522     // While generating optimized linux kernel binary, we may need
523     // to parse MMap entries.
524 
525     // In linux kernel mode, we analyze and optimize
526     // all linux kernel binary instructions, irrespective
527     // of whether they are due to system calls or due to
528     // interrupts. Therefore, we cannot ignore interrupt
529     // in Linux kernel mode.
530     opts::IgnoreInterruptLBR = false;
531   } else {
532     prepareToParse("mmap events", MMapEventsPPI);
533     if (parseMMapEvents())
534       errs() << "PERF2BOLT: failed to parse mmap events\n";
535   }
536 
537   prepareToParse("task events", TaskEventsPPI);
538   if (parseTaskEvents())
539     errs() << "PERF2BOLT: failed to parse task events\n";
540 
541   filterBinaryMMapInfo();
542   prepareToParse("events", MainEventsPPI);
543 
544   if (opts::HeatmapMode) {
545     if (std::error_code EC = printLBRHeatMap()) {
546       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
547       exit(1);
548     }
549     exit(0);
550   }
551 
552   if ((!opts::BasicAggregation && parseBranchEvents()) ||
553       (opts::BasicAggregation && parseBasicEvents()))
554     errs() << "PERF2BOLT: failed to parse samples\n";
555 
556   // We can finish early if the goal is just to generate data for autofdo
557   if (opts::WriteAutoFDOData) {
558     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
559       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
560 
561     deleteTempFiles();
562     exit(0);
563   }
564 
565   // Special handling for memory events
566   std::string Error;
567   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
568   if (PI.ReturnCode != 0) {
569     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
570         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
571     StringRef ErrBuf = (*MB)->getBuffer();
572 
573     deleteTempFiles();
574 
575     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
576                  "Cannot print 'addr' field.");
577     if (!NoData.match(ErrBuf)) {
578       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
579       errs() << ErrBuf;
580       exit(1);
581     }
582     return Error::success();
583   }
584 
585   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
586       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
587   if (std::error_code EC = MB.getError()) {
588     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
589            << EC.message() << "\n";
590     deleteTempFiles();
591     exit(1);
592   }
593 
594   FileBuf = std::move(*MB);
595   ParsingBuf = FileBuf->getBuffer();
596   Col = 0;
597   Line = 1;
598   if (const std::error_code EC = parseMemEvents())
599     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
600            << '\n';
601 
602   deleteTempFiles();
603 
604   return Error::success();
605 }
606 
607 Error DataAggregator::readProfile(BinaryContext &BC) {
608   processProfile(BC);
609 
610   for (auto &BFI : BC.getBinaryFunctions()) {
611     BinaryFunction &Function = BFI.second;
612     convertBranchData(Function);
613   }
614 
615   if (opts::AggregateOnly) {
616     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
617       report_error("cannot create output data file", EC);
618   }
619 
620   return Error::success();
621 }
622 
623 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
624   return Function.hasProfileAvailable();
625 }
626 
627 void DataAggregator::processProfile(BinaryContext &BC) {
628   if (opts::ReadPreAggregated)
629     processPreAggregated();
630   else if (opts::BasicAggregation)
631     processBasicEvents();
632   else
633     processBranchEvents();
634 
635   processMemEvents();
636 
637   // Mark all functions with registered events as having a valid profile.
638   for (auto &BFI : BC.getBinaryFunctions()) {
639     BinaryFunction &BF = BFI.second;
640     if (getBranchData(BF)) {
641       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
642                                                 : BinaryFunction::PF_LBR;
643       BF.markProfiled(Flags);
644     }
645   }
646 
647   // Release intermediate storage.
648   clear(BranchLBRs);
649   clear(FallthroughLBRs);
650   clear(AggregatedLBRs);
651   clear(BasicSamples);
652   clear(MemSamples);
653 }
654 
655 BinaryFunction *
656 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
657   if (!BC->containsAddress(Address))
658     return nullptr;
659 
660   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
661                                                 /*UseMaxSize=*/true);
662 }
663 
664 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
665                                           uint64_t Count) {
666   if (!BAT)
667     return Func.getOneName();
668 
669   const BinaryFunction *OrigFunc = &Func;
670   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
671     NumColdSamples += Count;
672     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
673     if (HotFunc)
674       OrigFunc = HotFunc;
675   }
676   // If it is a local function, prefer the name containing the file name where
677   // the local function was declared
678   for (StringRef AlternativeName : OrigFunc->getNames()) {
679     size_t FileNameIdx = AlternativeName.find('/');
680     // Confirm the alternative name has the pattern Symbol/FileName/1 before
681     // using it
682     if (FileNameIdx == StringRef::npos ||
683         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
684       continue;
685     return AlternativeName;
686   }
687   return OrigFunc->getOneName();
688 }
689 
690 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
691                               uint64_t Count) {
692   auto I = NamesToSamples.find(Func.getOneName());
693   if (I == NamesToSamples.end()) {
694     bool Success;
695     StringRef LocName = getLocationName(Func, Count);
696     std::tie(I, Success) = NamesToSamples.insert(
697         std::make_pair(Func.getOneName(),
698                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
699   }
700 
701   Address -= Func.getAddress();
702   if (BAT)
703     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
704 
705   I->second.bumpCount(Address, Count);
706   return true;
707 }
708 
709 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
710                                    uint64_t To, uint64_t Count,
711                                    uint64_t Mispreds) {
712   FuncBranchData *AggrData = getBranchData(Func);
713   if (!AggrData) {
714     AggrData = &NamesToBranches[Func.getOneName()];
715     AggrData->Name = getLocationName(Func, Count);
716     setBranchData(Func, AggrData);
717   }
718 
719   From -= Func.getAddress();
720   To -= Func.getAddress();
721   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
722                     << " @ " << Twine::utohexstr(From) << " -> "
723                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
724                     << '\n');
725   if (BAT) {
726     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
727     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
728     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
729                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
730                       << " -> " << Func.getPrintName() << " @ "
731                       << Twine::utohexstr(To) << '\n');
732   }
733 
734   AggrData->bumpBranchCount(From, To, Count, Mispreds);
735   return true;
736 }
737 
738 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
739                                    BinaryFunction *ToFunc, uint64_t From,
740                                    uint64_t To, uint64_t Count,
741                                    uint64_t Mispreds) {
742   FuncBranchData *FromAggrData = nullptr;
743   FuncBranchData *ToAggrData = nullptr;
744   StringRef SrcFunc;
745   StringRef DstFunc;
746   if (FromFunc) {
747     SrcFunc = getLocationName(*FromFunc, Count);
748     FromAggrData = getBranchData(*FromFunc);
749     if (!FromAggrData) {
750       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
751       FromAggrData->Name = SrcFunc;
752       setBranchData(*FromFunc, FromAggrData);
753     }
754     From -= FromFunc->getAddress();
755     if (BAT)
756       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
757 
758     recordExit(*FromFunc, From, Mispreds, Count);
759   }
760   if (ToFunc) {
761     DstFunc = getLocationName(*ToFunc, 0);
762     ToAggrData = getBranchData(*ToFunc);
763     if (!ToAggrData) {
764       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
765       ToAggrData->Name = DstFunc;
766       setBranchData(*ToFunc, ToAggrData);
767     }
768     To -= ToFunc->getAddress();
769     if (BAT)
770       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
771 
772     recordEntry(*ToFunc, To, Mispreds, Count);
773   }
774 
775   if (FromAggrData)
776     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
777                                 Count, Mispreds);
778   if (ToAggrData)
779     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
780                                Count, Mispreds);
781   return true;
782 }
783 
784 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
785                               uint64_t Mispreds) {
786   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
787   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
788   if (!FromFunc && !ToFunc)
789     return false;
790 
791   if (FromFunc == ToFunc) {
792     recordBranch(*FromFunc, From - FromFunc->getAddress(),
793                  To - FromFunc->getAddress(), Count, Mispreds);
794     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
795   }
796 
797   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
798 }
799 
800 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
801                              uint64_t Count) {
802   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
803   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
804   if (!FromFunc || !ToFunc) {
805     LLVM_DEBUG(
806         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
807                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
808                << " and ending in " << ToFunc->getPrintName() << " @ "
809                << ToFunc->getPrintName() << " @ "
810                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
811     NumLongRangeTraces += Count;
812     return false;
813   }
814   if (FromFunc != ToFunc) {
815     NumInvalidTraces += Count;
816     LLVM_DEBUG(
817         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
818                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
819                << " and ending in " << ToFunc->getPrintName() << " @ "
820                << ToFunc->getPrintName() << " @ "
821                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
822     return false;
823   }
824 
825   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
826       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
827           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
828   if (!FTs) {
829     LLVM_DEBUG(
830         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
831                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
832                << " and ending in " << ToFunc->getPrintName() << " @ "
833                << ToFunc->getPrintName() << " @ "
834                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
835     NumInvalidTraces += Count;
836     return false;
837   }
838 
839   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
840                     << FromFunc->getPrintName() << ":"
841                     << Twine::utohexstr(First.To) << " to "
842                     << Twine::utohexstr(Second.From) << ".\n");
843   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
844     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
845                   Pair.second + FromFunc->getAddress(), Count, false);
846 
847   return true;
848 }
849 
850 bool DataAggregator::recordTrace(
851     BinaryFunction &BF,
852     const LBREntry &FirstLBR,
853     const LBREntry &SecondLBR,
854     uint64_t Count,
855     SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
856   BinaryContext &BC = BF.getBinaryContext();
857 
858   if (!BF.isSimple())
859     return false;
860 
861   assert(BF.hasCFG() && "can only record traces in CFG state");
862 
863   // Offsets of the trace within this function.
864   const uint64_t From = FirstLBR.To - BF.getAddress();
865   const uint64_t To = SecondLBR.From - BF.getAddress();
866 
867   if (From > To)
868     return false;
869 
870   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
871   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
872 
873   if (!FromBB || !ToBB)
874     return false;
875 
876   // Adjust FromBB if the first LBR is a return from the last instruction in
877   // the previous block (that instruction should be a call).
878   if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
879       !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
880     BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
881     if (PrevBB->getSuccessor(FromBB->getLabel())) {
882       const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
883       if (Instr && BC.MIB->isCall(*Instr))
884         FromBB = PrevBB;
885       else
886         LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
887                           << '\n');
888     } else {
889       LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
890     }
891   }
892 
893   // Fill out information for fall-through edges. The From and To could be
894   // within the same basic block, e.g. when two call instructions are in the
895   // same block. In this case we skip the processing.
896   if (FromBB == ToBB)
897     return true;
898 
899   // Process blocks in the original layout order.
900   BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
901   assert(BB == FromBB && "index mismatch");
902   while (BB != ToBB) {
903     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
904     assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
905 
906     // Check for bad LBRs.
907     if (!BB->getSuccessor(NextBB->getLabel())) {
908       LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
909                         << "  " << FirstLBR << '\n'
910                         << "  " << SecondLBR << '\n');
911       return false;
912     }
913 
914     // Record fall-through jumps
915     BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
916     BI.Count += Count;
917 
918     if (Branches) {
919       const MCInst *Instr = BB->getLastNonPseudoInstr();
920       uint64_t Offset = 0;
921       if (Instr)
922         Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
923       else
924         Offset = BB->getOffset();
925 
926       Branches->emplace_back(Offset, NextBB->getOffset());
927     }
928 
929     BB = NextBB;
930   }
931 
932   return true;
933 }
934 
935 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
936 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
937                                        const LBREntry &FirstLBR,
938                                        const LBREntry &SecondLBR,
939                                        uint64_t Count) const {
940   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
941 
942   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
943     return NoneType();
944 
945   return Res;
946 }
947 
948 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
949                                  uint64_t Count) const {
950   if (To > BF.getSize())
951     return false;
952 
953   if (!BF.hasProfile())
954     BF.ExecutionCount = 0;
955 
956   BinaryBasicBlock *EntryBB = nullptr;
957   if (To == 0) {
958     BF.ExecutionCount += Count;
959     if (!BF.empty())
960       EntryBB = &BF.front();
961   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
962     if (BB->isEntryPoint())
963       EntryBB = BB;
964   }
965 
966   if (EntryBB)
967     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
968 
969   return true;
970 }
971 
972 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
973                                 uint64_t Count) const {
974   if (!BF.isSimple() || From > BF.getSize())
975     return false;
976 
977   if (!BF.hasProfile())
978     BF.ExecutionCount = 0;
979 
980   return true;
981 }
982 
983 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
984   LBREntry Res;
985   ErrorOr<StringRef> FromStrRes = parseString('/');
986   if (std::error_code EC = FromStrRes.getError())
987     return EC;
988   StringRef OffsetStr = FromStrRes.get();
989   if (OffsetStr.getAsInteger(0, Res.From)) {
990     reportError("expected hexadecimal number with From address");
991     Diag << "Found: " << OffsetStr << "\n";
992     return make_error_code(llvm::errc::io_error);
993   }
994 
995   ErrorOr<StringRef> ToStrRes = parseString('/');
996   if (std::error_code EC = ToStrRes.getError())
997     return EC;
998   OffsetStr = ToStrRes.get();
999   if (OffsetStr.getAsInteger(0, Res.To)) {
1000     reportError("expected hexadecimal number with To address");
1001     Diag << "Found: " << OffsetStr << "\n";
1002     return make_error_code(llvm::errc::io_error);
1003   }
1004 
1005   ErrorOr<StringRef> MispredStrRes = parseString('/');
1006   if (std::error_code EC = MispredStrRes.getError())
1007     return EC;
1008   StringRef MispredStr = MispredStrRes.get();
1009   if (MispredStr.size() != 1 ||
1010       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1011     reportError("expected single char for mispred bit");
1012     Diag << "Found: " << MispredStr << "\n";
1013     return make_error_code(llvm::errc::io_error);
1014   }
1015   Res.Mispred = MispredStr[0] == 'M';
1016 
1017   static bool MispredWarning = true;
1018   if (MispredStr[0] == '-' && MispredWarning) {
1019     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1020     MispredWarning = false;
1021   }
1022 
1023   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1024   if (std::error_code EC = Rest.getError())
1025     return EC;
1026   if (Rest.get().size() < 5) {
1027     reportError("expected rest of LBR entry");
1028     Diag << "Found: " << Rest.get() << "\n";
1029     return make_error_code(llvm::errc::io_error);
1030   }
1031   return Res;
1032 }
1033 
1034 bool DataAggregator::checkAndConsumeFS() {
1035   if (ParsingBuf[0] != FieldSeparator)
1036     return false;
1037 
1038   ParsingBuf = ParsingBuf.drop_front(1);
1039   Col += 1;
1040   return true;
1041 }
1042 
1043 void DataAggregator::consumeRestOfLine() {
1044   size_t LineEnd = ParsingBuf.find_first_of('\n');
1045   if (LineEnd == StringRef::npos) {
1046     ParsingBuf = StringRef();
1047     Col = 0;
1048     Line += 1;
1049     return;
1050   }
1051   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1052   Col = 0;
1053   Line += 1;
1054 }
1055 
1056 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1057   PerfBranchSample Res;
1058 
1059   while (checkAndConsumeFS()) {
1060   }
1061 
1062   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1063   if (std::error_code EC = PIDRes.getError())
1064     return EC;
1065   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1066   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1067     consumeRestOfLine();
1068     return make_error_code(errc::no_such_process);
1069   }
1070 
1071   while (checkAndConsumeFS()) {
1072   }
1073 
1074   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1075   if (std::error_code EC = PCRes.getError())
1076     return EC;
1077   Res.PC = PCRes.get();
1078 
1079   if (checkAndConsumeNewLine())
1080     return Res;
1081 
1082   while (!checkAndConsumeNewLine()) {
1083     checkAndConsumeFS();
1084 
1085     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1086     if (std::error_code EC = LBRRes.getError())
1087       return EC;
1088     LBREntry LBR = LBRRes.get();
1089     if (ignoreKernelInterrupt(LBR))
1090       continue;
1091     if (!BC->HasFixedLoadAddress)
1092       adjustLBR(LBR, MMapInfoIter->second);
1093     Res.LBR.push_back(LBR);
1094   }
1095 
1096   return Res;
1097 }
1098 
1099 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1100   while (checkAndConsumeFS()) {
1101   }
1102 
1103   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1104   if (std::error_code EC = PIDRes.getError())
1105     return EC;
1106 
1107   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1108   if (MMapInfoIter == BinaryMMapInfo.end()) {
1109     consumeRestOfLine();
1110     return PerfBasicSample{StringRef(), 0};
1111   }
1112 
1113   while (checkAndConsumeFS()) {
1114   }
1115 
1116   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1117   if (std::error_code EC = Event.getError())
1118     return EC;
1119 
1120   while (checkAndConsumeFS()) {
1121   }
1122 
1123   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1124   if (std::error_code EC = AddrRes.getError())
1125     return EC;
1126 
1127   if (!checkAndConsumeNewLine()) {
1128     reportError("expected end of line");
1129     return make_error_code(llvm::errc::io_error);
1130   }
1131 
1132   uint64_t Address = *AddrRes;
1133   if (!BC->HasFixedLoadAddress)
1134     adjustAddress(Address, MMapInfoIter->second);
1135 
1136   return PerfBasicSample{Event.get(), Address};
1137 }
1138 
1139 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1140   PerfMemSample Res{0, 0};
1141 
1142   while (checkAndConsumeFS()) {
1143   }
1144 
1145   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1146   if (std::error_code EC = PIDRes.getError())
1147     return EC;
1148 
1149   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1150   if (MMapInfoIter == BinaryMMapInfo.end()) {
1151     consumeRestOfLine();
1152     return Res;
1153   }
1154 
1155   while (checkAndConsumeFS()) {
1156   }
1157 
1158   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1159   if (std::error_code EC = Event.getError())
1160     return EC;
1161   if (Event.get().find("mem-loads") == StringRef::npos) {
1162     consumeRestOfLine();
1163     return Res;
1164   }
1165 
1166   while (checkAndConsumeFS()) {
1167   }
1168 
1169   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1170   if (std::error_code EC = AddrRes.getError())
1171     return EC;
1172 
1173   while (checkAndConsumeFS()) {
1174   }
1175 
1176   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1177   if (std::error_code EC = PCRes.getError()) {
1178     consumeRestOfLine();
1179     return EC;
1180   }
1181 
1182   if (!checkAndConsumeNewLine()) {
1183     reportError("expected end of line");
1184     return make_error_code(llvm::errc::io_error);
1185   }
1186 
1187   uint64_t Address = *AddrRes;
1188   if (!BC->HasFixedLoadAddress)
1189     adjustAddress(Address, MMapInfoIter->second);
1190 
1191   return PerfMemSample{PCRes.get(), Address};
1192 }
1193 
1194 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1195   auto parseOffset = [this]() -> ErrorOr<Location> {
1196     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1197     if (std::error_code EC = Res.getError())
1198       return EC;
1199     return Location(Res.get());
1200   };
1201 
1202   size_t Sep = ParsingBuf.find_first_of(" \n");
1203   if (Sep == StringRef::npos)
1204     return parseOffset();
1205   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1206   if (LookAhead.find_first_of(":") == StringRef::npos)
1207     return parseOffset();
1208 
1209   ErrorOr<StringRef> BuildID = parseString(':');
1210   if (std::error_code EC = BuildID.getError())
1211     return EC;
1212   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1213   if (std::error_code EC = Offset.getError())
1214     return EC;
1215   return Location(true, BuildID.get(), Offset.get());
1216 }
1217 
1218 ErrorOr<DataAggregator::AggregatedLBREntry>
1219 DataAggregator::parseAggregatedLBREntry() {
1220   while (checkAndConsumeFS()) {
1221   }
1222 
1223   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1224   if (std::error_code EC = TypeOrErr.getError())
1225     return EC;
1226   auto Type = AggregatedLBREntry::BRANCH;
1227   if (TypeOrErr.get() == "B") {
1228     Type = AggregatedLBREntry::BRANCH;
1229   } else if (TypeOrErr.get() == "F") {
1230     Type = AggregatedLBREntry::FT;
1231   } else if (TypeOrErr.get() == "f") {
1232     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1233   } else {
1234     reportError("expected B, F or f");
1235     return make_error_code(llvm::errc::io_error);
1236   }
1237 
1238   while (checkAndConsumeFS()) {
1239   }
1240   ErrorOr<Location> From = parseLocationOrOffset();
1241   if (std::error_code EC = From.getError())
1242     return EC;
1243 
1244   while (checkAndConsumeFS()) {
1245   }
1246   ErrorOr<Location> To = parseLocationOrOffset();
1247   if (std::error_code EC = To.getError())
1248     return EC;
1249 
1250   while (checkAndConsumeFS()) {
1251   }
1252   ErrorOr<int64_t> Frequency =
1253       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1254   if (std::error_code EC = Frequency.getError())
1255     return EC;
1256 
1257   uint64_t Mispreds = 0;
1258   if (Type == AggregatedLBREntry::BRANCH) {
1259     while (checkAndConsumeFS()) {
1260     }
1261     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1262     if (std::error_code EC = MispredsOrErr.getError())
1263       return EC;
1264     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1265   }
1266 
1267   if (!checkAndConsumeNewLine()) {
1268     reportError("expected end of line");
1269     return make_error_code(llvm::errc::io_error);
1270   }
1271 
1272   return AggregatedLBREntry{From.get(), To.get(),
1273                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1274                             Type};
1275 }
1276 
1277 bool DataAggregator::hasData() {
1278   if (ParsingBuf.size() == 0)
1279     return false;
1280 
1281   return true;
1282 }
1283 
1284 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1285   return opts::IgnoreInterruptLBR &&
1286          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1287 }
1288 
1289 std::error_code DataAggregator::printLBRHeatMap() {
1290   outs() << "PERF2BOLT: parse branch events...\n";
1291   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1292                      TimerGroupDesc, opts::TimeAggregator);
1293 
1294   if (opts::LinuxKernelMode) {
1295     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1296     opts::HeatmapMinAddress = KernelBaseAddr;
1297   }
1298   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1299              opts::HeatmapMaxAddress, getTextSections(BC));
1300   uint64_t NumTotalSamples = 0;
1301 
1302   if (opts::BasicAggregation) {
1303     while (hasData()) {
1304       ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1305       if (std::error_code EC = SampleRes.getError()) {
1306         if (EC == errc::no_such_process)
1307           continue;
1308         return EC;
1309       }
1310       PerfBasicSample &Sample = SampleRes.get();
1311       HM.registerAddress(Sample.PC);
1312       NumTotalSamples++;
1313     }
1314     outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1315   } else {
1316     while (hasData()) {
1317       ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1318       if (std::error_code EC = SampleRes.getError()) {
1319         if (EC == errc::no_such_process)
1320           continue;
1321         return EC;
1322       }
1323 
1324       PerfBranchSample &Sample = SampleRes.get();
1325 
1326       // LBRs are stored in reverse execution order. NextLBR refers to the next
1327       // executed branch record.
1328       const LBREntry *NextLBR = nullptr;
1329       for (const LBREntry &LBR : Sample.LBR) {
1330         if (NextLBR) {
1331           // Record fall-through trace.
1332           const uint64_t TraceFrom = LBR.To;
1333           const uint64_t TraceTo = NextLBR->From;
1334           ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1335         }
1336         NextLBR = &LBR;
1337       }
1338       if (!Sample.LBR.empty()) {
1339         HM.registerAddress(Sample.LBR.front().To);
1340         HM.registerAddress(Sample.LBR.back().From);
1341       }
1342       NumTotalSamples += Sample.LBR.size();
1343     }
1344     outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1345     outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1346   }
1347 
1348   if (!NumTotalSamples) {
1349     if (opts::BasicAggregation) {
1350       errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1351                 "Cannot build heatmap.";
1352     } else {
1353       errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1354                 "Cannot build heatmap. Use -nl for building heatmap from "
1355                 "basic events.\n";
1356     }
1357     exit(1);
1358   }
1359 
1360   outs() << "HEATMAP: building heat map...\n";
1361 
1362   for (const auto &LBR : FallthroughLBRs) {
1363     const Trace &Trace = LBR.first;
1364     const FTInfo &Info = LBR.second;
1365     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1366   }
1367 
1368   if (HM.getNumInvalidRanges())
1369     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1370 
1371   if (!HM.size()) {
1372     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1373     exit(1);
1374   }
1375 
1376   HM.print(opts::OutputFilename);
1377   if (opts::OutputFilename == "-")
1378     HM.printCDF(opts::OutputFilename);
1379   else
1380     HM.printCDF(opts::OutputFilename + ".csv");
1381   if (opts::OutputFilename == "-")
1382     HM.printSectionHotness(opts::OutputFilename);
1383   else
1384     HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1385 
1386   return std::error_code();
1387 }
1388 
1389 std::error_code DataAggregator::parseBranchEvents() {
1390   outs() << "PERF2BOLT: parse branch events...\n";
1391   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1392                      TimerGroupDesc, opts::TimeAggregator);
1393 
1394   uint64_t NumTotalSamples = 0;
1395   uint64_t NumEntries = 0;
1396   uint64_t NumSamples = 0;
1397   uint64_t NumSamplesNoLBR = 0;
1398   uint64_t NumTraces = 0;
1399   bool NeedsSkylakeFix = false;
1400 
1401   while (hasData() && NumTotalSamples < opts::MaxSamples) {
1402     ++NumTotalSamples;
1403 
1404     ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1405     if (std::error_code EC = SampleRes.getError()) {
1406       if (EC == errc::no_such_process)
1407         continue;
1408       return EC;
1409     }
1410     ++NumSamples;
1411 
1412     PerfBranchSample &Sample = SampleRes.get();
1413     if (opts::WriteAutoFDOData)
1414       ++BasicSamples[Sample.PC];
1415 
1416     if (Sample.LBR.empty()) {
1417       ++NumSamplesNoLBR;
1418       continue;
1419     }
1420 
1421     NumEntries += Sample.LBR.size();
1422     if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1423       errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1424       NeedsSkylakeFix = true;
1425     }
1426 
1427     // LBRs are stored in reverse execution order. NextPC refers to the next
1428     // recorded executed PC.
1429     uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1430     uint32_t NumEntry = 0;
1431     for (const LBREntry &LBR : Sample.LBR) {
1432       ++NumEntry;
1433       // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1434       // sometimes record entry 32 as an exact copy of entry 31. This will cause
1435       // us to likely record an invalid trace and generate a stale function for
1436       // BAT mode (non BAT disassembles the function and is able to ignore this
1437       // trace at aggregation time). Drop first 2 entries (last two, in
1438       // chronological order)
1439       if (NeedsSkylakeFix && NumEntry <= 2)
1440         continue;
1441       if (NextPC) {
1442         // Record fall-through trace.
1443         const uint64_t TraceFrom = LBR.To;
1444         const uint64_t TraceTo = NextPC;
1445         const BinaryFunction *TraceBF =
1446             getBinaryFunctionContainingAddress(TraceFrom);
1447         if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1448           FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1449           if (TraceBF->containsAddress(LBR.From))
1450             ++Info.InternCount;
1451           else
1452             ++Info.ExternCount;
1453         } else {
1454           if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
1455             LLVM_DEBUG(dbgs()
1456                        << "Invalid trace starting in "
1457                        << TraceBF->getPrintName() << " @ "
1458                        << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
1459                        << " and ending @ " << Twine::utohexstr(TraceTo)
1460                        << '\n');
1461             ++NumInvalidTraces;
1462           } else {
1463             LLVM_DEBUG(dbgs()
1464                        << "Out of range trace starting in "
1465                        << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
1466                        << Twine::utohexstr(
1467                               TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1468                        << " and ending in "
1469                        << (getBinaryFunctionContainingAddress(TraceTo)
1470                                ? getBinaryFunctionContainingAddress(TraceTo)
1471                                      ->getPrintName()
1472                                : "None")
1473                        << " @ "
1474                        << Twine::utohexstr(
1475                               TraceTo -
1476                               (getBinaryFunctionContainingAddress(TraceTo)
1477                                    ? getBinaryFunctionContainingAddress(TraceTo)
1478                                          ->getAddress()
1479                                    : 0))
1480                        << '\n');
1481             ++NumLongRangeTraces;
1482           }
1483         }
1484         ++NumTraces;
1485       }
1486       NextPC = LBR.From;
1487 
1488       uint64_t From = LBR.From;
1489       if (!getBinaryFunctionContainingAddress(From))
1490         From = 0;
1491       uint64_t To = LBR.To;
1492       if (!getBinaryFunctionContainingAddress(To))
1493         To = 0;
1494       if (!From && !To)
1495         continue;
1496       BranchInfo &Info = BranchLBRs[Trace(From, To)];
1497       ++Info.TakenCount;
1498       Info.MispredCount += LBR.Mispred;
1499     }
1500   }
1501 
1502   for (const auto &LBR : BranchLBRs) {
1503     const Trace &Trace = LBR.first;
1504     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
1505       BF->setHasProfileAvailable();
1506     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
1507       BF->setHasProfileAvailable();
1508   }
1509 
1510   auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1511     OS << " (";
1512     if (OS.has_colors()) {
1513       if (Percent > T2)
1514         OS.changeColor(raw_ostream::RED);
1515       else if (Percent > T1)
1516         OS.changeColor(raw_ostream::YELLOW);
1517       else
1518         OS.changeColor(raw_ostream::GREEN);
1519     }
1520     OS << format("%.1f%%", Percent);
1521     if (OS.has_colors())
1522       OS.resetColor();
1523     OS << ")";
1524   };
1525 
1526   outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1527          << " LBR entries\n";
1528   if (NumTotalSamples) {
1529     if (NumSamples && NumSamplesNoLBR == NumSamples) {
1530       // Note: we don't know if perf2bolt is being used to parse memory samples
1531       // at this point. In this case, it is OK to parse zero LBRs.
1532       errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1533                 "LBR. Record profile with perf record -j any or run perf2bolt "
1534                 "in no-LBR mode with -nl (the performance improvement in -nl "
1535                 "mode may be limited)\n";
1536     } else {
1537       const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1538       const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1539       outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1540       printColored(outs(), PercentIgnored, 20, 50);
1541       outs() << " were ignored\n";
1542       if (PercentIgnored > 50.0f)
1543         errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1544                   "were attributed to the input binary\n";
1545     }
1546   }
1547   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1548          << NumInvalidTraces;
1549   float Perc = 0.0f;
1550   if (NumTraces > 0) {
1551     Perc = NumInvalidTraces * 100.0f / NumTraces;
1552     printColored(outs(), Perc, 5, 10);
1553   }
1554   outs() << "\n";
1555   if (Perc > 10.0f)
1556     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1557               "binary is probably not the same binary used during profiling "
1558               "collection. The generated data may be ineffective for improving "
1559               "performance.\n\n";
1560 
1561   outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1562          << NumLongRangeTraces;
1563   if (NumTraces > 0)
1564     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1565   outs() << "\n";
1566 
1567   if (NumColdSamples > 0) {
1568     const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1569     outs() << "PERF2BOLT: " << NumColdSamples
1570            << format(" (%.1f%%)", ColdSamples)
1571            << " samples recorded in cold regions of split functions.\n";
1572     if (ColdSamples > 5.0f)
1573       outs()
1574           << "WARNING: The BOLT-processed binary where samples were collected "
1575              "likely used bad data or your service observed a large shift in "
1576              "profile. You may want to audit this.\n";
1577   }
1578 
1579   return std::error_code();
1580 }
1581 
1582 void DataAggregator::processBranchEvents() {
1583   outs() << "PERF2BOLT: processing branch events...\n";
1584   NamedRegionTimer T("processBranch", "Processing branch events",
1585                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1586 
1587   for (const auto &AggrLBR : FallthroughLBRs) {
1588     const Trace &Loc = AggrLBR.first;
1589     const FTInfo &Info = AggrLBR.second;
1590     LBREntry First{Loc.From, Loc.From, false};
1591     LBREntry Second{Loc.To, Loc.To, false};
1592     if (Info.InternCount)
1593       doTrace(First, Second, Info.InternCount);
1594     if (Info.ExternCount) {
1595       First.From = 0;
1596       doTrace(First, Second, Info.ExternCount);
1597     }
1598   }
1599 
1600   for (const auto &AggrLBR : BranchLBRs) {
1601     const Trace &Loc = AggrLBR.first;
1602     const BranchInfo &Info = AggrLBR.second;
1603     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1604   }
1605 }
1606 
1607 std::error_code DataAggregator::parseBasicEvents() {
1608   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1609   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1610                      TimerGroupDesc, opts::TimeAggregator);
1611   while (hasData()) {
1612     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1613     if (std::error_code EC = Sample.getError())
1614       return EC;
1615 
1616     if (!Sample->PC)
1617       continue;
1618 
1619     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1620       BF->setHasProfileAvailable();
1621 
1622     ++BasicSamples[Sample->PC];
1623     EventNames.insert(Sample->EventName);
1624   }
1625 
1626   return std::error_code();
1627 }
1628 
1629 void DataAggregator::processBasicEvents() {
1630   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1631   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1632                      TimerGroupDesc, opts::TimeAggregator);
1633   uint64_t OutOfRangeSamples = 0;
1634   uint64_t NumSamples = 0;
1635   for (auto &Sample : BasicSamples) {
1636     const uint64_t PC = Sample.first;
1637     const uint64_t HitCount = Sample.second;
1638     NumSamples += HitCount;
1639     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1640     if (!Func) {
1641       OutOfRangeSamples += HitCount;
1642       continue;
1643     }
1644 
1645     doSample(*Func, PC, HitCount);
1646   }
1647   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1648 
1649   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1650          << OutOfRangeSamples;
1651   float Perc = 0.0f;
1652   if (NumSamples > 0) {
1653     outs() << " (";
1654     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1655     if (outs().has_colors()) {
1656       if (Perc > 60.0f)
1657         outs().changeColor(raw_ostream::RED);
1658       else if (Perc > 40.0f)
1659         outs().changeColor(raw_ostream::YELLOW);
1660       else
1661         outs().changeColor(raw_ostream::GREEN);
1662     }
1663     outs() << format("%.1f%%", Perc);
1664     if (outs().has_colors())
1665       outs().resetColor();
1666     outs() << ")";
1667   }
1668   outs() << "\n";
1669   if (Perc > 80.0f)
1670     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1671               "binary is probably not the same binary used during profiling "
1672               "collection. The generated data may be ineffective for improving "
1673               "performance.\n\n";
1674 }
1675 
1676 std::error_code DataAggregator::parseMemEvents() {
1677   outs() << "PERF2BOLT: parsing memory events...\n";
1678   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1679                      TimerGroupDesc, opts::TimeAggregator);
1680   while (hasData()) {
1681     ErrorOr<PerfMemSample> Sample = parseMemSample();
1682     if (std::error_code EC = Sample.getError())
1683       return EC;
1684 
1685     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1686       BF->setHasProfileAvailable();
1687 
1688     MemSamples.emplace_back(std::move(Sample.get()));
1689   }
1690 
1691   return std::error_code();
1692 }
1693 
1694 void DataAggregator::processMemEvents() {
1695   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1696                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1697   for (const PerfMemSample &Sample : MemSamples) {
1698     uint64_t PC = Sample.PC;
1699     uint64_t Addr = Sample.Addr;
1700     StringRef FuncName;
1701     StringRef MemName;
1702 
1703     // Try to resolve symbol for PC
1704     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1705     if (!Func) {
1706       LLVM_DEBUG(if (PC != 0) {
1707         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1708                << Twine::utohexstr(Addr) << "\n";
1709       });
1710       continue;
1711     }
1712 
1713     FuncName = Func->getOneName();
1714     PC -= Func->getAddress();
1715 
1716     // Try to resolve symbol for memory load
1717     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1718       MemName = BD->getName();
1719       Addr -= BD->getAddress();
1720     } else if (opts::FilterMemProfile) {
1721       // Filter out heap/stack accesses
1722       continue;
1723     }
1724 
1725     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1726     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1727 
1728     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1729     setMemData(*Func, MemData);
1730     MemData->update(FuncLoc, AddrLoc);
1731     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1732   }
1733 }
1734 
1735 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1736   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1737   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1738                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1739   while (hasData()) {
1740     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1741     if (std::error_code EC = AggrEntry.getError())
1742       return EC;
1743 
1744     if (BinaryFunction *BF =
1745             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1746       BF->setHasProfileAvailable();
1747     if (BinaryFunction *BF =
1748             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1749       BF->setHasProfileAvailable();
1750 
1751     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1752   }
1753 
1754   return std::error_code();
1755 }
1756 
1757 void DataAggregator::processPreAggregated() {
1758   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1759   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1760                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1761 
1762   uint64_t NumTraces = 0;
1763   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1764     switch (AggrEntry.EntryType) {
1765     case AggregatedLBREntry::BRANCH:
1766       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1767                AggrEntry.Mispreds);
1768       break;
1769     case AggregatedLBREntry::FT:
1770     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1771       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1772                          ? AggrEntry.From.Offset
1773                          : 0,
1774                      AggrEntry.From.Offset, false};
1775       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1776       doTrace(First, Second, AggrEntry.Count);
1777       NumTraces += AggrEntry.Count;
1778       break;
1779     }
1780     }
1781   }
1782 
1783   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1784          << " aggregated LBR entries\n";
1785   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1786          << NumInvalidTraces;
1787   float Perc = 0.0f;
1788   if (NumTraces > 0) {
1789     outs() << " (";
1790     Perc = NumInvalidTraces * 100.0f / NumTraces;
1791     if (outs().has_colors()) {
1792       if (Perc > 10.0f)
1793         outs().changeColor(raw_ostream::RED);
1794       else if (Perc > 5.0f)
1795         outs().changeColor(raw_ostream::YELLOW);
1796       else
1797         outs().changeColor(raw_ostream::GREEN);
1798     }
1799     outs() << format("%.1f%%", Perc);
1800     if (outs().has_colors())
1801       outs().resetColor();
1802     outs() << ")";
1803   }
1804   outs() << "\n";
1805   if (Perc > 10.0f)
1806     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1807               "binary is probably not the same binary used during profiling "
1808               "collection. The generated data may be ineffective for improving "
1809               "performance.\n\n";
1810 
1811   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1812          << NumLongRangeTraces;
1813   if (NumTraces > 0)
1814     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1815   outs() << "\n";
1816 }
1817 
1818 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1819   size_t LineEnd = ParsingBuf.find_first_of("\n");
1820   if (LineEnd == StringRef::npos) {
1821     reportError("expected rest of line");
1822     Diag << "Found: " << ParsingBuf << "\n";
1823     return NoneType();
1824   }
1825   StringRef Line = ParsingBuf.substr(0, LineEnd);
1826 
1827   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1828   if (Pos == StringRef::npos)
1829     return NoneType();
1830   Line = Line.drop_front(Pos);
1831 
1832   // Line:
1833   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1834   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1835   int32_t PID;
1836   if (PIDStr.getAsInteger(10, PID)) {
1837     reportError("expected PID");
1838     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1839     return NoneType();
1840   }
1841 
1842   return PID;
1843 }
1844 
1845 namespace {
1846 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1847   const StringRef SecTimeStr = TimeStr.split('.').first;
1848   const StringRef USecTimeStr = TimeStr.split('.').second;
1849   uint64_t SecTime;
1850   uint64_t USecTime;
1851   if (SecTimeStr.getAsInteger(10, SecTime) ||
1852       USecTimeStr.getAsInteger(10, USecTime))
1853     return NoneType();
1854   return SecTime * 1000000ULL + USecTime;
1855 }
1856 }
1857 
1858 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1859   while (checkAndConsumeFS()) {
1860   }
1861 
1862   size_t LineEnd = ParsingBuf.find_first_of("\n");
1863   if (LineEnd == StringRef::npos) {
1864     reportError("expected rest of line");
1865     Diag << "Found: " << ParsingBuf << "\n";
1866     return NoneType();
1867   }
1868   StringRef Line = ParsingBuf.substr(0, LineEnd);
1869 
1870   size_t Pos = Line.find("PERF_RECORD_FORK");
1871   if (Pos == StringRef::npos) {
1872     consumeRestOfLine();
1873     return NoneType();
1874   }
1875 
1876   ForkInfo FI;
1877 
1878   const StringRef TimeStr =
1879       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1880   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1881     FI.Time = *TimeRes;
1882   }
1883 
1884   Line = Line.drop_front(Pos);
1885 
1886   // Line:
1887   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1888   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1889   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1890     reportError("expected PID");
1891     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1892     return NoneType();
1893   }
1894 
1895   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1896   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1897     reportError("expected PID");
1898     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1899     return NoneType();
1900   }
1901 
1902   consumeRestOfLine();
1903 
1904   return FI;
1905 }
1906 
1907 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1908 DataAggregator::parseMMapEvent() {
1909   while (checkAndConsumeFS()) {
1910   }
1911 
1912   MMapInfo ParsedInfo;
1913 
1914   size_t LineEnd = ParsingBuf.find_first_of("\n");
1915   if (LineEnd == StringRef::npos) {
1916     reportError("expected rest of line");
1917     Diag << "Found: " << ParsingBuf << "\n";
1918     return make_error_code(llvm::errc::io_error);
1919   }
1920   StringRef Line = ParsingBuf.substr(0, LineEnd);
1921 
1922   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1923   if (Pos == StringRef::npos) {
1924     consumeRestOfLine();
1925     return std::make_pair(StringRef(), ParsedInfo);
1926   }
1927 
1928   // Line:
1929   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1930 
1931   const StringRef TimeStr =
1932       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1933   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1934     ParsedInfo.Time = *TimeRes;
1935 
1936   Line = Line.drop_front(Pos);
1937 
1938   // Line:
1939   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1940 
1941   StringRef FileName = Line.rsplit(FieldSeparator).second;
1942   if (FileName.startswith("//") || FileName.startswith("[")) {
1943     consumeRestOfLine();
1944     return std::make_pair(StringRef(), ParsedInfo);
1945   }
1946   FileName = sys::path::filename(FileName);
1947 
1948   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1949   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1950     reportError("expected PID");
1951     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1952     return make_error_code(llvm::errc::io_error);
1953   }
1954 
1955   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1956   if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1957     reportError("expected base address");
1958     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1959     return make_error_code(llvm::errc::io_error);
1960   }
1961 
1962   const StringRef SizeStr = Line.split('(').second.split(')').first;
1963   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1964     reportError("expected mmaped size");
1965     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1966     return make_error_code(llvm::errc::io_error);
1967   }
1968 
1969   const StringRef OffsetStr =
1970       Line.split('@').second.ltrim().split(FieldSeparator).first;
1971   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1972     reportError("expected mmaped page-aligned offset");
1973     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1974     return make_error_code(llvm::errc::io_error);
1975   }
1976 
1977   consumeRestOfLine();
1978 
1979   return std::make_pair(FileName, ParsedInfo);
1980 }
1981 
1982 std::error_code DataAggregator::parseMMapEvents() {
1983   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1984   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1985                      TimerGroupDesc, opts::TimeAggregator);
1986 
1987   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1988   while (hasData()) {
1989     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1990     if (std::error_code EC = FileMMapInfoRes.getError())
1991       return EC;
1992 
1993     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1994     if (FileMMapInfo.second.PID == -1)
1995       continue;
1996 
1997     // Consider only the first mapping of the file for any given PID
1998     bool PIDExists = false;
1999     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
2000     for (auto MI = Range.first; MI != Range.second; ++MI) {
2001       if (MI->second.PID == FileMMapInfo.second.PID) {
2002         PIDExists = true;
2003         break;
2004       }
2005     }
2006     if (PIDExists)
2007       continue;
2008 
2009     GlobalMMapInfo.insert(FileMMapInfo);
2010   }
2011 
2012   LLVM_DEBUG({
2013     dbgs() << "FileName -> mmap info:\n";
2014     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
2015       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
2016              << Twine::utohexstr(Pair.second.MMapAddress) << ", "
2017              << Twine::utohexstr(Pair.second.Size) << " @ "
2018              << Twine::utohexstr(Pair.second.Offset) << "]\n";
2019   });
2020 
2021   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2022   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2023     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2024            << "\" for profile matching\n";
2025     NameToUse = BuildIDBinaryName;
2026   }
2027 
2028   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2029   for (auto I = Range.first; I != Range.second; ++I) {
2030     MMapInfo &MMapInfo = I->second;
2031     if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2032       // Check that the binary mapping matches one of the segments.
2033       bool MatchFound = false;
2034       for (auto &KV : BC->SegmentMapInfo) {
2035         SegmentInfo &SegInfo = KV.second;
2036         // The mapping is page-aligned and hence the MMapAddress could be
2037         // different from the segment start address. We cannot know the page
2038         // size of the mapping, but we know it should not exceed the segment
2039         // alignment value. Hence we are performing an approximate check.
2040         if (SegInfo.Address >= MMapInfo.MMapAddress &&
2041             SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
2042           MatchFound = true;
2043           break;
2044         }
2045       }
2046       if (!MatchFound) {
2047         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2048                << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2049         continue;
2050       }
2051     }
2052 
2053     // Set base address for shared objects.
2054     if (!BC->HasFixedLoadAddress) {
2055       Optional<uint64_t> BaseAddress =
2056           BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2057       if (!BaseAddress) {
2058         errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2059                   "binary when memory mapped at 0x"
2060                << Twine::utohexstr(MMapInfo.MMapAddress)
2061                << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2062                << ". Ignoring profile data for this mapping\n";
2063         continue;
2064       } else {
2065         MMapInfo.BaseAddress = *BaseAddress;
2066       }
2067     }
2068 
2069     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2070   }
2071 
2072   if (BinaryMMapInfo.empty()) {
2073     if (errs().has_colors())
2074       errs().changeColor(raw_ostream::RED);
2075     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2076            << BC->getFilename() << "\".";
2077     if (!GlobalMMapInfo.empty()) {
2078       errs() << " Profile for the following binary name(s) is available:\n";
2079       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2080            I = GlobalMMapInfo.upper_bound(I->first))
2081         errs() << "  " << I->first << '\n';
2082       errs() << "Please rename the input binary.\n";
2083     } else {
2084       errs() << " Failed to extract any binary name from a profile.\n";
2085     }
2086     if (errs().has_colors())
2087       errs().resetColor();
2088 
2089     exit(1);
2090   }
2091 
2092   return std::error_code();
2093 }
2094 
2095 std::error_code DataAggregator::parseTaskEvents() {
2096   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2097   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2098                      TimerGroupDesc, opts::TimeAggregator);
2099 
2100   while (hasData()) {
2101     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2102       // Remove forked child that ran execve
2103       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2104       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2105         BinaryMMapInfo.erase(MMapInfoIter);
2106       consumeRestOfLine();
2107       continue;
2108     }
2109 
2110     Optional<ForkInfo> ForkInfo = parseForkEvent();
2111     if (!ForkInfo)
2112       continue;
2113 
2114     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2115       continue;
2116 
2117     if (ForkInfo->Time == 0) {
2118       // Process was forked and mmaped before perf ran. In this case the child
2119       // should have its own mmap entry unless it was execve'd.
2120       continue;
2121     }
2122 
2123     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2124     if (MMapInfoIter == BinaryMMapInfo.end())
2125       continue;
2126 
2127     MMapInfo MMapInfo = MMapInfoIter->second;
2128     MMapInfo.PID = ForkInfo->ChildPID;
2129     MMapInfo.Forked = true;
2130     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2131   }
2132 
2133   outs() << "PERF2BOLT: input binary is associated with "
2134          << BinaryMMapInfo.size() << " PID(s)\n";
2135 
2136   LLVM_DEBUG({
2137     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2138       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2139              << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
2140              << Twine::utohexstr(MMI.second.Size) << ")\n";
2141   });
2142 
2143   return std::error_code();
2144 }
2145 
2146 Optional<std::pair<StringRef, StringRef>>
2147 DataAggregator::parseNameBuildIDPair() {
2148   while (checkAndConsumeFS()) {
2149   }
2150 
2151   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2152   if (std::error_code EC = BuildIDStr.getError())
2153     return NoneType();
2154 
2155   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2156   if (std::error_code EC = NameStr.getError())
2157     return NoneType();
2158 
2159   consumeRestOfLine();
2160   return std::make_pair(NameStr.get(), BuildIDStr.get());
2161 }
2162 
2163 Optional<StringRef>
2164 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2165   while (hasData()) {
2166     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2167     if (!IDPair)
2168       return NoneType();
2169 
2170     if (IDPair->second.startswith(FileBuildID))
2171       return sys::path::filename(IDPair->first);
2172   }
2173   return NoneType();
2174 }
2175 
2176 std::error_code
2177 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2178   std::error_code EC;
2179   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2180   if (EC)
2181     return EC;
2182 
2183   bool WriteMemLocs = false;
2184 
2185   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2186     if (WriteMemLocs)
2187       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2188     else
2189       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2190     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2191             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2192   };
2193 
2194   uint64_t BranchValues = 0;
2195   uint64_t MemValues = 0;
2196 
2197   if (BAT)
2198     OutFile << "boltedcollection\n";
2199   if (opts::BasicAggregation) {
2200     OutFile << "no_lbr";
2201     for (const StringMapEntry<NoneType> &Entry : EventNames)
2202       OutFile << " " << Entry.getKey();
2203     OutFile << "\n";
2204 
2205     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2206       for (const SampleInfo &SI : Func.getValue().Data) {
2207         writeLocation(SI.Loc);
2208         OutFile << SI.Hits << "\n";
2209         ++BranchValues;
2210       }
2211     }
2212   } else {
2213     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2214       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2215         writeLocation(BI.From);
2216         writeLocation(BI.To);
2217         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2218         ++BranchValues;
2219       }
2220       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2221         // Do not output if source is a known symbol, since this was already
2222         // accounted for in the source function
2223         if (BI.From.IsSymbol)
2224           continue;
2225         writeLocation(BI.From);
2226         writeLocation(BI.To);
2227         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2228         ++BranchValues;
2229       }
2230     }
2231 
2232     WriteMemLocs = true;
2233     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2234       for (const MemInfo &MemEvent : Func.getValue().Data) {
2235         writeLocation(MemEvent.Offset);
2236         writeLocation(MemEvent.Addr);
2237         OutFile << MemEvent.Count << "\n";
2238         ++MemValues;
2239       }
2240     }
2241   }
2242 
2243   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2244          << " memory objects to " << OutputFilename << "\n";
2245 
2246   return std::error_code();
2247 }
2248 
2249 void DataAggregator::dump() const { DataReader::dump(); }
2250 
2251 void DataAggregator::dump(const LBREntry &LBR) const {
2252   Diag << "From: " << Twine::utohexstr(LBR.From)
2253        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2254        << "\n";
2255 }
2256 
2257 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2258   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2259   for (const LBREntry &LBR : Sample.LBR)
2260     dump(LBR);
2261 }
2262 
2263 void DataAggregator::dump(const PerfMemSample &Sample) const {
2264   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2265 }
2266