1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Process.h"
27 #include "llvm/Support/Program.h"
28 #include "llvm/Support/Regex.h"
29 #include "llvm/Support/Timer.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <map>
32 #include <unordered_map>
33 #include <utility>
34 
35 #define DEBUG_TYPE "aggregator"
36 
37 using namespace llvm;
38 using namespace bolt;
39 
40 namespace opts {
41 
42 static cl::opt<bool>
43 BasicAggregation("nl",
44   cl::desc("aggregate basic samples (without LBR info)"),
45   cl::init(false),
46   cl::ZeroOrMore,
47   cl::cat(AggregatorCategory));
48 
49 static cl::opt<bool>
50 FilterMemProfile("filter-mem-profile",
51   cl::desc("if processing a memory profile, filter out stack or heap accesses "
52            "that won't be useful for BOLT to reduce profile file size"),
53   cl::init(true),
54   cl::cat(AggregatorCategory));
55 
56 static cl::opt<unsigned long long>
57 FilterPID("pid",
58   cl::desc("only use samples from process with specified PID"),
59   cl::init(0),
60   cl::Optional,
61   cl::cat(AggregatorCategory));
62 
63 static cl::opt<bool>
64 IgnoreBuildID("ignore-build-id",
65   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
66   cl::init(false),
67   cl::cat(AggregatorCategory));
68 
69 static cl::opt<bool>
70 IgnoreInterruptLBR("ignore-interrupt-lbr",
71   cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
72   cl::init(true),
73   cl::ZeroOrMore,
74   cl::cat(AggregatorCategory));
75 
76 static cl::opt<unsigned long long>
77 MaxSamples("max-samples",
78   cl::init(-1ULL),
79   cl::desc("maximum number of samples to read from LBR profile"),
80   cl::Optional,
81   cl::Hidden,
82   cl::cat(AggregatorCategory));
83 
84 static cl::opt<bool>
85 ReadPreAggregated("pa",
86   cl::desc("skip perf and read data from a pre-aggregated file format"),
87   cl::init(false),
88   cl::ZeroOrMore,
89   cl::cat(AggregatorCategory));
90 
91 static cl::opt<bool>
92 TimeAggregator("time-aggr",
93   cl::desc("time BOLT aggregator"),
94   cl::init(false),
95   cl::ZeroOrMore,
96   cl::cat(AggregatorCategory));
97 
98 static cl::opt<bool>
99 UseEventPC("use-event-pc",
100   cl::desc("use event PC in combination with LBR sampling"),
101   cl::init(false),
102   cl::ZeroOrMore,
103   cl::cat(AggregatorCategory));
104 
105 static cl::opt<bool>
106 WriteAutoFDOData("autofdo",
107   cl::desc("generate autofdo textual data instead of bolt data"),
108   cl::init(false),
109   cl::ZeroOrMore,
110   cl::cat(AggregatorCategory));
111 
112 } // namespace opts
113 
114 namespace {
115 
116 const char TimerGroupName[] = "aggregator";
117 const char TimerGroupDesc[] = "Aggregator";
118 
119 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
120   std::vector<SectionNameAndRange> sections;
121   for (BinarySection &Section : BC->sections()) {
122     if (!Section.isText())
123       continue;
124     if (Section.getSize() == 0)
125       continue;
126     sections.push_back(
127         {Section.getName(), Section.getAddress(), Section.getEndAddress()});
128   }
129   std::sort(sections.begin(), sections.end(),
130             [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
131               return A.BeginAddress < B.BeginAddress;
132             });
133   return sections;
134 }
135 }
136 
137 constexpr uint64_t DataAggregator::KernelBaseAddr;
138 
139 DataAggregator::~DataAggregator() { deleteTempFiles(); }
140 
141 namespace {
142 void deleteTempFile(const std::string &FileName) {
143   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
144     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
145            << " with error " << Errc.message() << "\n";
146 }
147 }
148 
149 void DataAggregator::deleteTempFiles() {
150   for (std::string &FileName : TempFiles)
151     deleteTempFile(FileName);
152   TempFiles.clear();
153 }
154 
155 void DataAggregator::findPerfExecutable() {
156   Optional<std::string> PerfExecutable =
157       sys::Process::FindInEnvPath("PATH", "perf");
158   if (!PerfExecutable) {
159     outs() << "PERF2BOLT: No perf executable found!\n";
160     exit(1);
161   }
162   PerfPath = *PerfExecutable;
163 }
164 
165 void DataAggregator::start() {
166   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
167 
168   // Don't launch perf for pre-aggregated files
169   if (opts::ReadPreAggregated)
170     return;
171 
172   findPerfExecutable();
173 
174   if (opts::BasicAggregation)
175     launchPerfProcess("events without LBR",
176                       MainEventsPPI,
177                       "script -F pid,event,ip",
178                       /*Wait = */false);
179   else
180     launchPerfProcess("branch events",
181                       MainEventsPPI,
182                       "script -F pid,ip,brstack",
183                       /*Wait = */false);
184 
185   // Note: we launch script for mem events regardless of the option, as the
186   //       command fails fairly fast if mem events were not collected.
187   launchPerfProcess("mem events",
188                     MemEventsPPI,
189                     "script -F pid,event,addr,ip",
190                     /*Wait = */false);
191 
192   launchPerfProcess("process events",
193                     MMapEventsPPI,
194                     "script --show-mmap-events",
195                     /*Wait = */false);
196 
197   launchPerfProcess("task events",
198                     TaskEventsPPI,
199                     "script --show-task-events",
200                     /*Wait = */false);
201 }
202 
203 void DataAggregator::abort() {
204   if (opts::ReadPreAggregated)
205     return;
206 
207   std::string Error;
208 
209   // Kill subprocesses in case they are not finished
210   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
211   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
212   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
213   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
214 
215   deleteTempFiles();
216 
217   exit(1);
218 }
219 
220 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
221                                        const char *ArgsString, bool Wait) {
222   SmallVector<StringRef, 4> Argv;
223 
224   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
225   Argv.push_back(PerfPath.data());
226 
227   char *WritableArgsString = strdup(ArgsString);
228   char *Str = WritableArgsString;
229   do {
230     Argv.push_back(Str);
231     while (*Str && *Str != ' ')
232       ++Str;
233     if (!*Str)
234       break;
235     *Str++ = 0;
236   } while (true);
237 
238   Argv.push_back("-f");
239   Argv.push_back("-i");
240   Argv.push_back(Filename.c_str());
241 
242   if (std::error_code Errc =
243           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
244     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
245            << " with error " << Errc.message() << "\n";
246     exit(1);
247   }
248   TempFiles.push_back(PPI.StdoutPath.data());
249 
250   if (std::error_code Errc =
251           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
252     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
253            << " with error " << Errc.message() << "\n";
254     exit(1);
255   }
256   TempFiles.push_back(PPI.StderrPath.data());
257 
258   Optional<StringRef> Redirects[] = {
259       llvm::None,                        // Stdin
260       StringRef(PPI.StdoutPath.data()),  // Stdout
261       StringRef(PPI.StderrPath.data())}; // Stderr
262 
263   LLVM_DEBUG({
264     dbgs() << "Launching perf: ";
265     for (StringRef Arg : Argv)
266       dbgs() << Arg << " ";
267     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
268            << "\n";
269   });
270 
271   if (Wait)
272     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
273                                             /*envp*/ llvm::None, Redirects);
274   else
275     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
276                                 Redirects);
277 
278   free(WritableArgsString);
279 }
280 
281 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
282   PerfProcessInfo BuildIDProcessInfo;
283   launchPerfProcess("buildid list",
284                     BuildIDProcessInfo,
285                     "buildid-list",
286                     /*Wait = */true);
287 
288   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
289     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
290         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
291     StringRef ErrBuf = (*MB)->getBuffer();
292 
293     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
294            << '\n';
295     errs() << ErrBuf;
296     return;
297   }
298 
299   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
300       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
301   if (std::error_code EC = MB.getError()) {
302     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
303            << EC.message() << "\n";
304     return;
305   }
306 
307   FileBuf = std::move(*MB);
308   ParsingBuf = FileBuf->getBuffer();
309   if (ParsingBuf.empty()) {
310     errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
311               "data was recorded without it\n";
312     return;
313   }
314 
315   Col = 0;
316   Line = 1;
317   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
318   if (!FileName) {
319     errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
320               "This indicates the input binary supplied for data aggregation "
321               "is not the same recorded by perf when collecting profiling "
322               "data, or there were no samples recorded for the binary. "
323               "Use -ignore-build-id option to override.\n";
324     if (!opts::IgnoreBuildID)
325       abort();
326   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
327     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
328     BuildIDBinaryName = std::string(*FileName);
329   } else {
330     outs() << "PERF2BOLT: matched build-id and file name\n";
331   }
332 
333   return;
334 }
335 
336 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
337   if (opts::ReadPreAggregated)
338     return true;
339 
340   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
341   if (!FD)
342     return false;
343 
344   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
345 
346   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
347   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
348       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
349   if (!BytesRead || *BytesRead != 7)
350     return false;
351 
352   if (strncmp(Buf, "PERFILE", 7) == 0)
353     return true;
354   return false;
355 }
356 
357 void DataAggregator::parsePreAggregated() {
358   std::string Error;
359 
360   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
361       MemoryBuffer::getFileOrSTDIN(Filename);
362   if (std::error_code EC = MB.getError()) {
363     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
364            << EC.message() << "\n";
365     exit(1);
366   }
367 
368   FileBuf = std::move(*MB);
369   ParsingBuf = FileBuf->getBuffer();
370   Col = 0;
371   Line = 1;
372   if (parsePreAggregatedLBRSamples()) {
373     errs() << "PERF2BOLT: failed to parse samples\n";
374     exit(1);
375   }
376 }
377 
378 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
379   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
380   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
381                      TimerGroupDesc, opts::TimeAggregator);
382 
383   std::error_code EC;
384   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
385   if (EC)
386     return EC;
387 
388   // Format:
389   // number of unique traces
390   // from_1-to_1:count_1
391   // from_2-to_2:count_2
392   // ......
393   // from_n-to_n:count_n
394   // number of unique sample addresses
395   // addr_1:count_1
396   // addr_2:count_2
397   // ......
398   // addr_n:count_n
399   // number of unique LBR entries
400   // src_1->dst_1:count_1
401   // src_2->dst_2:count_2
402   // ......
403   // src_n->dst_n:count_n
404 
405   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
406 
407   // AutoFDO addresses are relative to the first allocated loadable program
408   // segment
409   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
410     if (Address < FirstAllocAddress)
411       return 0;
412     return Address - FirstAllocAddress;
413   };
414 
415   OutFile << FallthroughLBRs.size() << "\n";
416   for (const auto &AggrLBR : FallthroughLBRs) {
417     const Trace &Trace = AggrLBR.first;
418     const FTInfo &Info = AggrLBR.second;
419     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
420             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
421             << (Info.InternCount + Info.ExternCount) << "\n";
422   }
423 
424   OutFile << BasicSamples.size() << "\n";
425   for (const auto &Sample : BasicSamples) {
426     uint64_t PC = Sample.first;
427     uint64_t HitCount = Sample.second;
428     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
429   }
430 
431   OutFile << BranchLBRs.size() << "\n";
432   for (const auto &AggrLBR : BranchLBRs) {
433     const Trace &Trace = AggrLBR.first;
434     const BranchInfo &Info = AggrLBR.second;
435     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
436             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
437             << Info.TakenCount << "\n";
438   }
439 
440   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
441          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
442          << " unique branches to " << OutputFilename << "\n";
443 
444   return std::error_code();
445 }
446 
447 void DataAggregator::filterBinaryMMapInfo() {
448   if (opts::FilterPID) {
449     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
450     if (MMapInfoIter != BinaryMMapInfo.end()) {
451       MMapInfo MMap = MMapInfoIter->second;
452       BinaryMMapInfo.clear();
453       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
454     } else {
455       if (errs().has_colors())
456         errs().changeColor(raw_ostream::RED);
457       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
458              << opts::FilterPID << "\""
459              << " for binary \"" << BC->getFilename() << "\".";
460       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
461       errs() << " Profile for the following process is available:\n";
462       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
463         outs() << "  " << MMI.second.PID
464                << (MMI.second.Forked ? " (forked)\n" : "\n");
465 
466       if (errs().has_colors())
467         errs().resetColor();
468 
469       exit(1);
470     }
471   }
472 }
473 
474 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
475   this->BC = &BC;
476 
477   if (opts::ReadPreAggregated) {
478     parsePreAggregated();
479     return Error::success();
480   }
481 
482   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
483     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
484     processFileBuildID(*FileBuildID);
485   } else {
486     errs() << "BOLT-WARNING: build-id will not be checked because we could "
487               "not read one from input binary\n";
488   }
489 
490   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
491     std::string Error;
492     outs() << "PERF2BOLT: waiting for perf " << Name
493            << " collection to finish...\n";
494     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
495 
496     if (!Error.empty()) {
497       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
498       deleteTempFiles();
499       exit(1);
500     }
501 
502     if (PI.ReturnCode != 0) {
503       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
504           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
505       StringRef ErrBuf = (*ErrorMB)->getBuffer();
506 
507       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
508       errs() << ErrBuf;
509       deleteTempFiles();
510       exit(1);
511     }
512 
513     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
514         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
515     if (std::error_code EC = MB.getError()) {
516       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
517              << EC.message() << "\n";
518       deleteTempFiles();
519       exit(1);
520     }
521 
522     FileBuf = std::move(*MB);
523     ParsingBuf = FileBuf->getBuffer();
524     Col = 0;
525     Line = 1;
526   };
527 
528   if (opts::LinuxKernelMode) {
529     // Current MMap parsing logic does not work with linux kernel.
530     // MMap entries for linux kernel uses PERF_RECORD_MMAP
531     // format instead of typical PERF_RECORD_MMAP2 format.
532     // Since linux kernel address mapping is absolute (same as
533     // in the ELF file), we avoid parsing MMap in linux kernel mode.
534     // While generating optimized linux kernel binary, we may need
535     // to parse MMap entries.
536 
537     // In linux kernel mode, we analyze and optimize
538     // all linux kernel binary instructions, irrespective
539     // of whether they are due to system calls or due to
540     // interrupts. Therefore, we cannot ignore interrupt
541     // in Linux kernel mode.
542     opts::IgnoreInterruptLBR = false;
543   } else {
544     prepareToParse("mmap events", MMapEventsPPI);
545     if (parseMMapEvents())
546       errs() << "PERF2BOLT: failed to parse mmap events\n";
547   }
548 
549   prepareToParse("task events", TaskEventsPPI);
550   if (parseTaskEvents())
551     errs() << "PERF2BOLT: failed to parse task events\n";
552 
553   filterBinaryMMapInfo();
554   prepareToParse("events", MainEventsPPI);
555 
556   if (opts::HeatmapMode) {
557     if (std::error_code EC = printLBRHeatMap()) {
558       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
559       exit(1);
560     }
561     exit(0);
562   }
563 
564   if ((!opts::BasicAggregation && parseBranchEvents()) ||
565       (opts::BasicAggregation && parseBasicEvents()))
566     errs() << "PERF2BOLT: failed to parse samples\n";
567 
568   // We can finish early if the goal is just to generate data for autofdo
569   if (opts::WriteAutoFDOData) {
570     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
571       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
572 
573     deleteTempFiles();
574     exit(0);
575   }
576 
577   // Special handling for memory events
578   std::string Error;
579   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
580   if (PI.ReturnCode != 0) {
581     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
582         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
583     StringRef ErrBuf = (*MB)->getBuffer();
584 
585     deleteTempFiles();
586 
587     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
588                  "Cannot print 'addr' field.");
589     if (!NoData.match(ErrBuf)) {
590       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
591       errs() << ErrBuf;
592       exit(1);
593     }
594     return Error::success();
595   }
596 
597   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
598       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
599   if (std::error_code EC = MB.getError()) {
600     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
601            << EC.message() << "\n";
602     deleteTempFiles();
603     exit(1);
604   }
605 
606   FileBuf = std::move(*MB);
607   ParsingBuf = FileBuf->getBuffer();
608   Col = 0;
609   Line = 1;
610   if (const std::error_code EC = parseMemEvents())
611     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
612            << '\n';
613 
614   deleteTempFiles();
615 
616   return Error::success();
617 }
618 
619 Error DataAggregator::readProfile(BinaryContext &BC) {
620   processProfile(BC);
621 
622   for (auto &BFI : BC.getBinaryFunctions()) {
623     BinaryFunction &Function = BFI.second;
624     convertBranchData(Function);
625   }
626 
627   if (opts::AggregateOnly) {
628     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
629       report_error("cannot create output data file", EC);
630   }
631 
632   return Error::success();
633 }
634 
635 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
636   return Function.hasProfileAvailable();
637 }
638 
639 void DataAggregator::processProfile(BinaryContext &BC) {
640   if (opts::ReadPreAggregated)
641     processPreAggregated();
642   else if (opts::BasicAggregation)
643     processBasicEvents();
644   else
645     processBranchEvents();
646 
647   processMemEvents();
648 
649   // Mark all functions with registered events as having a valid profile.
650   for (auto &BFI : BC.getBinaryFunctions()) {
651     BinaryFunction &BF = BFI.second;
652     if (getBranchData(BF)) {
653       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
654                                                 : BinaryFunction::PF_LBR;
655       BF.markProfiled(Flags);
656     }
657   }
658 
659   // Release intermediate storage.
660   clear(BranchLBRs);
661   clear(FallthroughLBRs);
662   clear(AggregatedLBRs);
663   clear(BasicSamples);
664   clear(MemSamples);
665 }
666 
667 BinaryFunction *
668 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
669   if (!BC->containsAddress(Address))
670     return nullptr;
671 
672   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
673                                                 /*UseMaxSize=*/true);
674 }
675 
676 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
677                                           uint64_t Count) {
678   if (!BAT)
679     return Func.getOneName();
680 
681   const BinaryFunction *OrigFunc = &Func;
682   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
683     NumColdSamples += Count;
684     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
685     if (HotFunc)
686       OrigFunc = HotFunc;
687   }
688   // If it is a local function, prefer the name containing the file name where
689   // the local function was declared
690   for (StringRef AlternativeName : OrigFunc->getNames()) {
691     size_t FileNameIdx = AlternativeName.find('/');
692     // Confirm the alternative name has the pattern Symbol/FileName/1 before
693     // using it
694     if (FileNameIdx == StringRef::npos ||
695         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
696       continue;
697     return AlternativeName;
698   }
699   return OrigFunc->getOneName();
700 }
701 
702 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
703                               uint64_t Count) {
704   auto I = NamesToSamples.find(Func.getOneName());
705   if (I == NamesToSamples.end()) {
706     bool Success;
707     StringRef LocName = getLocationName(Func, Count);
708     std::tie(I, Success) = NamesToSamples.insert(
709         std::make_pair(Func.getOneName(),
710                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
711   }
712 
713   Address -= Func.getAddress();
714   if (BAT)
715     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
716 
717   I->second.bumpCount(Address, Count);
718   return true;
719 }
720 
721 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
722                                    uint64_t To, uint64_t Count,
723                                    uint64_t Mispreds) {
724   FuncBranchData *AggrData = getBranchData(Func);
725   if (!AggrData) {
726     AggrData = &NamesToBranches[Func.getOneName()];
727     AggrData->Name = getLocationName(Func, Count);
728     setBranchData(Func, AggrData);
729   }
730 
731   From -= Func.getAddress();
732   To -= Func.getAddress();
733   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
734                     << " @ " << Twine::utohexstr(From) << " -> "
735                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
736                     << '\n');
737   if (BAT) {
738     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
739     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
740     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
741                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
742                       << " -> " << Func.getPrintName() << " @ "
743                       << Twine::utohexstr(To) << '\n');
744   }
745 
746   AggrData->bumpBranchCount(From, To, Count, Mispreds);
747   return true;
748 }
749 
750 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
751                                    BinaryFunction *ToFunc, uint64_t From,
752                                    uint64_t To, uint64_t Count,
753                                    uint64_t Mispreds) {
754   FuncBranchData *FromAggrData = nullptr;
755   FuncBranchData *ToAggrData = nullptr;
756   StringRef SrcFunc;
757   StringRef DstFunc;
758   if (FromFunc) {
759     SrcFunc = getLocationName(*FromFunc, Count);
760     FromAggrData = getBranchData(*FromFunc);
761     if (!FromAggrData) {
762       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
763       FromAggrData->Name = SrcFunc;
764       setBranchData(*FromFunc, FromAggrData);
765     }
766     From -= FromFunc->getAddress();
767     if (BAT)
768       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
769 
770     recordExit(*FromFunc, From, Mispreds, Count);
771   }
772   if (ToFunc) {
773     DstFunc = getLocationName(*ToFunc, 0);
774     ToAggrData = getBranchData(*ToFunc);
775     if (!ToAggrData) {
776       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
777       ToAggrData->Name = DstFunc;
778       setBranchData(*ToFunc, ToAggrData);
779     }
780     To -= ToFunc->getAddress();
781     if (BAT)
782       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
783 
784     recordEntry(*ToFunc, To, Mispreds, Count);
785   }
786 
787   if (FromAggrData)
788     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
789                                 Count, Mispreds);
790   if (ToAggrData)
791     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
792                                Count, Mispreds);
793   return true;
794 }
795 
796 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
797                               uint64_t Mispreds) {
798   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
799   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
800   if (!FromFunc && !ToFunc)
801     return false;
802 
803   if (FromFunc == ToFunc) {
804     recordBranch(*FromFunc, From - FromFunc->getAddress(),
805                  To - FromFunc->getAddress(), Count, Mispreds);
806     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
807   }
808 
809   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
810 }
811 
812 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
813                              uint64_t Count) {
814   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
815   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
816   if (!FromFunc || !ToFunc) {
817     LLVM_DEBUG(
818         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
819                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
820                << " and ending in " << ToFunc->getPrintName() << " @ "
821                << ToFunc->getPrintName() << " @ "
822                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
823     NumLongRangeTraces += Count;
824     return false;
825   }
826   if (FromFunc != ToFunc) {
827     NumInvalidTraces += Count;
828     LLVM_DEBUG(
829         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
830                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
831                << " and ending in " << ToFunc->getPrintName() << " @ "
832                << ToFunc->getPrintName() << " @ "
833                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
834     return false;
835   }
836 
837   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
838       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
839           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
840   if (!FTs) {
841     LLVM_DEBUG(
842         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
843                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
844                << " and ending in " << ToFunc->getPrintName() << " @ "
845                << ToFunc->getPrintName() << " @ "
846                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
847     NumInvalidTraces += Count;
848     return false;
849   }
850 
851   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
852                     << FromFunc->getPrintName() << ":"
853                     << Twine::utohexstr(First.To) << " to "
854                     << Twine::utohexstr(Second.From) << ".\n");
855   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
856     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
857                   Pair.second + FromFunc->getAddress(), Count, false);
858 
859   return true;
860 }
861 
862 bool DataAggregator::recordTrace(
863     BinaryFunction &BF,
864     const LBREntry &FirstLBR,
865     const LBREntry &SecondLBR,
866     uint64_t Count,
867     SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
868   BinaryContext &BC = BF.getBinaryContext();
869 
870   if (!BF.isSimple())
871     return false;
872 
873   assert(BF.hasCFG() && "can only record traces in CFG state");
874 
875   // Offsets of the trace within this function.
876   const uint64_t From = FirstLBR.To - BF.getAddress();
877   const uint64_t To = SecondLBR.From - BF.getAddress();
878 
879   if (From > To)
880     return false;
881 
882   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
883   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
884 
885   if (!FromBB || !ToBB)
886     return false;
887 
888   // Adjust FromBB if the first LBR is a return from the last instruction in
889   // the previous block (that instruction should be a call).
890   if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
891       !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
892     BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
893     if (PrevBB->getSuccessor(FromBB->getLabel())) {
894       const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
895       if (Instr && BC.MIB->isCall(*Instr))
896         FromBB = PrevBB;
897       else
898         LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
899                           << '\n');
900     } else {
901       LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
902     }
903   }
904 
905   // Fill out information for fall-through edges. The From and To could be
906   // within the same basic block, e.g. when two call instructions are in the
907   // same block. In this case we skip the processing.
908   if (FromBB == ToBB)
909     return true;
910 
911   // Process blocks in the original layout order.
912   BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
913   assert(BB == FromBB && "index mismatch");
914   while (BB != ToBB) {
915     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
916     assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
917 
918     // Check for bad LBRs.
919     if (!BB->getSuccessor(NextBB->getLabel())) {
920       LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
921                         << "  " << FirstLBR << '\n'
922                         << "  " << SecondLBR << '\n');
923       return false;
924     }
925 
926     // Record fall-through jumps
927     BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
928     BI.Count += Count;
929 
930     if (Branches) {
931       const MCInst *Instr = BB->getLastNonPseudoInstr();
932       uint64_t Offset = 0;
933       if (Instr)
934         Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
935       else
936         Offset = BB->getOffset();
937 
938       Branches->emplace_back(Offset, NextBB->getOffset());
939     }
940 
941     BB = NextBB;
942   }
943 
944   return true;
945 }
946 
947 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
948 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
949                                        const LBREntry &FirstLBR,
950                                        const LBREntry &SecondLBR,
951                                        uint64_t Count) const {
952   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
953 
954   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
955     return NoneType();
956 
957   return Res;
958 }
959 
960 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
961                                  uint64_t Count) const {
962   if (To > BF.getSize())
963     return false;
964 
965   if (!BF.hasProfile())
966     BF.ExecutionCount = 0;
967 
968   BinaryBasicBlock *EntryBB = nullptr;
969   if (To == 0) {
970     BF.ExecutionCount += Count;
971     if (!BF.empty())
972       EntryBB = &BF.front();
973   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
974     if (BB->isEntryPoint())
975       EntryBB = BB;
976   }
977 
978   if (EntryBB)
979     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
980 
981   return true;
982 }
983 
984 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
985                                 uint64_t Count) const {
986   if (!BF.isSimple() || From > BF.getSize())
987     return false;
988 
989   if (!BF.hasProfile())
990     BF.ExecutionCount = 0;
991 
992   return true;
993 }
994 
995 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
996   LBREntry Res;
997   ErrorOr<StringRef> FromStrRes = parseString('/');
998   if (std::error_code EC = FromStrRes.getError())
999     return EC;
1000   StringRef OffsetStr = FromStrRes.get();
1001   if (OffsetStr.getAsInteger(0, Res.From)) {
1002     reportError("expected hexadecimal number with From address");
1003     Diag << "Found: " << OffsetStr << "\n";
1004     return make_error_code(llvm::errc::io_error);
1005   }
1006 
1007   ErrorOr<StringRef> ToStrRes = parseString('/');
1008   if (std::error_code EC = ToStrRes.getError())
1009     return EC;
1010   OffsetStr = ToStrRes.get();
1011   if (OffsetStr.getAsInteger(0, Res.To)) {
1012     reportError("expected hexadecimal number with To address");
1013     Diag << "Found: " << OffsetStr << "\n";
1014     return make_error_code(llvm::errc::io_error);
1015   }
1016 
1017   ErrorOr<StringRef> MispredStrRes = parseString('/');
1018   if (std::error_code EC = MispredStrRes.getError())
1019     return EC;
1020   StringRef MispredStr = MispredStrRes.get();
1021   if (MispredStr.size() != 1 ||
1022       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1023     reportError("expected single char for mispred bit");
1024     Diag << "Found: " << MispredStr << "\n";
1025     return make_error_code(llvm::errc::io_error);
1026   }
1027   Res.Mispred = MispredStr[0] == 'M';
1028 
1029   static bool MispredWarning = true;
1030   if (MispredStr[0] == '-' && MispredWarning) {
1031     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1032     MispredWarning = false;
1033   }
1034 
1035   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1036   if (std::error_code EC = Rest.getError())
1037     return EC;
1038   if (Rest.get().size() < 5) {
1039     reportError("expected rest of LBR entry");
1040     Diag << "Found: " << Rest.get() << "\n";
1041     return make_error_code(llvm::errc::io_error);
1042   }
1043   return Res;
1044 }
1045 
1046 bool DataAggregator::checkAndConsumeFS() {
1047   if (ParsingBuf[0] != FieldSeparator)
1048     return false;
1049 
1050   ParsingBuf = ParsingBuf.drop_front(1);
1051   Col += 1;
1052   return true;
1053 }
1054 
1055 void DataAggregator::consumeRestOfLine() {
1056   size_t LineEnd = ParsingBuf.find_first_of('\n');
1057   if (LineEnd == StringRef::npos) {
1058     ParsingBuf = StringRef();
1059     Col = 0;
1060     Line += 1;
1061     return;
1062   }
1063   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1064   Col = 0;
1065   Line += 1;
1066 }
1067 
1068 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1069   PerfBranchSample Res;
1070 
1071   while (checkAndConsumeFS()) {
1072   }
1073 
1074   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1075   if (std::error_code EC = PIDRes.getError())
1076     return EC;
1077   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1078   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1079     consumeRestOfLine();
1080     return make_error_code(errc::no_such_process);
1081   }
1082 
1083   while (checkAndConsumeFS()) {
1084   }
1085 
1086   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1087   if (std::error_code EC = PCRes.getError())
1088     return EC;
1089   Res.PC = PCRes.get();
1090 
1091   if (checkAndConsumeNewLine())
1092     return Res;
1093 
1094   while (!checkAndConsumeNewLine()) {
1095     checkAndConsumeFS();
1096 
1097     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1098     if (std::error_code EC = LBRRes.getError())
1099       return EC;
1100     LBREntry LBR = LBRRes.get();
1101     if (ignoreKernelInterrupt(LBR))
1102       continue;
1103     if (!BC->HasFixedLoadAddress)
1104       adjustLBR(LBR, MMapInfoIter->second);
1105     Res.LBR.push_back(LBR);
1106   }
1107 
1108   return Res;
1109 }
1110 
1111 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1112   while (checkAndConsumeFS()) {
1113   }
1114 
1115   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1116   if (std::error_code EC = PIDRes.getError())
1117     return EC;
1118 
1119   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1120   if (MMapInfoIter == BinaryMMapInfo.end()) {
1121     consumeRestOfLine();
1122     return PerfBasicSample{StringRef(), 0};
1123   }
1124 
1125   while (checkAndConsumeFS()) {
1126   }
1127 
1128   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1129   if (std::error_code EC = Event.getError())
1130     return EC;
1131 
1132   while (checkAndConsumeFS()) {
1133   }
1134 
1135   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1136   if (std::error_code EC = AddrRes.getError())
1137     return EC;
1138 
1139   if (!checkAndConsumeNewLine()) {
1140     reportError("expected end of line");
1141     return make_error_code(llvm::errc::io_error);
1142   }
1143 
1144   uint64_t Address = *AddrRes;
1145   if (!BC->HasFixedLoadAddress)
1146     adjustAddress(Address, MMapInfoIter->second);
1147 
1148   return PerfBasicSample{Event.get(), Address};
1149 }
1150 
1151 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1152   PerfMemSample Res{0, 0};
1153 
1154   while (checkAndConsumeFS()) {
1155   }
1156 
1157   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1158   if (std::error_code EC = PIDRes.getError())
1159     return EC;
1160 
1161   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1162   if (MMapInfoIter == BinaryMMapInfo.end()) {
1163     consumeRestOfLine();
1164     return Res;
1165   }
1166 
1167   while (checkAndConsumeFS()) {
1168   }
1169 
1170   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1171   if (std::error_code EC = Event.getError())
1172     return EC;
1173   if (Event.get().find("mem-loads") == StringRef::npos) {
1174     consumeRestOfLine();
1175     return Res;
1176   }
1177 
1178   while (checkAndConsumeFS()) {
1179   }
1180 
1181   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1182   if (std::error_code EC = AddrRes.getError())
1183     return EC;
1184 
1185   while (checkAndConsumeFS()) {
1186   }
1187 
1188   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1189   if (std::error_code EC = PCRes.getError()) {
1190     consumeRestOfLine();
1191     return EC;
1192   }
1193 
1194   if (!checkAndConsumeNewLine()) {
1195     reportError("expected end of line");
1196     return make_error_code(llvm::errc::io_error);
1197   }
1198 
1199   uint64_t Address = *AddrRes;
1200   if (!BC->HasFixedLoadAddress)
1201     adjustAddress(Address, MMapInfoIter->second);
1202 
1203   return PerfMemSample{PCRes.get(), Address};
1204 }
1205 
1206 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1207   auto parseOffset = [this]() -> ErrorOr<Location> {
1208     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1209     if (std::error_code EC = Res.getError())
1210       return EC;
1211     return Location(Res.get());
1212   };
1213 
1214   size_t Sep = ParsingBuf.find_first_of(" \n");
1215   if (Sep == StringRef::npos)
1216     return parseOffset();
1217   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1218   if (LookAhead.find_first_of(":") == StringRef::npos)
1219     return parseOffset();
1220 
1221   ErrorOr<StringRef> BuildID = parseString(':');
1222   if (std::error_code EC = BuildID.getError())
1223     return EC;
1224   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1225   if (std::error_code EC = Offset.getError())
1226     return EC;
1227   return Location(true, BuildID.get(), Offset.get());
1228 }
1229 
1230 ErrorOr<DataAggregator::AggregatedLBREntry>
1231 DataAggregator::parseAggregatedLBREntry() {
1232   while (checkAndConsumeFS()) {
1233   }
1234 
1235   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1236   if (std::error_code EC = TypeOrErr.getError())
1237     return EC;
1238   auto Type = AggregatedLBREntry::BRANCH;
1239   if (TypeOrErr.get() == "B") {
1240     Type = AggregatedLBREntry::BRANCH;
1241   } else if (TypeOrErr.get() == "F") {
1242     Type = AggregatedLBREntry::FT;
1243   } else if (TypeOrErr.get() == "f") {
1244     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1245   } else {
1246     reportError("expected B, F or f");
1247     return make_error_code(llvm::errc::io_error);
1248   }
1249 
1250   while (checkAndConsumeFS()) {
1251   }
1252   ErrorOr<Location> From = parseLocationOrOffset();
1253   if (std::error_code EC = From.getError())
1254     return EC;
1255 
1256   while (checkAndConsumeFS()) {
1257   }
1258   ErrorOr<Location> To = parseLocationOrOffset();
1259   if (std::error_code EC = To.getError())
1260     return EC;
1261 
1262   while (checkAndConsumeFS()) {
1263   }
1264   ErrorOr<int64_t> Frequency =
1265       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1266   if (std::error_code EC = Frequency.getError())
1267     return EC;
1268 
1269   uint64_t Mispreds = 0;
1270   if (Type == AggregatedLBREntry::BRANCH) {
1271     while (checkAndConsumeFS()) {
1272     }
1273     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1274     if (std::error_code EC = MispredsOrErr.getError())
1275       return EC;
1276     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1277   }
1278 
1279   if (!checkAndConsumeNewLine()) {
1280     reportError("expected end of line");
1281     return make_error_code(llvm::errc::io_error);
1282   }
1283 
1284   return AggregatedLBREntry{From.get(), To.get(),
1285                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1286                             Type};
1287 }
1288 
1289 bool DataAggregator::hasData() {
1290   if (ParsingBuf.size() == 0)
1291     return false;
1292 
1293   return true;
1294 }
1295 
1296 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1297   return opts::IgnoreInterruptLBR &&
1298          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1299 }
1300 
1301 std::error_code DataAggregator::printLBRHeatMap() {
1302   outs() << "PERF2BOLT: parse branch events...\n";
1303   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1304                      TimerGroupDesc, opts::TimeAggregator);
1305 
1306   if (opts::LinuxKernelMode) {
1307     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1308     opts::HeatmapMinAddress = KernelBaseAddr;
1309   }
1310   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1311              opts::HeatmapMaxAddress, getTextSections(BC));
1312   uint64_t NumTotalSamples = 0;
1313 
1314   if (opts::BasicAggregation) {
1315     while (hasData()) {
1316       ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1317       if (std::error_code EC = SampleRes.getError()) {
1318         if (EC == errc::no_such_process)
1319           continue;
1320         return EC;
1321       }
1322       PerfBasicSample &Sample = SampleRes.get();
1323       HM.registerAddress(Sample.PC);
1324       NumTotalSamples++;
1325     }
1326     outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1327   } else {
1328     while (hasData()) {
1329       ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1330       if (std::error_code EC = SampleRes.getError()) {
1331         if (EC == errc::no_such_process)
1332           continue;
1333         return EC;
1334       }
1335 
1336       PerfBranchSample &Sample = SampleRes.get();
1337 
1338       // LBRs are stored in reverse execution order. NextLBR refers to the next
1339       // executed branch record.
1340       const LBREntry *NextLBR = nullptr;
1341       for (const LBREntry &LBR : Sample.LBR) {
1342         if (NextLBR) {
1343           // Record fall-through trace.
1344           const uint64_t TraceFrom = LBR.To;
1345           const uint64_t TraceTo = NextLBR->From;
1346           ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1347         }
1348         NextLBR = &LBR;
1349       }
1350       if (!Sample.LBR.empty()) {
1351         HM.registerAddress(Sample.LBR.front().To);
1352         HM.registerAddress(Sample.LBR.back().From);
1353       }
1354       NumTotalSamples += Sample.LBR.size();
1355     }
1356     outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1357     outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1358   }
1359 
1360   if (!NumTotalSamples) {
1361     if (opts::BasicAggregation) {
1362       errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1363                 "Cannot build heatmap.";
1364     } else {
1365       errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1366                 "Cannot build heatmap. Use -nl for building heatmap from "
1367                 "basic events.\n";
1368     }
1369     exit(1);
1370   }
1371 
1372   outs() << "HEATMAP: building heat map...\n";
1373 
1374   for (const auto &LBR : FallthroughLBRs) {
1375     const Trace &Trace = LBR.first;
1376     const FTInfo &Info = LBR.second;
1377     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1378   }
1379 
1380   if (HM.getNumInvalidRanges())
1381     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1382 
1383   if (!HM.size()) {
1384     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1385     exit(1);
1386   }
1387 
1388   HM.print(opts::OutputFilename);
1389   if (opts::OutputFilename == "-")
1390     HM.printCDF(opts::OutputFilename);
1391   else
1392     HM.printCDF(opts::OutputFilename + ".csv");
1393   if (opts::OutputFilename == "-")
1394     HM.printSectionHotness(opts::OutputFilename);
1395   else
1396     HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1397 
1398   return std::error_code();
1399 }
1400 
1401 std::error_code DataAggregator::parseBranchEvents() {
1402   outs() << "PERF2BOLT: parse branch events...\n";
1403   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1404                      TimerGroupDesc, opts::TimeAggregator);
1405 
1406   uint64_t NumTotalSamples = 0;
1407   uint64_t NumEntries = 0;
1408   uint64_t NumSamples = 0;
1409   uint64_t NumSamplesNoLBR = 0;
1410   uint64_t NumTraces = 0;
1411   bool NeedsSkylakeFix = false;
1412 
1413   while (hasData() && NumTotalSamples < opts::MaxSamples) {
1414     ++NumTotalSamples;
1415 
1416     ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1417     if (std::error_code EC = SampleRes.getError()) {
1418       if (EC == errc::no_such_process)
1419         continue;
1420       return EC;
1421     }
1422     ++NumSamples;
1423 
1424     PerfBranchSample &Sample = SampleRes.get();
1425     if (opts::WriteAutoFDOData)
1426       ++BasicSamples[Sample.PC];
1427 
1428     if (Sample.LBR.empty()) {
1429       ++NumSamplesNoLBR;
1430       continue;
1431     }
1432 
1433     NumEntries += Sample.LBR.size();
1434     if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1435       errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1436       NeedsSkylakeFix = true;
1437     }
1438 
1439     // LBRs are stored in reverse execution order. NextPC refers to the next
1440     // recorded executed PC.
1441     uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1442     uint32_t NumEntry = 0;
1443     for (const LBREntry &LBR : Sample.LBR) {
1444       ++NumEntry;
1445       // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1446       // sometimes record entry 32 as an exact copy of entry 31. This will cause
1447       // us to likely record an invalid trace and generate a stale function for
1448       // BAT mode (non BAT disassembles the function and is able to ignore this
1449       // trace at aggregation time). Drop first 2 entries (last two, in
1450       // chronological order)
1451       if (NeedsSkylakeFix && NumEntry <= 2)
1452         continue;
1453       if (NextPC) {
1454         // Record fall-through trace.
1455         const uint64_t TraceFrom = LBR.To;
1456         const uint64_t TraceTo = NextPC;
1457         const BinaryFunction *TraceBF =
1458             getBinaryFunctionContainingAddress(TraceFrom);
1459         if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1460           FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1461           if (TraceBF->containsAddress(LBR.From))
1462             ++Info.InternCount;
1463           else
1464             ++Info.ExternCount;
1465         } else {
1466           if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
1467             LLVM_DEBUG(dbgs()
1468                        << "Invalid trace starting in "
1469                        << TraceBF->getPrintName() << " @ "
1470                        << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
1471                        << " and ending @ " << Twine::utohexstr(TraceTo)
1472                        << '\n');
1473             ++NumInvalidTraces;
1474           } else {
1475             LLVM_DEBUG(dbgs()
1476                        << "Out of range trace starting in "
1477                        << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
1478                        << Twine::utohexstr(
1479                               TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1480                        << " and ending in "
1481                        << (getBinaryFunctionContainingAddress(TraceTo)
1482                                ? getBinaryFunctionContainingAddress(TraceTo)
1483                                      ->getPrintName()
1484                                : "None")
1485                        << " @ "
1486                        << Twine::utohexstr(
1487                               TraceTo -
1488                               (getBinaryFunctionContainingAddress(TraceTo)
1489                                    ? getBinaryFunctionContainingAddress(TraceTo)
1490                                          ->getAddress()
1491                                    : 0))
1492                        << '\n');
1493             ++NumLongRangeTraces;
1494           }
1495         }
1496         ++NumTraces;
1497       }
1498       NextPC = LBR.From;
1499 
1500       uint64_t From = LBR.From;
1501       if (!getBinaryFunctionContainingAddress(From))
1502         From = 0;
1503       uint64_t To = LBR.To;
1504       if (!getBinaryFunctionContainingAddress(To))
1505         To = 0;
1506       if (!From && !To)
1507         continue;
1508       BranchInfo &Info = BranchLBRs[Trace(From, To)];
1509       ++Info.TakenCount;
1510       Info.MispredCount += LBR.Mispred;
1511     }
1512   }
1513 
1514   for (const auto &LBR : BranchLBRs) {
1515     const Trace &Trace = LBR.first;
1516     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
1517       BF->setHasProfileAvailable();
1518     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
1519       BF->setHasProfileAvailable();
1520   }
1521 
1522   auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1523     OS << " (";
1524     if (OS.has_colors()) {
1525       if (Percent > T2)
1526         OS.changeColor(raw_ostream::RED);
1527       else if (Percent > T1)
1528         OS.changeColor(raw_ostream::YELLOW);
1529       else
1530         OS.changeColor(raw_ostream::GREEN);
1531     }
1532     OS << format("%.1f%%", Percent);
1533     if (OS.has_colors())
1534       OS.resetColor();
1535     OS << ")";
1536   };
1537 
1538   outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1539          << " LBR entries\n";
1540   if (NumTotalSamples) {
1541     if (NumSamples && NumSamplesNoLBR == NumSamples) {
1542       // Note: we don't know if perf2bolt is being used to parse memory samples
1543       // at this point. In this case, it is OK to parse zero LBRs.
1544       errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1545                 "LBR. Record profile with perf record -j any or run perf2bolt "
1546                 "in no-LBR mode with -nl (the performance improvement in -nl "
1547                 "mode may be limited)\n";
1548     } else {
1549       const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1550       const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1551       outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1552       printColored(outs(), PercentIgnored, 20, 50);
1553       outs() << " were ignored\n";
1554       if (PercentIgnored > 50.0f)
1555         errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1556                   "were attributed to the input binary\n";
1557     }
1558   }
1559   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1560          << NumInvalidTraces;
1561   float Perc = 0.0f;
1562   if (NumTraces > 0) {
1563     Perc = NumInvalidTraces * 100.0f / NumTraces;
1564     printColored(outs(), Perc, 5, 10);
1565   }
1566   outs() << "\n";
1567   if (Perc > 10.0f)
1568     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1569               "binary is probably not the same binary used during profiling "
1570               "collection. The generated data may be ineffective for improving "
1571               "performance.\n\n";
1572 
1573   outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1574          << NumLongRangeTraces;
1575   if (NumTraces > 0)
1576     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1577   outs() << "\n";
1578 
1579   if (NumColdSamples > 0) {
1580     const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1581     outs() << "PERF2BOLT: " << NumColdSamples
1582            << format(" (%.1f%%)", ColdSamples)
1583            << " samples recorded in cold regions of split functions.\n";
1584     if (ColdSamples > 5.0f)
1585       outs()
1586           << "WARNING: The BOLT-processed binary where samples were collected "
1587              "likely used bad data or your service observed a large shift in "
1588              "profile. You may want to audit this.\n";
1589   }
1590 
1591   return std::error_code();
1592 }
1593 
1594 void DataAggregator::processBranchEvents() {
1595   outs() << "PERF2BOLT: processing branch events...\n";
1596   NamedRegionTimer T("processBranch", "Processing branch events",
1597                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1598 
1599   for (const auto &AggrLBR : FallthroughLBRs) {
1600     const Trace &Loc = AggrLBR.first;
1601     const FTInfo &Info = AggrLBR.second;
1602     LBREntry First{Loc.From, Loc.From, false};
1603     LBREntry Second{Loc.To, Loc.To, false};
1604     if (Info.InternCount)
1605       doTrace(First, Second, Info.InternCount);
1606     if (Info.ExternCount) {
1607       First.From = 0;
1608       doTrace(First, Second, Info.ExternCount);
1609     }
1610   }
1611 
1612   for (const auto &AggrLBR : BranchLBRs) {
1613     const Trace &Loc = AggrLBR.first;
1614     const BranchInfo &Info = AggrLBR.second;
1615     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1616   }
1617 }
1618 
1619 std::error_code DataAggregator::parseBasicEvents() {
1620   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1621   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1622                      TimerGroupDesc, opts::TimeAggregator);
1623   while (hasData()) {
1624     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1625     if (std::error_code EC = Sample.getError())
1626       return EC;
1627 
1628     if (!Sample->PC)
1629       continue;
1630 
1631     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1632       BF->setHasProfileAvailable();
1633 
1634     ++BasicSamples[Sample->PC];
1635     EventNames.insert(Sample->EventName);
1636   }
1637 
1638   return std::error_code();
1639 }
1640 
1641 void DataAggregator::processBasicEvents() {
1642   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1643   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1644                      TimerGroupDesc, opts::TimeAggregator);
1645   uint64_t OutOfRangeSamples = 0;
1646   uint64_t NumSamples = 0;
1647   for (auto &Sample : BasicSamples) {
1648     const uint64_t PC = Sample.first;
1649     const uint64_t HitCount = Sample.second;
1650     NumSamples += HitCount;
1651     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1652     if (!Func) {
1653       OutOfRangeSamples += HitCount;
1654       continue;
1655     }
1656 
1657     doSample(*Func, PC, HitCount);
1658   }
1659   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1660 
1661   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1662          << OutOfRangeSamples;
1663   float Perc = 0.0f;
1664   if (NumSamples > 0) {
1665     outs() << " (";
1666     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1667     if (outs().has_colors()) {
1668       if (Perc > 60.0f)
1669         outs().changeColor(raw_ostream::RED);
1670       else if (Perc > 40.0f)
1671         outs().changeColor(raw_ostream::YELLOW);
1672       else
1673         outs().changeColor(raw_ostream::GREEN);
1674     }
1675     outs() << format("%.1f%%", Perc);
1676     if (outs().has_colors())
1677       outs().resetColor();
1678     outs() << ")";
1679   }
1680   outs() << "\n";
1681   if (Perc > 80.0f)
1682     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1683               "binary is probably not the same binary used during profiling "
1684               "collection. The generated data may be ineffective for improving "
1685               "performance.\n\n";
1686 }
1687 
1688 std::error_code DataAggregator::parseMemEvents() {
1689   outs() << "PERF2BOLT: parsing memory events...\n";
1690   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1691                      TimerGroupDesc, opts::TimeAggregator);
1692   while (hasData()) {
1693     ErrorOr<PerfMemSample> Sample = parseMemSample();
1694     if (std::error_code EC = Sample.getError())
1695       return EC;
1696 
1697     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1698       BF->setHasProfileAvailable();
1699 
1700     MemSamples.emplace_back(std::move(Sample.get()));
1701   }
1702 
1703   return std::error_code();
1704 }
1705 
1706 void DataAggregator::processMemEvents() {
1707   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1708                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1709   for (const PerfMemSample &Sample : MemSamples) {
1710     uint64_t PC = Sample.PC;
1711     uint64_t Addr = Sample.Addr;
1712     StringRef FuncName;
1713     StringRef MemName;
1714 
1715     // Try to resolve symbol for PC
1716     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1717     if (!Func) {
1718       LLVM_DEBUG(if (PC != 0) {
1719         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1720                << Twine::utohexstr(Addr) << "\n";
1721       });
1722       continue;
1723     }
1724 
1725     FuncName = Func->getOneName();
1726     PC -= Func->getAddress();
1727 
1728     // Try to resolve symbol for memory load
1729     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1730       MemName = BD->getName();
1731       Addr -= BD->getAddress();
1732     } else if (opts::FilterMemProfile) {
1733       // Filter out heap/stack accesses
1734       continue;
1735     }
1736 
1737     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1738     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1739 
1740     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1741     setMemData(*Func, MemData);
1742     MemData->update(FuncLoc, AddrLoc);
1743     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1744   }
1745 }
1746 
1747 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1748   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1749   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1750                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1751   while (hasData()) {
1752     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1753     if (std::error_code EC = AggrEntry.getError())
1754       return EC;
1755 
1756     if (BinaryFunction *BF =
1757             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1758       BF->setHasProfileAvailable();
1759     if (BinaryFunction *BF =
1760             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1761       BF->setHasProfileAvailable();
1762 
1763     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1764   }
1765 
1766   return std::error_code();
1767 }
1768 
1769 void DataAggregator::processPreAggregated() {
1770   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1771   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1772                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1773 
1774   uint64_t NumTraces = 0;
1775   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1776     switch (AggrEntry.EntryType) {
1777     case AggregatedLBREntry::BRANCH:
1778       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1779                AggrEntry.Mispreds);
1780       break;
1781     case AggregatedLBREntry::FT:
1782     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1783       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1784                          ? AggrEntry.From.Offset
1785                          : 0,
1786                      AggrEntry.From.Offset, false};
1787       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1788       doTrace(First, Second, AggrEntry.Count);
1789       NumTraces += AggrEntry.Count;
1790       break;
1791     }
1792     }
1793   }
1794 
1795   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1796          << " aggregated LBR entries\n";
1797   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1798          << NumInvalidTraces;
1799   float Perc = 0.0f;
1800   if (NumTraces > 0) {
1801     outs() << " (";
1802     Perc = NumInvalidTraces * 100.0f / NumTraces;
1803     if (outs().has_colors()) {
1804       if (Perc > 10.0f)
1805         outs().changeColor(raw_ostream::RED);
1806       else if (Perc > 5.0f)
1807         outs().changeColor(raw_ostream::YELLOW);
1808       else
1809         outs().changeColor(raw_ostream::GREEN);
1810     }
1811     outs() << format("%.1f%%", Perc);
1812     if (outs().has_colors())
1813       outs().resetColor();
1814     outs() << ")";
1815   }
1816   outs() << "\n";
1817   if (Perc > 10.0f)
1818     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1819               "binary is probably not the same binary used during profiling "
1820               "collection. The generated data may be ineffective for improving "
1821               "performance.\n\n";
1822 
1823   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1824          << NumLongRangeTraces;
1825   if (NumTraces > 0)
1826     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1827   outs() << "\n";
1828 }
1829 
1830 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1831   size_t LineEnd = ParsingBuf.find_first_of("\n");
1832   if (LineEnd == StringRef::npos) {
1833     reportError("expected rest of line");
1834     Diag << "Found: " << ParsingBuf << "\n";
1835     return NoneType();
1836   }
1837   StringRef Line = ParsingBuf.substr(0, LineEnd);
1838 
1839   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1840   if (Pos == StringRef::npos)
1841     return NoneType();
1842   Line = Line.drop_front(Pos);
1843 
1844   // Line:
1845   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1846   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1847   int32_t PID;
1848   if (PIDStr.getAsInteger(10, PID)) {
1849     reportError("expected PID");
1850     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1851     return NoneType();
1852   }
1853 
1854   return PID;
1855 }
1856 
1857 namespace {
1858 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1859   const StringRef SecTimeStr = TimeStr.split('.').first;
1860   const StringRef USecTimeStr = TimeStr.split('.').second;
1861   uint64_t SecTime;
1862   uint64_t USecTime;
1863   if (SecTimeStr.getAsInteger(10, SecTime) ||
1864       USecTimeStr.getAsInteger(10, USecTime))
1865     return NoneType();
1866   return SecTime * 1000000ULL + USecTime;
1867 }
1868 }
1869 
1870 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1871   while (checkAndConsumeFS()) {
1872   }
1873 
1874   size_t LineEnd = ParsingBuf.find_first_of("\n");
1875   if (LineEnd == StringRef::npos) {
1876     reportError("expected rest of line");
1877     Diag << "Found: " << ParsingBuf << "\n";
1878     return NoneType();
1879   }
1880   StringRef Line = ParsingBuf.substr(0, LineEnd);
1881 
1882   size_t Pos = Line.find("PERF_RECORD_FORK");
1883   if (Pos == StringRef::npos) {
1884     consumeRestOfLine();
1885     return NoneType();
1886   }
1887 
1888   ForkInfo FI;
1889 
1890   const StringRef TimeStr =
1891       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1892   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1893     FI.Time = *TimeRes;
1894   }
1895 
1896   Line = Line.drop_front(Pos);
1897 
1898   // Line:
1899   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1900   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1901   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1902     reportError("expected PID");
1903     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1904     return NoneType();
1905   }
1906 
1907   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1908   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1909     reportError("expected PID");
1910     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1911     return NoneType();
1912   }
1913 
1914   consumeRestOfLine();
1915 
1916   return FI;
1917 }
1918 
1919 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1920 DataAggregator::parseMMapEvent() {
1921   while (checkAndConsumeFS()) {
1922   }
1923 
1924   MMapInfo ParsedInfo;
1925 
1926   size_t LineEnd = ParsingBuf.find_first_of("\n");
1927   if (LineEnd == StringRef::npos) {
1928     reportError("expected rest of line");
1929     Diag << "Found: " << ParsingBuf << "\n";
1930     return make_error_code(llvm::errc::io_error);
1931   }
1932   StringRef Line = ParsingBuf.substr(0, LineEnd);
1933 
1934   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1935   if (Pos == StringRef::npos) {
1936     consumeRestOfLine();
1937     return std::make_pair(StringRef(), ParsedInfo);
1938   }
1939 
1940   // Line:
1941   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1942 
1943   const StringRef TimeStr =
1944       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1945   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1946     ParsedInfo.Time = *TimeRes;
1947 
1948   Line = Line.drop_front(Pos);
1949 
1950   // Line:
1951   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1952 
1953   StringRef FileName = Line.rsplit(FieldSeparator).second;
1954   if (FileName.startswith("//") || FileName.startswith("[")) {
1955     consumeRestOfLine();
1956     return std::make_pair(StringRef(), ParsedInfo);
1957   }
1958   FileName = sys::path::filename(FileName);
1959 
1960   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1961   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1962     reportError("expected PID");
1963     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1964     return make_error_code(llvm::errc::io_error);
1965   }
1966 
1967   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1968   if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1969     reportError("expected base address");
1970     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1971     return make_error_code(llvm::errc::io_error);
1972   }
1973 
1974   const StringRef SizeStr = Line.split('(').second.split(')').first;
1975   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1976     reportError("expected mmaped size");
1977     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1978     return make_error_code(llvm::errc::io_error);
1979   }
1980 
1981   const StringRef OffsetStr =
1982       Line.split('@').second.ltrim().split(FieldSeparator).first;
1983   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1984     reportError("expected mmaped page-aligned offset");
1985     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1986     return make_error_code(llvm::errc::io_error);
1987   }
1988 
1989   consumeRestOfLine();
1990 
1991   return std::make_pair(FileName, ParsedInfo);
1992 }
1993 
1994 std::error_code DataAggregator::parseMMapEvents() {
1995   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1996   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1997                      TimerGroupDesc, opts::TimeAggregator);
1998 
1999   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
2000   while (hasData()) {
2001     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
2002     if (std::error_code EC = FileMMapInfoRes.getError())
2003       return EC;
2004 
2005     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
2006     if (FileMMapInfo.second.PID == -1)
2007       continue;
2008 
2009     // Consider only the first mapping of the file for any given PID
2010     bool PIDExists = false;
2011     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
2012     for (auto MI = Range.first; MI != Range.second; ++MI) {
2013       if (MI->second.PID == FileMMapInfo.second.PID) {
2014         PIDExists = true;
2015         break;
2016       }
2017     }
2018     if (PIDExists)
2019       continue;
2020 
2021     GlobalMMapInfo.insert(FileMMapInfo);
2022   }
2023 
2024   LLVM_DEBUG({
2025     dbgs() << "FileName -> mmap info:\n";
2026     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
2027       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
2028              << Twine::utohexstr(Pair.second.MMapAddress) << ", "
2029              << Twine::utohexstr(Pair.second.Size) << " @ "
2030              << Twine::utohexstr(Pair.second.Offset) << "]\n";
2031   });
2032 
2033   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2034   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2035     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2036            << "\" for profile matching\n";
2037     NameToUse = BuildIDBinaryName;
2038   }
2039 
2040   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2041   for (auto I = Range.first; I != Range.second; ++I) {
2042     MMapInfo &MMapInfo = I->second;
2043     if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2044       // Check that the binary mapping matches one of the segments.
2045       bool MatchFound = false;
2046       for (auto &KV : BC->SegmentMapInfo) {
2047         SegmentInfo &SegInfo = KV.second;
2048         // The mapping is page-aligned and hence the MMapAddress could be
2049         // different from the segment start address. We cannot know the page
2050         // size of the mapping, but we know it should not exceed the segment
2051         // alignment value. Hence we are performing an approximate check.
2052         if (SegInfo.Address >= MMapInfo.MMapAddress &&
2053             SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
2054           MatchFound = true;
2055           break;
2056         }
2057       }
2058       if (!MatchFound) {
2059         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2060                << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2061         continue;
2062       }
2063     }
2064 
2065     // Set base address for shared objects.
2066     if (!BC->HasFixedLoadAddress) {
2067       Optional<uint64_t> BaseAddress =
2068           BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2069       if (!BaseAddress) {
2070         errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2071                   "binary when memory mapped at 0x"
2072                << Twine::utohexstr(MMapInfo.MMapAddress)
2073                << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2074                << ". Ignoring profile data for this mapping\n";
2075         continue;
2076       } else {
2077         MMapInfo.BaseAddress = *BaseAddress;
2078       }
2079     }
2080 
2081     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2082   }
2083 
2084   if (BinaryMMapInfo.empty()) {
2085     if (errs().has_colors())
2086       errs().changeColor(raw_ostream::RED);
2087     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2088            << BC->getFilename() << "\".";
2089     if (!GlobalMMapInfo.empty()) {
2090       errs() << " Profile for the following binary name(s) is available:\n";
2091       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2092            I = GlobalMMapInfo.upper_bound(I->first))
2093         errs() << "  " << I->first << '\n';
2094       errs() << "Please rename the input binary.\n";
2095     } else {
2096       errs() << " Failed to extract any binary name from a profile.\n";
2097     }
2098     if (errs().has_colors())
2099       errs().resetColor();
2100 
2101     exit(1);
2102   }
2103 
2104   return std::error_code();
2105 }
2106 
2107 std::error_code DataAggregator::parseTaskEvents() {
2108   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2109   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2110                      TimerGroupDesc, opts::TimeAggregator);
2111 
2112   while (hasData()) {
2113     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2114       // Remove forked child that ran execve
2115       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2116       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2117         BinaryMMapInfo.erase(MMapInfoIter);
2118       consumeRestOfLine();
2119       continue;
2120     }
2121 
2122     Optional<ForkInfo> ForkInfo = parseForkEvent();
2123     if (!ForkInfo)
2124       continue;
2125 
2126     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2127       continue;
2128 
2129     if (ForkInfo->Time == 0) {
2130       // Process was forked and mmaped before perf ran. In this case the child
2131       // should have its own mmap entry unless it was execve'd.
2132       continue;
2133     }
2134 
2135     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2136     if (MMapInfoIter == BinaryMMapInfo.end())
2137       continue;
2138 
2139     MMapInfo MMapInfo = MMapInfoIter->second;
2140     MMapInfo.PID = ForkInfo->ChildPID;
2141     MMapInfo.Forked = true;
2142     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2143   }
2144 
2145   outs() << "PERF2BOLT: input binary is associated with "
2146          << BinaryMMapInfo.size() << " PID(s)\n";
2147 
2148   LLVM_DEBUG({
2149     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2150       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2151              << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
2152              << Twine::utohexstr(MMI.second.Size) << ")\n";
2153   });
2154 
2155   return std::error_code();
2156 }
2157 
2158 Optional<std::pair<StringRef, StringRef>>
2159 DataAggregator::parseNameBuildIDPair() {
2160   while (checkAndConsumeFS()) {
2161   }
2162 
2163   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2164   if (std::error_code EC = BuildIDStr.getError())
2165     return NoneType();
2166 
2167   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2168   if (std::error_code EC = NameStr.getError())
2169     return NoneType();
2170 
2171   consumeRestOfLine();
2172   return std::make_pair(NameStr.get(), BuildIDStr.get());
2173 }
2174 
2175 Optional<StringRef>
2176 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2177   while (hasData()) {
2178     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2179     if (!IDPair)
2180       return NoneType();
2181 
2182     if (IDPair->second.startswith(FileBuildID))
2183       return sys::path::filename(IDPair->first);
2184   }
2185   return NoneType();
2186 }
2187 
2188 std::error_code
2189 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2190   std::error_code EC;
2191   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2192   if (EC)
2193     return EC;
2194 
2195   bool WriteMemLocs = false;
2196 
2197   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2198     if (WriteMemLocs)
2199       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2200     else
2201       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2202     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2203             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2204   };
2205 
2206   uint64_t BranchValues = 0;
2207   uint64_t MemValues = 0;
2208 
2209   if (BAT)
2210     OutFile << "boltedcollection\n";
2211   if (opts::BasicAggregation) {
2212     OutFile << "no_lbr";
2213     for (const StringMapEntry<NoneType> &Entry : EventNames)
2214       OutFile << " " << Entry.getKey();
2215     OutFile << "\n";
2216 
2217     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2218       for (const SampleInfo &SI : Func.getValue().Data) {
2219         writeLocation(SI.Loc);
2220         OutFile << SI.Hits << "\n";
2221         ++BranchValues;
2222       }
2223     }
2224   } else {
2225     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2226       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2227         writeLocation(BI.From);
2228         writeLocation(BI.To);
2229         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2230         ++BranchValues;
2231       }
2232       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2233         // Do not output if source is a known symbol, since this was already
2234         // accounted for in the source function
2235         if (BI.From.IsSymbol)
2236           continue;
2237         writeLocation(BI.From);
2238         writeLocation(BI.To);
2239         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2240         ++BranchValues;
2241       }
2242     }
2243 
2244     WriteMemLocs = true;
2245     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2246       for (const MemInfo &MemEvent : Func.getValue().Data) {
2247         writeLocation(MemEvent.Offset);
2248         writeLocation(MemEvent.Addr);
2249         OutFile << MemEvent.Count << "\n";
2250         ++MemValues;
2251       }
2252     }
2253   }
2254 
2255   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2256          << " memory objects to " << OutputFilename << "\n";
2257 
2258   return std::error_code();
2259 }
2260 
2261 void DataAggregator::dump() const { DataReader::dump(); }
2262 
2263 void DataAggregator::dump(const LBREntry &LBR) const {
2264   Diag << "From: " << Twine::utohexstr(LBR.From)
2265        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2266        << "\n";
2267 }
2268 
2269 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2270   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2271   for (const LBREntry &LBR : Sample.LBR)
2272     dump(LBR);
2273 }
2274 
2275 void DataAggregator::dump(const PerfMemSample &Sample) const {
2276   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2277 }
2278