1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it, and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Process.h"
27 #include "llvm/Support/Program.h"
28 #include "llvm/Support/Regex.h"
29 #include "llvm/Support/Timer.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <map>
32 #include <unordered_map>
33 #include <utility>
34 
35 #define DEBUG_TYPE "aggregator"
36 
37 using namespace llvm;
38 using namespace bolt;
39 
40 namespace opts {
41 
42 static cl::opt<bool>
43 BasicAggregation("nl",
44   cl::desc("aggregate basic samples (without LBR info)"),
45   cl::init(false),
46   cl::ZeroOrMore,
47   cl::cat(AggregatorCategory));
48 
49 static cl::opt<bool>
50 FilterMemProfile("filter-mem-profile",
51   cl::desc("if processing a memory profile, filter out stack or heap accesses "
52            "that won't be useful for BOLT to reduce profile file size"),
53   cl::init(true),
54   cl::cat(AggregatorCategory));
55 
56 static cl::opt<unsigned long long>
57 FilterPID("pid",
58   cl::desc("only use samples from process with specified PID"),
59   cl::init(0),
60   cl::Optional,
61   cl::cat(AggregatorCategory));
62 
63 static cl::opt<bool>
64 IgnoreBuildID("ignore-build-id",
65   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
66   cl::init(false),
67   cl::cat(AggregatorCategory));
68 
69 static cl::opt<bool>
70 IgnoreInterruptLBR("ignore-interrupt-lbr",
71   cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
72   cl::init(true),
73   cl::ZeroOrMore,
74   cl::cat(AggregatorCategory));
75 
76 static cl::opt<unsigned long long>
77 MaxSamples("max-samples",
78   cl::init(-1ULL),
79   cl::desc("maximum number of samples to read from LBR profile"),
80   cl::Optional,
81   cl::Hidden,
82   cl::cat(AggregatorCategory));
83 
84 static cl::opt<bool>
85 ReadPreAggregated("pa",
86   cl::desc("skip perf and read data from a pre-aggregated file format"),
87   cl::init(false),
88   cl::ZeroOrMore,
89   cl::cat(AggregatorCategory));
90 
91 static cl::opt<bool>
92 TimeAggregator("time-aggr",
93   cl::desc("time BOLT aggregator"),
94   cl::init(false),
95   cl::ZeroOrMore,
96   cl::cat(AggregatorCategory));
97 
98 static cl::opt<bool>
99 UseEventPC("use-event-pc",
100   cl::desc("use event PC in combination with LBR sampling"),
101   cl::init(false),
102   cl::ZeroOrMore,
103   cl::cat(AggregatorCategory));
104 
105 static cl::opt<bool>
106 WriteAutoFDOData("autofdo",
107   cl::desc("generate autofdo textual data instead of bolt data"),
108   cl::init(false),
109   cl::ZeroOrMore,
110   cl::cat(AggregatorCategory));
111 
112 } // namespace opts
113 
namespace {

// Timer group name and description handed to the NamedRegionTimer objects
// created in this file.
constexpr char TimerGroupName[] = "aggregator";
constexpr char TimerGroupDesc[] = "Aggregator";

} // namespace
120 
121 constexpr uint64_t DataAggregator::KernelBaseAddr;
122 
123 DataAggregator::~DataAggregator() { deleteTempFiles(); }
124 
125 namespace {
126 void deleteTempFile(const std::string &FileName) {
127   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
128     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
129            << " with error " << Errc.message() << "\n";
130 }
131 }
132 
133 void DataAggregator::deleteTempFiles() {
134   for (std::string &FileName : TempFiles)
135     deleteTempFile(FileName);
136   TempFiles.clear();
137 }
138 
139 void DataAggregator::findPerfExecutable() {
140   Optional<std::string> PerfExecutable =
141       sys::Process::FindInEnvPath("PATH", "perf");
142   if (!PerfExecutable) {
143     outs() << "PERF2BOLT: No perf executable found!\n";
144     exit(1);
145   }
146   PerfPath = *PerfExecutable;
147 }
148 
149 void DataAggregator::start() {
150   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
151 
152   // Don't launch perf for pre-aggregated files
153   if (opts::ReadPreAggregated)
154     return;
155 
156   findPerfExecutable();
157 
158   if (opts::BasicAggregation)
159     launchPerfProcess("events without LBR",
160                       MainEventsPPI,
161                       "script -F pid,event,ip",
162                       /*Wait = */false);
163   else
164     launchPerfProcess("branch events",
165                       MainEventsPPI,
166                       "script -F pid,ip,brstack",
167                       /*Wait = */false);
168 
169   // Note: we launch script for mem events regardless of the option, as the
170   //       command fails fairly fast if mem events were not collected.
171   launchPerfProcess("mem events",
172                     MemEventsPPI,
173                     "script -F pid,event,addr,ip",
174                     /*Wait = */false);
175 
176   launchPerfProcess("process events",
177                     MMapEventsPPI,
178                     "script --show-mmap-events",
179                     /*Wait = */false);
180 
181   launchPerfProcess("task events",
182                     TaskEventsPPI,
183                     "script --show-task-events",
184                     /*Wait = */false);
185 }
186 
187 void DataAggregator::abort() {
188   if (opts::ReadPreAggregated)
189     return;
190 
191   std::string Error;
192 
193   // Kill subprocesses in case they are not finished
194   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
195   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
196   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
197   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
198 
199   deleteTempFiles();
200 
201   exit(1);
202 }
203 
204 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
205                                        const char *ArgsString, bool Wait) {
206   SmallVector<StringRef, 4> Argv;
207 
208   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
209   Argv.push_back(PerfPath.data());
210 
211   char *WritableArgsString = strdup(ArgsString);
212   char *Str = WritableArgsString;
213   do {
214     Argv.push_back(Str);
215     while (*Str && *Str != ' ')
216       ++Str;
217     if (!*Str)
218       break;
219     *Str++ = 0;
220   } while (true);
221 
222   Argv.push_back("-f");
223   Argv.push_back("-i");
224   Argv.push_back(Filename.c_str());
225 
226   if (std::error_code Errc =
227           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
228     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
229            << " with error " << Errc.message() << "\n";
230     exit(1);
231   }
232   TempFiles.push_back(PPI.StdoutPath.data());
233 
234   if (std::error_code Errc =
235           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
236     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
237            << " with error " << Errc.message() << "\n";
238     exit(1);
239   }
240   TempFiles.push_back(PPI.StderrPath.data());
241 
242   Optional<StringRef> Redirects[] = {
243       llvm::None,                        // Stdin
244       StringRef(PPI.StdoutPath.data()),  // Stdout
245       StringRef(PPI.StderrPath.data())}; // Stderr
246 
247   LLVM_DEBUG({
248     dbgs() << "Launching perf: ";
249     for (StringRef Arg : Argv)
250       dbgs() << Arg << " ";
251     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
252            << "\n";
253   });
254 
255   if (Wait)
256     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
257                                             /*envp*/ llvm::None, Redirects);
258   else
259     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
260                                 Redirects);
261 
262   free(WritableArgsString);
263 }
264 
265 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
266   PerfProcessInfo BuildIDProcessInfo;
267   launchPerfProcess("buildid list",
268                     BuildIDProcessInfo,
269                     "buildid-list",
270                     /*Wait = */true);
271 
272   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
273     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
274         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
275     StringRef ErrBuf = (*MB)->getBuffer();
276 
277     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
278            << '\n';
279     errs() << ErrBuf;
280     return;
281   }
282 
283   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
284       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
285   if (std::error_code EC = MB.getError()) {
286     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
287            << EC.message() << "\n";
288     return;
289   }
290 
291   FileBuf = std::move(*MB);
292   ParsingBuf = FileBuf->getBuffer();
293   if (ParsingBuf.empty()) {
294     errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
295               "data was recorded without it\n";
296     return;
297   }
298 
299   Col = 0;
300   Line = 1;
301   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
302   if (!FileName) {
303     errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
304               "This indicates the input binary supplied for data aggregation "
305               "is not the same recorded by perf when collecting profiling "
306               "data, or there were no samples recorded for the binary. "
307               "Use -ignore-build-id option to override.\n";
308     if (!opts::IgnoreBuildID)
309       abort();
310   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
311     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
312     BuildIDBinaryName = std::string(*FileName);
313   } else {
314     outs() << "PERF2BOLT: matched build-id and file name\n";
315   }
316 
317   return;
318 }
319 
320 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
321   if (opts::ReadPreAggregated)
322     return true;
323 
324   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
325   if (!FD)
326     return false;
327 
328   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
329 
330   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
331   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
332       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
333   if (!BytesRead || *BytesRead != 7)
334     return false;
335 
336   if (strncmp(Buf, "PERFILE", 7) == 0)
337     return true;
338   return false;
339 }
340 
341 void DataAggregator::parsePreAggregated() {
342   std::string Error;
343 
344   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
345       MemoryBuffer::getFileOrSTDIN(Filename);
346   if (std::error_code EC = MB.getError()) {
347     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
348            << EC.message() << "\n";
349     exit(1);
350   }
351 
352   FileBuf = std::move(*MB);
353   ParsingBuf = FileBuf->getBuffer();
354   Col = 0;
355   Line = 1;
356   if (parsePreAggregatedLBRSamples()) {
357     errs() << "PERF2BOLT: failed to parse samples\n";
358     exit(1);
359   }
360 }
361 
362 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
363   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
364   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
365                      TimerGroupDesc, opts::TimeAggregator);
366 
367   std::error_code EC;
368   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
369   if (EC)
370     return EC;
371 
372   // Format:
373   // number of unique traces
374   // from_1-to_1:count_1
375   // from_2-to_2:count_2
376   // ......
377   // from_n-to_n:count_n
378   // number of unique sample addresses
379   // addr_1:count_1
380   // addr_2:count_2
381   // ......
382   // addr_n:count_n
383   // number of unique LBR entries
384   // src_1->dst_1:count_1
385   // src_2->dst_2:count_2
386   // ......
387   // src_n->dst_n:count_n
388 
389   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
390 
391   // AutoFDO addresses are relative to the first allocated loadable program
392   // segment
393   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
394     if (Address < FirstAllocAddress)
395       return 0;
396     return Address - FirstAllocAddress;
397   };
398 
399   OutFile << FallthroughLBRs.size() << "\n";
400   for (const auto &AggrLBR : FallthroughLBRs) {
401     const Trace &Trace = AggrLBR.first;
402     const FTInfo &Info = AggrLBR.second;
403     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
404             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
405             << (Info.InternCount + Info.ExternCount) << "\n";
406   }
407 
408   OutFile << BasicSamples.size() << "\n";
409   for (const auto &Sample : BasicSamples) {
410     uint64_t PC = Sample.first;
411     uint64_t HitCount = Sample.second;
412     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
413   }
414 
415   OutFile << BranchLBRs.size() << "\n";
416   for (const auto &AggrLBR : BranchLBRs) {
417     const Trace &Trace = AggrLBR.first;
418     const BranchInfo &Info = AggrLBR.second;
419     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
420             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
421             << Info.TakenCount << "\n";
422   }
423 
424   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
425          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
426          << " unique branches to " << OutputFilename << "\n";
427 
428   return std::error_code();
429 }
430 
431 void DataAggregator::filterBinaryMMapInfo() {
432   if (opts::FilterPID) {
433     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
434     if (MMapInfoIter != BinaryMMapInfo.end()) {
435       MMapInfo MMap = MMapInfoIter->second;
436       BinaryMMapInfo.clear();
437       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
438     } else {
439       if (errs().has_colors())
440         errs().changeColor(raw_ostream::RED);
441       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
442              << opts::FilterPID << "\""
443              << " for binary \"" << BC->getFilename() << "\".";
444       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
445       errs() << " Profile for the following process is available:\n";
446       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
447         outs() << "  " << MMI.second.PID
448                << (MMI.second.Forked ? " (forked)\n" : "\n");
449 
450       if (errs().has_colors())
451         errs().resetColor();
452 
453       exit(1);
454     }
455   }
456 }
457 
458 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
459   this->BC = &BC;
460 
461   if (opts::ReadPreAggregated) {
462     parsePreAggregated();
463     return Error::success();
464   }
465 
466   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
467     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
468     processFileBuildID(*FileBuildID);
469   } else {
470     errs() << "BOLT-WARNING: build-id will not be checked because we could "
471               "not read one from input binary\n";
472   }
473 
474   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
475     std::string Error;
476     outs() << "PERF2BOLT: waiting for perf " << Name
477            << " collection to finish...\n";
478     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
479 
480     if (!Error.empty()) {
481       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
482       deleteTempFiles();
483       exit(1);
484     }
485 
486     if (PI.ReturnCode != 0) {
487       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
488           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
489       StringRef ErrBuf = (*ErrorMB)->getBuffer();
490 
491       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
492       errs() << ErrBuf;
493       deleteTempFiles();
494       exit(1);
495     }
496 
497     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
498         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
499     if (std::error_code EC = MB.getError()) {
500       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
501              << EC.message() << "\n";
502       deleteTempFiles();
503       exit(1);
504     }
505 
506     FileBuf = std::move(*MB);
507     ParsingBuf = FileBuf->getBuffer();
508     Col = 0;
509     Line = 1;
510   };
511 
512   if (opts::LinuxKernelMode) {
513     // Current MMap parsing logic does not work with linux kernel.
514     // MMap entries for linux kernel uses PERF_RECORD_MMAP
515     // format instead of typical PERF_RECORD_MMAP2 format.
516     // Since linux kernel address mapping is absolute (same as
517     // in the ELF file), we avoid parsing MMap in linux kernel mode.
518     // While generating optimized linux kernel binary, we may need
519     // to parse MMap entries.
520 
521     // In linux kernel mode, we analyze and optimize
522     // all linux kernel binary instructions, irrespective
523     // of whether they are due to system calls or due to
524     // interrupts. Therefore, we cannot ignore interrupt
525     // in Linux kernel mode.
526     opts::IgnoreInterruptLBR = false;
527   } else {
528     prepareToParse("mmap events", MMapEventsPPI);
529     if (parseMMapEvents())
530       errs() << "PERF2BOLT: failed to parse mmap events\n";
531   }
532 
533   prepareToParse("task events", TaskEventsPPI);
534   if (parseTaskEvents())
535     errs() << "PERF2BOLT: failed to parse task events\n";
536 
537   filterBinaryMMapInfo();
538   prepareToParse("events", MainEventsPPI);
539 
540   if (opts::HeatmapMode) {
541     if (std::error_code EC = printLBRHeatMap()) {
542       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
543       exit(1);
544     }
545     exit(0);
546   }
547 
548   if ((!opts::BasicAggregation && parseBranchEvents()) ||
549       (opts::BasicAggregation && parseBasicEvents()))
550     errs() << "PERF2BOLT: failed to parse samples\n";
551 
552   // We can finish early if the goal is just to generate data for autofdo
553   if (opts::WriteAutoFDOData) {
554     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
555       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
556 
557     deleteTempFiles();
558     exit(0);
559   }
560 
561   // Special handling for memory events
562   std::string Error;
563   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
564   if (PI.ReturnCode != 0) {
565     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
566         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
567     StringRef ErrBuf = (*MB)->getBuffer();
568 
569     deleteTempFiles();
570 
571     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
572                  "Cannot print 'addr' field.");
573     if (!NoData.match(ErrBuf)) {
574       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
575       errs() << ErrBuf;
576       exit(1);
577     }
578     return Error::success();
579   }
580 
581   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
582       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
583   if (std::error_code EC = MB.getError()) {
584     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
585            << EC.message() << "\n";
586     deleteTempFiles();
587     exit(1);
588   }
589 
590   FileBuf = std::move(*MB);
591   ParsingBuf = FileBuf->getBuffer();
592   Col = 0;
593   Line = 1;
594   if (const std::error_code EC = parseMemEvents())
595     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
596            << '\n';
597 
598   deleteTempFiles();
599 
600   return Error::success();
601 }
602 
603 Error DataAggregator::readProfile(BinaryContext &BC) {
604   processProfile(BC);
605 
606   for (auto &BFI : BC.getBinaryFunctions()) {
607     BinaryFunction &Function = BFI.second;
608     convertBranchData(Function);
609   }
610 
611   if (opts::AggregateOnly) {
612     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
613       report_error("cannot create output data file", EC);
614   }
615 
616   return Error::success();
617 }
618 
619 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
620   return Function.hasProfileAvailable();
621 }
622 
623 void DataAggregator::processProfile(BinaryContext &BC) {
624   if (opts::ReadPreAggregated)
625     processPreAggregated();
626   else if (opts::BasicAggregation)
627     processBasicEvents();
628   else
629     processBranchEvents();
630 
631   processMemEvents();
632 
633   // Mark all functions with registered events as having a valid profile.
634   for (auto &BFI : BC.getBinaryFunctions()) {
635     BinaryFunction &BF = BFI.second;
636     if (getBranchData(BF)) {
637       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
638                                                 : BinaryFunction::PF_LBR;
639       BF.markProfiled(Flags);
640     }
641   }
642 
643   // Release intermediate storage.
644   clear(BranchLBRs);
645   clear(FallthroughLBRs);
646   clear(AggregatedLBRs);
647   clear(BasicSamples);
648   clear(MemSamples);
649 }
650 
651 BinaryFunction *
652 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
653   if (!BC->containsAddress(Address))
654     return nullptr;
655 
656   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
657                                                 /*UseMaxSize=*/true);
658 }
659 
660 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
661                                           uint64_t Count) {
662   if (!BAT)
663     return Func.getOneName();
664 
665   const BinaryFunction *OrigFunc = &Func;
666   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
667     NumColdSamples += Count;
668     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
669     if (HotFunc)
670       OrigFunc = HotFunc;
671   }
672   // If it is a local function, prefer the name containing the file name where
673   // the local function was declared
674   for (StringRef AlternativeName : OrigFunc->getNames()) {
675     size_t FileNameIdx = AlternativeName.find('/');
676     // Confirm the alternative name has the pattern Symbol/FileName/1 before
677     // using it
678     if (FileNameIdx == StringRef::npos ||
679         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
680       continue;
681     return AlternativeName;
682   }
683   return OrigFunc->getOneName();
684 }
685 
686 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
687                               uint64_t Count) {
688   auto I = NamesToSamples.find(Func.getOneName());
689   if (I == NamesToSamples.end()) {
690     bool Success;
691     StringRef LocName = getLocationName(Func, Count);
692     std::tie(I, Success) = NamesToSamples.insert(
693         std::make_pair(Func.getOneName(),
694                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
695   }
696 
697   Address -= Func.getAddress();
698   if (BAT)
699     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
700 
701   I->second.bumpCount(Address, Count);
702   return true;
703 }
704 
705 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
706                                    uint64_t To, uint64_t Count,
707                                    uint64_t Mispreds) {
708   FuncBranchData *AggrData = getBranchData(Func);
709   if (!AggrData) {
710     AggrData = &NamesToBranches[Func.getOneName()];
711     AggrData->Name = getLocationName(Func, Count);
712     setBranchData(Func, AggrData);
713   }
714 
715   From -= Func.getAddress();
716   To -= Func.getAddress();
717   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
718                     << " @ " << Twine::utohexstr(From) << " -> "
719                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
720                     << '\n');
721   if (BAT) {
722     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
723     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
724     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
725                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
726                       << " -> " << Func.getPrintName() << " @ "
727                       << Twine::utohexstr(To) << '\n');
728   }
729 
730   AggrData->bumpBranchCount(From, To, Count, Mispreds);
731   return true;
732 }
733 
734 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
735                                    BinaryFunction *ToFunc, uint64_t From,
736                                    uint64_t To, uint64_t Count,
737                                    uint64_t Mispreds) {
738   FuncBranchData *FromAggrData = nullptr;
739   FuncBranchData *ToAggrData = nullptr;
740   StringRef SrcFunc;
741   StringRef DstFunc;
742   if (FromFunc) {
743     SrcFunc = getLocationName(*FromFunc, Count);
744     FromAggrData = getBranchData(*FromFunc);
745     if (!FromAggrData) {
746       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
747       FromAggrData->Name = SrcFunc;
748       setBranchData(*FromFunc, FromAggrData);
749     }
750     From -= FromFunc->getAddress();
751     if (BAT)
752       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
753 
754     recordExit(*FromFunc, From, Mispreds, Count);
755   }
756   if (ToFunc) {
757     DstFunc = getLocationName(*ToFunc, 0);
758     ToAggrData = getBranchData(*ToFunc);
759     if (!ToAggrData) {
760       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
761       ToAggrData->Name = DstFunc;
762       setBranchData(*ToFunc, ToAggrData);
763     }
764     To -= ToFunc->getAddress();
765     if (BAT)
766       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
767 
768     recordEntry(*ToFunc, To, Mispreds, Count);
769   }
770 
771   if (FromAggrData)
772     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
773                                 Count, Mispreds);
774   if (ToAggrData)
775     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
776                                Count, Mispreds);
777   return true;
778 }
779 
780 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
781                               uint64_t Mispreds) {
782   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
783   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
784   if (!FromFunc && !ToFunc)
785     return false;
786 
787   if (FromFunc == ToFunc) {
788     recordBranch(*FromFunc, From - FromFunc->getAddress(),
789                  To - FromFunc->getAddress(), Count, Mispreds);
790     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
791   }
792 
793   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
794 }
795 
796 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
797                              uint64_t Count) {
798   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
799   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
800   if (!FromFunc || !ToFunc) {
801     LLVM_DEBUG(
802         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
803                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
804                << " and ending in " << ToFunc->getPrintName() << " @ "
805                << ToFunc->getPrintName() << " @ "
806                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
807     NumLongRangeTraces += Count;
808     return false;
809   }
810   if (FromFunc != ToFunc) {
811     NumInvalidTraces += Count;
812     LLVM_DEBUG(
813         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
814                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
815                << " and ending in " << ToFunc->getPrintName() << " @ "
816                << ToFunc->getPrintName() << " @ "
817                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
818     return false;
819   }
820 
821   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
822       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
823           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
824   if (!FTs) {
825     LLVM_DEBUG(
826         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
827                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
828                << " and ending in " << ToFunc->getPrintName() << " @ "
829                << ToFunc->getPrintName() << " @ "
830                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
831     NumInvalidTraces += Count;
832     return false;
833   }
834 
835   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
836                     << FromFunc->getPrintName() << ":"
837                     << Twine::utohexstr(First.To) << " to "
838                     << Twine::utohexstr(Second.From) << ".\n");
839   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
840     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
841                   Pair.second + FromFunc->getAddress(), Count, false);
842 
843   return true;
844 }
845 
// Record the fall-through trace that starts at the target of \p FirstLBR and
// ends at the source of \p SecondLBR inside \p BF. \p Count is added to the
// branch info of every edge between consecutive blocks of
// BF.BasicBlocksLayout that the trace walks over.
//
// If \p Branches is non-null, one (offset, next block offset) pair is appended
// to it for every edge traversed.
//
// Returns false when BF is not simple, when the start offset is past the end
// offset, when either end has no containing basic block, or when two
// consecutive layout blocks on the way are not linked by a successor edge.
// Returns true otherwise, including when both ends land in the same block.
//
// NOTE: counts already added to earlier edges are not rolled back when a later
// edge turns out to be missing and the function returns false.
bool DataAggregator::recordTrace(
    BinaryFunction &BF,
    const LBREntry &FirstLBR,
    const LBREntry &SecondLBR,
    uint64_t Count,
    SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
  BinaryContext &BC = BF.getBinaryContext();

  if (!BF.isSimple())
    return false;

  assert(BF.hasCFG() && "can only record traces in CFG state");

  // Offsets of the trace within this function.
  const uint64_t From = FirstLBR.To - BF.getAddress();
  const uint64_t To = SecondLBR.From - BF.getAddress();

  // A fall-through can only move forward in address space.
  if (From > To)
    return false;

  BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
  BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB)
    return false;

  // Adjust FromBB if the first LBR is a return from the last instruction in
  // the previous block (that instruction should be a call). This only applies
  // when the trace starts exactly at the beginning of FromBB, the first LBR
  // originated outside of BF, and FromBB is neither an entry point nor a
  // landing pad.
  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
    // NOTE(review): assumes FromBB is not the first block in the layout here;
    // presumably guaranteed because the first block is an entry point - confirm.
    BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
    if (PrevBB->getSuccessor(FromBB->getLabel())) {
      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
      if (Instr && BC.MIB->isCall(*Instr))
        FromBB = PrevBB;
      else
        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
                          << '\n');
    } else {
      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
    }
  }

  // Fill out information for fall-through edges. The From and To could be
  // within the same basic block, e.g. when two call instructions are in the
  // same block. In this case we skip the processing.
  if (FromBB == ToBB)
    return true;

  // Process blocks in the original layout order.
  BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
  assert(BB == FromBB && "index mismatch");
  while (BB != ToBB) {
    BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");

    // Check for bad LBRs: the next layout block must be a CFG successor of the
    // current one, otherwise the trace could not have fallen through.
    if (!BB->getSuccessor(NextBB->getLabel())) {
      LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
                        << "  " << FirstLBR << '\n'
                        << "  " << SecondLBR << '\n');
      return false;
    }

    // Record fall-through jumps
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
    BI.Count += Count;

    if (Branches) {
      // The source offset comes from the last non-pseudo instruction of the
      // block (presumably its recorded offset, 0 if none - confirm against
      // getOffsetWithDefault); an empty block contributes its own offset.
      const MCInst *Instr = BB->getLastNonPseudoInstr();
      uint64_t Offset = 0;
      if (Instr)
        Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
      else
        Offset = BB->getOffset();

      Branches->emplace_back(Offset, NextBB->getOffset());
    }

    BB = NextBB;
  }

  return true;
}
930 
931 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
932 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
933                                        const LBREntry &FirstLBR,
934                                        const LBREntry &SecondLBR,
935                                        uint64_t Count) const {
936   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
937 
938   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
939     return NoneType();
940 
941   return Res;
942 }
943 
944 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
945                                  uint64_t Count) const {
946   if (To > BF.getSize())
947     return false;
948 
949   if (!BF.hasProfile())
950     BF.ExecutionCount = 0;
951 
952   BinaryBasicBlock *EntryBB = nullptr;
953   if (To == 0) {
954     BF.ExecutionCount += Count;
955     if (!BF.empty())
956       EntryBB = &BF.front();
957   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
958     if (BB->isEntryPoint())
959       EntryBB = BB;
960   }
961 
962   if (EntryBB)
963     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
964 
965   return true;
966 }
967 
968 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
969                                 uint64_t Count) const {
970   if (!BF.isSimple() || From > BF.getSize())
971     return false;
972 
973   if (!BF.hasProfile())
974     BF.ExecutionCount = 0;
975 
976   return true;
977 }
978 
979 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
980   LBREntry Res;
981   ErrorOr<StringRef> FromStrRes = parseString('/');
982   if (std::error_code EC = FromStrRes.getError())
983     return EC;
984   StringRef OffsetStr = FromStrRes.get();
985   if (OffsetStr.getAsInteger(0, Res.From)) {
986     reportError("expected hexadecimal number with From address");
987     Diag << "Found: " << OffsetStr << "\n";
988     return make_error_code(llvm::errc::io_error);
989   }
990 
991   ErrorOr<StringRef> ToStrRes = parseString('/');
992   if (std::error_code EC = ToStrRes.getError())
993     return EC;
994   OffsetStr = ToStrRes.get();
995   if (OffsetStr.getAsInteger(0, Res.To)) {
996     reportError("expected hexadecimal number with To address");
997     Diag << "Found: " << OffsetStr << "\n";
998     return make_error_code(llvm::errc::io_error);
999   }
1000 
1001   ErrorOr<StringRef> MispredStrRes = parseString('/');
1002   if (std::error_code EC = MispredStrRes.getError())
1003     return EC;
1004   StringRef MispredStr = MispredStrRes.get();
1005   if (MispredStr.size() != 1 ||
1006       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1007     reportError("expected single char for mispred bit");
1008     Diag << "Found: " << MispredStr << "\n";
1009     return make_error_code(llvm::errc::io_error);
1010   }
1011   Res.Mispred = MispredStr[0] == 'M';
1012 
1013   static bool MispredWarning = true;
1014   if (MispredStr[0] == '-' && MispredWarning) {
1015     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1016     MispredWarning = false;
1017   }
1018 
1019   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1020   if (std::error_code EC = Rest.getError())
1021     return EC;
1022   if (Rest.get().size() < 5) {
1023     reportError("expected rest of LBR entry");
1024     Diag << "Found: " << Rest.get() << "\n";
1025     return make_error_code(llvm::errc::io_error);
1026   }
1027   return Res;
1028 }
1029 
1030 bool DataAggregator::checkAndConsumeFS() {
1031   if (ParsingBuf[0] != FieldSeparator)
1032     return false;
1033 
1034   ParsingBuf = ParsingBuf.drop_front(1);
1035   Col += 1;
1036   return true;
1037 }
1038 
1039 void DataAggregator::consumeRestOfLine() {
1040   size_t LineEnd = ParsingBuf.find_first_of('\n');
1041   if (LineEnd == StringRef::npos) {
1042     ParsingBuf = StringRef();
1043     Col = 0;
1044     Line += 1;
1045     return;
1046   }
1047   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1048   Col = 0;
1049   Line += 1;
1050 }
1051 
1052 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1053   PerfBranchSample Res;
1054 
1055   while (checkAndConsumeFS()) {
1056   }
1057 
1058   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1059   if (std::error_code EC = PIDRes.getError())
1060     return EC;
1061   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1062   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1063     consumeRestOfLine();
1064     return make_error_code(errc::no_such_process);
1065   }
1066 
1067   while (checkAndConsumeFS()) {
1068   }
1069 
1070   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1071   if (std::error_code EC = PCRes.getError())
1072     return EC;
1073   Res.PC = PCRes.get();
1074 
1075   if (checkAndConsumeNewLine())
1076     return Res;
1077 
1078   while (!checkAndConsumeNewLine()) {
1079     checkAndConsumeFS();
1080 
1081     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1082     if (std::error_code EC = LBRRes.getError())
1083       return EC;
1084     LBREntry LBR = LBRRes.get();
1085     if (ignoreKernelInterrupt(LBR))
1086       continue;
1087     if (!BC->HasFixedLoadAddress)
1088       adjustLBR(LBR, MMapInfoIter->second);
1089     Res.LBR.push_back(LBR);
1090   }
1091 
1092   return Res;
1093 }
1094 
1095 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1096   while (checkAndConsumeFS()) {
1097   }
1098 
1099   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1100   if (std::error_code EC = PIDRes.getError())
1101     return EC;
1102 
1103   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1104   if (MMapInfoIter == BinaryMMapInfo.end()) {
1105     consumeRestOfLine();
1106     return PerfBasicSample{StringRef(), 0};
1107   }
1108 
1109   while (checkAndConsumeFS()) {
1110   }
1111 
1112   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1113   if (std::error_code EC = Event.getError())
1114     return EC;
1115 
1116   while (checkAndConsumeFS()) {
1117   }
1118 
1119   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1120   if (std::error_code EC = AddrRes.getError())
1121     return EC;
1122 
1123   if (!checkAndConsumeNewLine()) {
1124     reportError("expected end of line");
1125     return make_error_code(llvm::errc::io_error);
1126   }
1127 
1128   uint64_t Address = *AddrRes;
1129   if (!BC->HasFixedLoadAddress)
1130     adjustAddress(Address, MMapInfoIter->second);
1131 
1132   return PerfBasicSample{Event.get(), Address};
1133 }
1134 
1135 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1136   PerfMemSample Res{0, 0};
1137 
1138   while (checkAndConsumeFS()) {
1139   }
1140 
1141   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1142   if (std::error_code EC = PIDRes.getError())
1143     return EC;
1144 
1145   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1146   if (MMapInfoIter == BinaryMMapInfo.end()) {
1147     consumeRestOfLine();
1148     return Res;
1149   }
1150 
1151   while (checkAndConsumeFS()) {
1152   }
1153 
1154   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1155   if (std::error_code EC = Event.getError())
1156     return EC;
1157   if (Event.get().find("mem-loads") == StringRef::npos) {
1158     consumeRestOfLine();
1159     return Res;
1160   }
1161 
1162   while (checkAndConsumeFS()) {
1163   }
1164 
1165   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1166   if (std::error_code EC = AddrRes.getError())
1167     return EC;
1168 
1169   while (checkAndConsumeFS()) {
1170   }
1171 
1172   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1173   if (std::error_code EC = PCRes.getError()) {
1174     consumeRestOfLine();
1175     return EC;
1176   }
1177 
1178   if (!checkAndConsumeNewLine()) {
1179     reportError("expected end of line");
1180     return make_error_code(llvm::errc::io_error);
1181   }
1182 
1183   uint64_t Address = *AddrRes;
1184   if (!BC->HasFixedLoadAddress)
1185     adjustAddress(Address, MMapInfoIter->second);
1186 
1187   return PerfMemSample{PCRes.get(), Address};
1188 }
1189 
1190 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1191   auto parseOffset = [this]() -> ErrorOr<Location> {
1192     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1193     if (std::error_code EC = Res.getError())
1194       return EC;
1195     return Location(Res.get());
1196   };
1197 
1198   size_t Sep = ParsingBuf.find_first_of(" \n");
1199   if (Sep == StringRef::npos)
1200     return parseOffset();
1201   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1202   if (LookAhead.find_first_of(":") == StringRef::npos)
1203     return parseOffset();
1204 
1205   ErrorOr<StringRef> BuildID = parseString(':');
1206   if (std::error_code EC = BuildID.getError())
1207     return EC;
1208   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1209   if (std::error_code EC = Offset.getError())
1210     return EC;
1211   return Location(true, BuildID.get(), Offset.get());
1212 }
1213 
1214 ErrorOr<DataAggregator::AggregatedLBREntry>
1215 DataAggregator::parseAggregatedLBREntry() {
1216   while (checkAndConsumeFS()) {
1217   }
1218 
1219   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1220   if (std::error_code EC = TypeOrErr.getError())
1221     return EC;
1222   auto Type = AggregatedLBREntry::BRANCH;
1223   if (TypeOrErr.get() == "B") {
1224     Type = AggregatedLBREntry::BRANCH;
1225   } else if (TypeOrErr.get() == "F") {
1226     Type = AggregatedLBREntry::FT;
1227   } else if (TypeOrErr.get() == "f") {
1228     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1229   } else {
1230     reportError("expected B, F or f");
1231     return make_error_code(llvm::errc::io_error);
1232   }
1233 
1234   while (checkAndConsumeFS()) {
1235   }
1236   ErrorOr<Location> From = parseLocationOrOffset();
1237   if (std::error_code EC = From.getError())
1238     return EC;
1239 
1240   while (checkAndConsumeFS()) {
1241   }
1242   ErrorOr<Location> To = parseLocationOrOffset();
1243   if (std::error_code EC = To.getError())
1244     return EC;
1245 
1246   while (checkAndConsumeFS()) {
1247   }
1248   ErrorOr<int64_t> Frequency =
1249       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1250   if (std::error_code EC = Frequency.getError())
1251     return EC;
1252 
1253   uint64_t Mispreds = 0;
1254   if (Type == AggregatedLBREntry::BRANCH) {
1255     while (checkAndConsumeFS()) {
1256     }
1257     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1258     if (std::error_code EC = MispredsOrErr.getError())
1259       return EC;
1260     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1261   }
1262 
1263   if (!checkAndConsumeNewLine()) {
1264     reportError("expected end of line");
1265     return make_error_code(llvm::errc::io_error);
1266   }
1267 
1268   return AggregatedLBREntry{From.get(), To.get(),
1269                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1270                             Type};
1271 }
1272 
1273 bool DataAggregator::hasData() {
1274   if (ParsingBuf.size() == 0)
1275     return false;
1276 
1277   return true;
1278 }
1279 
1280 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1281   return opts::IgnoreInterruptLBR &&
1282          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1283 }
1284 
1285 std::error_code DataAggregator::printLBRHeatMap() {
1286   outs() << "PERF2BOLT: parse branch events...\n";
1287   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1288                      TimerGroupDesc, opts::TimeAggregator);
1289 
1290   if (opts::LinuxKernelMode) {
1291     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1292     opts::HeatmapMinAddress = KernelBaseAddr;
1293   }
1294   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1295              opts::HeatmapMaxAddress);
1296   uint64_t NumTotalSamples = 0;
1297 
1298   if (opts::BasicAggregation) {
1299     while (hasData()) {
1300       ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1301       if (std::error_code EC = SampleRes.getError()) {
1302         if (EC == errc::no_such_process)
1303           continue;
1304         return EC;
1305       }
1306       PerfBasicSample &Sample = SampleRes.get();
1307       HM.registerAddress(Sample.PC);
1308       NumTotalSamples++;
1309     }
1310     outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1311   } else {
1312     while (hasData()) {
1313       ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1314       if (std::error_code EC = SampleRes.getError()) {
1315         if (EC == errc::no_such_process)
1316           continue;
1317         return EC;
1318       }
1319 
1320       PerfBranchSample &Sample = SampleRes.get();
1321 
1322       // LBRs are stored in reverse execution order. NextLBR refers to the next
1323       // executed branch record.
1324       const LBREntry *NextLBR = nullptr;
1325       for (const LBREntry &LBR : Sample.LBR) {
1326         if (NextLBR) {
1327           // Record fall-through trace.
1328           const uint64_t TraceFrom = LBR.To;
1329           const uint64_t TraceTo = NextLBR->From;
1330           ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1331         }
1332         NextLBR = &LBR;
1333       }
1334       if (!Sample.LBR.empty()) {
1335         HM.registerAddress(Sample.LBR.front().To);
1336         HM.registerAddress(Sample.LBR.back().From);
1337       }
1338       NumTotalSamples += Sample.LBR.size();
1339     }
1340     outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1341     outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1342   }
1343 
1344   if (!NumTotalSamples) {
1345     if (opts::BasicAggregation) {
1346       errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1347                 "Cannot build heatmap.";
1348     } else {
1349       errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1350                 "Cannot build heatmap. Use -nl for building heatmap from "
1351                 "basic events.\n";
1352     }
1353     exit(1);
1354   }
1355 
1356   outs() << "HEATMAP: building heat map...\n";
1357 
1358   for (const auto &LBR : FallthroughLBRs) {
1359     const Trace &Trace = LBR.first;
1360     const FTInfo &Info = LBR.second;
1361     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1362   }
1363 
1364   if (HM.getNumInvalidRanges())
1365     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1366 
1367   if (!HM.size()) {
1368     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1369     exit(1);
1370   }
1371 
1372   HM.print(opts::OutputFilename);
1373   if (opts::OutputFilename == "-")
1374     HM.printCDF(opts::OutputFilename);
1375   else
1376     HM.printCDF(opts::OutputFilename + ".csv");
1377 
1378   return std::error_code();
1379 }
1380 
// Parse all branch (LBR) samples from the parsing buffer, up to
// opts::MaxSamples, and aggregate them.
//
// Taken branches are accumulated in BranchLBRs and fall-through traces between
// consecutive LBR entries in FallthroughLBRs. Functions touched by any
// recorded branch are marked as having profile available. Afterwards summary
// statistics and warnings about ignored samples, invalid traces, out-of-range
// traces and cold-region samples are printed.
//
// Samples rejected with errc::no_such_process are counted as ignored; any
// other parse error aborts parsing and is returned to the caller.
std::error_code DataAggregator::parseBranchEvents() {
  outs() << "PERF2BOLT: parse branch events...\n";
  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // NumTotalSamples counts every sample we attempted to parse, NumSamples only
  // those that were parsed successfully (i.e. not rejected as belonging to
  // another process).
  uint64_t NumTotalSamples = 0;
  uint64_t NumEntries = 0;
  uint64_t NumSamples = 0;
  uint64_t NumSamplesNoLBR = 0;
  uint64_t NumTraces = 0;
  bool NeedsSkylakeFix = false;

  while (hasData() && NumTotalSamples < opts::MaxSamples) {
    ++NumTotalSamples;

    ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
    if (std::error_code EC = SampleRes.getError()) {
      if (EC == errc::no_such_process)
        continue;
      return EC;
    }
    ++NumSamples;

    PerfBranchSample &Sample = SampleRes.get();
    if (opts::WriteAutoFDOData)
      ++BasicSamples[Sample.PC];

    if (Sample.LBR.empty()) {
      ++NumSamplesNoLBR;
      continue;
    }

    NumEntries += Sample.LBR.size();
    // The workaround is only enabled when BAT is set and, once enabled, stays
    // on for all subsequent samples.
    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
      errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
      NeedsSkylakeFix = true;
    }

    // LBRs are stored in reverse execution order. NextPC refers to the next
    // recorded executed PC.
    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
    uint32_t NumEntry = 0;
    for (const LBREntry &LBR : Sample.LBR) {
      ++NumEntry;
      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
      // sometimes record entry 32 as an exact copy of entry 31. This will cause
      // us to likely record an invalid trace and generate a stale function for
      // BAT mode (non BAT disassembles the function and is able to ignore this
      // trace at aggregation time). Drop first 2 entries (last two, in
      // chronological order)
      if (NeedsSkylakeFix && NumEntry <= 2)
        continue;
      if (NextPC) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextPC;
        const BinaryFunction *TraceBF =
            getBinaryFunctionContainingAddress(TraceFrom);
        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
          // Both ends are in the same function. The trace is internal if the
          // branch leading to it also originated in that function, external
          // otherwise.
          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
          if (TraceBF->containsAddress(LBR.From))
            ++Info.InternCount;
          else
            ++Info.ExternCount;
        } else {
          // The trace ends in a different known function (invalid), or at
          // least one of its ends is outside of any known function (out of
          // range).
          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
            LLVM_DEBUG(dbgs()
                       << "Invalid trace starting in "
                       << TraceBF->getPrintName() << " @ "
                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
                       << " and ending @ " << Twine::utohexstr(TraceTo)
                       << '\n');
            ++NumInvalidTraces;
          } else {
            LLVM_DEBUG(dbgs()
                       << "Out of range trace starting in "
                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
                       << Twine::utohexstr(
                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
                       << " and ending in "
                       << (getBinaryFunctionContainingAddress(TraceTo)
                               ? getBinaryFunctionContainingAddress(TraceTo)
                                     ->getPrintName()
                               : "None")
                       << " @ "
                       << Twine::utohexstr(
                              TraceTo -
                              (getBinaryFunctionContainingAddress(TraceTo)
                                   ? getBinaryFunctionContainingAddress(TraceTo)
                                         ->getAddress()
                                   : 0))
                       << '\n');
            ++NumLongRangeTraces;
          }
        }
        ++NumTraces;
      }
      NextPC = LBR.From;

      // Record the taken branch itself. Ends that fall outside of any known
      // function are replaced with 0; branches with both ends unknown are
      // dropped.
      uint64_t From = LBR.From;
      if (!getBinaryFunctionContainingAddress(From))
        From = 0;
      uint64_t To = LBR.To;
      if (!getBinaryFunctionContainingAddress(To))
        To = 0;
      if (!From && !To)
        continue;
      BranchInfo &Info = BranchLBRs[Trace(From, To)];
      ++Info.TakenCount;
      Info.MispredCount += LBR.Mispred;
    }
  }

  // Mark every function that appears at either end of a recorded branch.
  for (const auto &LBR : BranchLBRs) {
    const Trace &Trace = LBR.first;
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
      BF->setHasProfileAvailable();
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
      BF->setHasProfileAvailable();
  }

  // Print " (<Percent>%)" to OS. On streams with color support the value is
  // red above T2, yellow above T1 and green otherwise.
  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
    OS << " (";
    if (OS.has_colors()) {
      if (Percent > T2)
        OS.changeColor(raw_ostream::RED);
      else if (Percent > T1)
        OS.changeColor(raw_ostream::YELLOW);
      else
        OS.changeColor(raw_ostream::GREEN);
    }
    OS << format("%.1f%%", Percent);
    if (OS.has_colors())
      OS.resetColor();
    OS << ")";
  };

  outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
         << " LBR entries\n";
  if (NumTotalSamples) {
    if (NumSamples && NumSamplesNoLBR == NumSamples) {
      // Note: we don't know if perf2bolt is being used to parse memory samples
      // at this point. In this case, it is OK to parse zero LBRs.
      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
                "LBR. Record profile with perf record -j any or run perf2bolt "
                "in no-LBR mode with -nl (the performance improvement in -nl "
                "mode may be limited)\n";
    } else {
      // Ignored samples are those attempted but not successfully parsed for
      // this binary.
      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
      printColored(outs(), PercentIgnored, 20, 50);
      outs() << " were ignored\n";
      if (PercentIgnored > 50.0f)
        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
                  "were attributed to the input binary\n";
    }
  }
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;
  float Perc = 0.0f;
  if (NumTraces > 0) {
    Perc = NumInvalidTraces * 100.0f / NumTraces;
    printColored(outs(), Perc, 5, 10);
  }
  outs() << "\n";
  if (Perc > 10.0f)
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance.\n\n";

  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;
  if (NumTraces > 0)
    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
  outs() << "\n";

  // NOTE(review): NumColdSamples is maintained outside this function;
  // presumably it can only be non-zero once at least one sample was parsed, so
  // NumTotalSamples is non-zero here - confirm.
  if (NumColdSamples > 0) {
    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << "PERF2BOLT: " << NumColdSamples
           << format(" (%.1f%%)", ColdSamples)
           << " samples recorded in cold regions of split functions.\n";
    if (ColdSamples > 5.0f)
      outs()
          << "WARNING: The BOLT-processed binary where samples were collected "
             "likely used bad data or your service observed a large shift in "
             "profile. You may want to audit this.\n";
  }

  return std::error_code();
}
1573 
1574 void DataAggregator::processBranchEvents() {
1575   outs() << "PERF2BOLT: processing branch events...\n";
1576   NamedRegionTimer T("processBranch", "Processing branch events",
1577                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1578 
1579   for (const auto &AggrLBR : FallthroughLBRs) {
1580     const Trace &Loc = AggrLBR.first;
1581     const FTInfo &Info = AggrLBR.second;
1582     LBREntry First{Loc.From, Loc.From, false};
1583     LBREntry Second{Loc.To, Loc.To, false};
1584     if (Info.InternCount)
1585       doTrace(First, Second, Info.InternCount);
1586     if (Info.ExternCount) {
1587       First.From = 0;
1588       doTrace(First, Second, Info.ExternCount);
1589     }
1590   }
1591 
1592   for (const auto &AggrLBR : BranchLBRs) {
1593     const Trace &Loc = AggrLBR.first;
1594     const BranchInfo &Info = AggrLBR.second;
1595     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1596   }
1597 }
1598 
1599 std::error_code DataAggregator::parseBasicEvents() {
1600   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1601   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1602                      TimerGroupDesc, opts::TimeAggregator);
1603   while (hasData()) {
1604     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1605     if (std::error_code EC = Sample.getError())
1606       return EC;
1607 
1608     if (!Sample->PC)
1609       continue;
1610 
1611     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1612       BF->setHasProfileAvailable();
1613 
1614     ++BasicSamples[Sample->PC];
1615     EventNames.insert(Sample->EventName);
1616   }
1617 
1618   return std::error_code();
1619 }
1620 
1621 void DataAggregator::processBasicEvents() {
1622   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1623   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1624                      TimerGroupDesc, opts::TimeAggregator);
1625   uint64_t OutOfRangeSamples = 0;
1626   uint64_t NumSamples = 0;
1627   for (auto &Sample : BasicSamples) {
1628     const uint64_t PC = Sample.first;
1629     const uint64_t HitCount = Sample.second;
1630     NumSamples += HitCount;
1631     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1632     if (!Func) {
1633       OutOfRangeSamples += HitCount;
1634       continue;
1635     }
1636 
1637     doSample(*Func, PC, HitCount);
1638   }
1639   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1640 
1641   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1642          << OutOfRangeSamples;
1643   float Perc = 0.0f;
1644   if (NumSamples > 0) {
1645     outs() << " (";
1646     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1647     if (outs().has_colors()) {
1648       if (Perc > 60.0f)
1649         outs().changeColor(raw_ostream::RED);
1650       else if (Perc > 40.0f)
1651         outs().changeColor(raw_ostream::YELLOW);
1652       else
1653         outs().changeColor(raw_ostream::GREEN);
1654     }
1655     outs() << format("%.1f%%", Perc);
1656     if (outs().has_colors())
1657       outs().resetColor();
1658     outs() << ")";
1659   }
1660   outs() << "\n";
1661   if (Perc > 80.0f)
1662     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1663               "binary is probably not the same binary used during profiling "
1664               "collection. The generated data may be ineffective for improving "
1665               "performance.\n\n";
1666 }
1667 
1668 std::error_code DataAggregator::parseMemEvents() {
1669   outs() << "PERF2BOLT: parsing memory events...\n";
1670   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1671                      TimerGroupDesc, opts::TimeAggregator);
1672   while (hasData()) {
1673     ErrorOr<PerfMemSample> Sample = parseMemSample();
1674     if (std::error_code EC = Sample.getError())
1675       return EC;
1676 
1677     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1678       BF->setHasProfileAvailable();
1679 
1680     MemSamples.emplace_back(std::move(Sample.get()));
1681   }
1682 
1683   return std::error_code();
1684 }
1685 
1686 void DataAggregator::processMemEvents() {
1687   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1688                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1689   for (const PerfMemSample &Sample : MemSamples) {
1690     uint64_t PC = Sample.PC;
1691     uint64_t Addr = Sample.Addr;
1692     StringRef FuncName;
1693     StringRef MemName;
1694 
1695     // Try to resolve symbol for PC
1696     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1697     if (!Func) {
1698       LLVM_DEBUG(if (PC != 0) {
1699         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1700                << Twine::utohexstr(Addr) << "\n";
1701       });
1702       continue;
1703     }
1704 
1705     FuncName = Func->getOneName();
1706     PC -= Func->getAddress();
1707 
1708     // Try to resolve symbol for memory load
1709     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1710       MemName = BD->getName();
1711       Addr -= BD->getAddress();
1712     } else if (opts::FilterMemProfile) {
1713       // Filter out heap/stack accesses
1714       continue;
1715     }
1716 
1717     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1718     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1719 
1720     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1721     setMemData(*Func, MemData);
1722     MemData->update(FuncLoc, AddrLoc);
1723     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1724   }
1725 }
1726 
1727 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1728   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1729   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1730                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1731   while (hasData()) {
1732     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1733     if (std::error_code EC = AggrEntry.getError())
1734       return EC;
1735 
1736     if (BinaryFunction *BF =
1737             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1738       BF->setHasProfileAvailable();
1739     if (BinaryFunction *BF =
1740             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1741       BF->setHasProfileAvailable();
1742 
1743     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1744   }
1745 
1746   return std::error_code();
1747 }
1748 
1749 void DataAggregator::processPreAggregated() {
1750   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1751   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1752                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1753 
1754   uint64_t NumTraces = 0;
1755   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1756     switch (AggrEntry.EntryType) {
1757     case AggregatedLBREntry::BRANCH:
1758       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1759                AggrEntry.Mispreds);
1760       break;
1761     case AggregatedLBREntry::FT:
1762     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1763       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1764                          ? AggrEntry.From.Offset
1765                          : 0,
1766                      AggrEntry.From.Offset, false};
1767       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1768       doTrace(First, Second, AggrEntry.Count);
1769       NumTraces += AggrEntry.Count;
1770       break;
1771     }
1772     }
1773   }
1774 
1775   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1776          << " aggregated LBR entries\n";
1777   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1778          << NumInvalidTraces;
1779   float Perc = 0.0f;
1780   if (NumTraces > 0) {
1781     outs() << " (";
1782     Perc = NumInvalidTraces * 100.0f / NumTraces;
1783     if (outs().has_colors()) {
1784       if (Perc > 10.0f)
1785         outs().changeColor(raw_ostream::RED);
1786       else if (Perc > 5.0f)
1787         outs().changeColor(raw_ostream::YELLOW);
1788       else
1789         outs().changeColor(raw_ostream::GREEN);
1790     }
1791     outs() << format("%.1f%%", Perc);
1792     if (outs().has_colors())
1793       outs().resetColor();
1794     outs() << ")";
1795   }
1796   outs() << "\n";
1797   if (Perc > 10.0f)
1798     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1799               "binary is probably not the same binary used during profiling "
1800               "collection. The generated data may be ineffective for improving "
1801               "performance.\n\n";
1802 
1803   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1804          << NumLongRangeTraces;
1805   if (NumTraces > 0)
1806     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1807   outs() << "\n";
1808 }
1809 
1810 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1811   size_t LineEnd = ParsingBuf.find_first_of("\n");
1812   if (LineEnd == StringRef::npos) {
1813     reportError("expected rest of line");
1814     Diag << "Found: " << ParsingBuf << "\n";
1815     return NoneType();
1816   }
1817   StringRef Line = ParsingBuf.substr(0, LineEnd);
1818 
1819   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1820   if (Pos == StringRef::npos)
1821     return NoneType();
1822   Line = Line.drop_front(Pos);
1823 
1824   // Line:
1825   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1826   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1827   int32_t PID;
1828   if (PIDStr.getAsInteger(10, PID)) {
1829     reportError("expected PID");
1830     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1831     return NoneType();
1832   }
1833 
1834   return PID;
1835 }
1836 
1837 namespace {
1838 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1839   const StringRef SecTimeStr = TimeStr.split('.').first;
1840   const StringRef USecTimeStr = TimeStr.split('.').second;
1841   uint64_t SecTime;
1842   uint64_t USecTime;
1843   if (SecTimeStr.getAsInteger(10, SecTime) ||
1844       USecTimeStr.getAsInteger(10, USecTime))
1845     return NoneType();
1846   return SecTime * 1000000ULL + USecTime;
1847 }
1848 }
1849 
1850 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1851   while (checkAndConsumeFS()) {
1852   }
1853 
1854   size_t LineEnd = ParsingBuf.find_first_of("\n");
1855   if (LineEnd == StringRef::npos) {
1856     reportError("expected rest of line");
1857     Diag << "Found: " << ParsingBuf << "\n";
1858     return NoneType();
1859   }
1860   StringRef Line = ParsingBuf.substr(0, LineEnd);
1861 
1862   size_t Pos = Line.find("PERF_RECORD_FORK");
1863   if (Pos == StringRef::npos) {
1864     consumeRestOfLine();
1865     return NoneType();
1866   }
1867 
1868   ForkInfo FI;
1869 
1870   const StringRef TimeStr =
1871       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1872   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1873     FI.Time = *TimeRes;
1874   }
1875 
1876   Line = Line.drop_front(Pos);
1877 
1878   // Line:
1879   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1880   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1881   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1882     reportError("expected PID");
1883     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1884     return NoneType();
1885   }
1886 
1887   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1888   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1889     reportError("expected PID");
1890     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1891     return NoneType();
1892   }
1893 
1894   consumeRestOfLine();
1895 
1896   return FI;
1897 }
1898 
1899 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1900 DataAggregator::parseMMapEvent() {
1901   while (checkAndConsumeFS()) {
1902   }
1903 
1904   MMapInfo ParsedInfo;
1905 
1906   size_t LineEnd = ParsingBuf.find_first_of("\n");
1907   if (LineEnd == StringRef::npos) {
1908     reportError("expected rest of line");
1909     Diag << "Found: " << ParsingBuf << "\n";
1910     return make_error_code(llvm::errc::io_error);
1911   }
1912   StringRef Line = ParsingBuf.substr(0, LineEnd);
1913 
1914   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1915   if (Pos == StringRef::npos) {
1916     consumeRestOfLine();
1917     return std::make_pair(StringRef(), ParsedInfo);
1918   }
1919 
1920   // Line:
1921   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1922 
1923   const StringRef TimeStr =
1924       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1925   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1926     ParsedInfo.Time = *TimeRes;
1927 
1928   Line = Line.drop_front(Pos);
1929 
1930   // Line:
1931   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1932 
1933   StringRef FileName = Line.rsplit(FieldSeparator).second;
1934   if (FileName.startswith("//") || FileName.startswith("[")) {
1935     consumeRestOfLine();
1936     return std::make_pair(StringRef(), ParsedInfo);
1937   }
1938   FileName = sys::path::filename(FileName);
1939 
1940   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1941   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1942     reportError("expected PID");
1943     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1944     return make_error_code(llvm::errc::io_error);
1945   }
1946 
1947   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1948   if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1949     reportError("expected base address");
1950     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1951     return make_error_code(llvm::errc::io_error);
1952   }
1953 
1954   const StringRef SizeStr = Line.split('(').second.split(')').first;
1955   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1956     reportError("expected mmaped size");
1957     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1958     return make_error_code(llvm::errc::io_error);
1959   }
1960 
1961   const StringRef OffsetStr =
1962       Line.split('@').second.ltrim().split(FieldSeparator).first;
1963   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1964     reportError("expected mmaped page-aligned offset");
1965     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1966     return make_error_code(llvm::errc::io_error);
1967   }
1968 
1969   consumeRestOfLine();
1970 
1971   return std::make_pair(FileName, ParsedInfo);
1972 }
1973 
1974 std::error_code DataAggregator::parseMMapEvents() {
1975   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1976   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1977                      TimerGroupDesc, opts::TimeAggregator);
1978 
1979   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1980   while (hasData()) {
1981     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1982     if (std::error_code EC = FileMMapInfoRes.getError())
1983       return EC;
1984 
1985     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1986     if (FileMMapInfo.second.PID == -1)
1987       continue;
1988 
1989     // Consider only the first mapping of the file for any given PID
1990     bool PIDExists = false;
1991     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1992     for (auto MI = Range.first; MI != Range.second; ++MI) {
1993       if (MI->second.PID == FileMMapInfo.second.PID) {
1994         PIDExists = true;
1995         break;
1996       }
1997     }
1998     if (PIDExists)
1999       continue;
2000 
2001     GlobalMMapInfo.insert(FileMMapInfo);
2002   }
2003 
2004   LLVM_DEBUG({
2005     dbgs() << "FileName -> mmap info:\n";
2006     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
2007       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
2008              << Twine::utohexstr(Pair.second.MMapAddress) << ", "
2009              << Twine::utohexstr(Pair.second.Size) << " @ "
2010              << Twine::utohexstr(Pair.second.Offset) << "]\n";
2011   });
2012 
2013   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2014   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2015     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2016            << "\" for profile matching\n";
2017     NameToUse = BuildIDBinaryName;
2018   }
2019 
2020   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2021   for (auto I = Range.first; I != Range.second; ++I) {
2022     MMapInfo &MMapInfo = I->second;
2023     if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2024       // Check that the binary mapping matches one of the segments.
2025       bool MatchFound = false;
2026       for (auto &KV : BC->SegmentMapInfo) {
2027         SegmentInfo &SegInfo = KV.second;
2028         // The mapping is page-aligned and hence the MMapAddress could be
2029         // different from the segment start address. We cannot know the page
2030         // size of the mapping, but we know it should not exceed the segment
2031         // alignment value. Hence we are performing an approximate check.
2032         if (SegInfo.Address >= MMapInfo.MMapAddress &&
2033             SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
2034           MatchFound = true;
2035           break;
2036         }
2037       }
2038       if (!MatchFound) {
2039         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2040                << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2041         continue;
2042       }
2043     }
2044 
2045     // Set base address for shared objects.
2046     if (!BC->HasFixedLoadAddress) {
2047       Optional<uint64_t> BaseAddress =
2048           BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2049       if (!BaseAddress) {
2050         errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2051                   "binary when memory mapped at 0x"
2052                << Twine::utohexstr(MMapInfo.MMapAddress)
2053                << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2054                << ". Ignoring profile data for this mapping\n";
2055         continue;
2056       } else {
2057         MMapInfo.BaseAddress = *BaseAddress;
2058       }
2059     }
2060 
2061     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2062   }
2063 
2064   if (BinaryMMapInfo.empty()) {
2065     if (errs().has_colors())
2066       errs().changeColor(raw_ostream::RED);
2067     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2068            << BC->getFilename() << "\".";
2069     if (!GlobalMMapInfo.empty()) {
2070       errs() << " Profile for the following binary name(s) is available:\n";
2071       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2072            I = GlobalMMapInfo.upper_bound(I->first))
2073         errs() << "  " << I->first << '\n';
2074       errs() << "Please rename the input binary.\n";
2075     } else {
2076       errs() << " Failed to extract any binary name from a profile.\n";
2077     }
2078     if (errs().has_colors())
2079       errs().resetColor();
2080 
2081     exit(1);
2082   }
2083 
2084   return std::error_code();
2085 }
2086 
2087 std::error_code DataAggregator::parseTaskEvents() {
2088   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2089   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2090                      TimerGroupDesc, opts::TimeAggregator);
2091 
2092   while (hasData()) {
2093     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2094       // Remove forked child that ran execve
2095       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2096       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2097         BinaryMMapInfo.erase(MMapInfoIter);
2098       consumeRestOfLine();
2099       continue;
2100     }
2101 
2102     Optional<ForkInfo> ForkInfo = parseForkEvent();
2103     if (!ForkInfo)
2104       continue;
2105 
2106     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2107       continue;
2108 
2109     if (ForkInfo->Time == 0) {
2110       // Process was forked and mmaped before perf ran. In this case the child
2111       // should have its own mmap entry unless it was execve'd.
2112       continue;
2113     }
2114 
2115     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2116     if (MMapInfoIter == BinaryMMapInfo.end())
2117       continue;
2118 
2119     MMapInfo MMapInfo = MMapInfoIter->second;
2120     MMapInfo.PID = ForkInfo->ChildPID;
2121     MMapInfo.Forked = true;
2122     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2123   }
2124 
2125   outs() << "PERF2BOLT: input binary is associated with "
2126          << BinaryMMapInfo.size() << " PID(s)\n";
2127 
2128   LLVM_DEBUG({
2129     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2130       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2131              << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
2132              << Twine::utohexstr(MMI.second.Size) << ")\n";
2133   });
2134 
2135   return std::error_code();
2136 }
2137 
2138 Optional<std::pair<StringRef, StringRef>>
2139 DataAggregator::parseNameBuildIDPair() {
2140   while (checkAndConsumeFS()) {
2141   }
2142 
2143   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2144   if (std::error_code EC = BuildIDStr.getError())
2145     return NoneType();
2146 
2147   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2148   if (std::error_code EC = NameStr.getError())
2149     return NoneType();
2150 
2151   consumeRestOfLine();
2152   return std::make_pair(NameStr.get(), BuildIDStr.get());
2153 }
2154 
2155 Optional<StringRef>
2156 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2157   while (hasData()) {
2158     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2159     if (!IDPair)
2160       return NoneType();
2161 
2162     if (IDPair->second.startswith(FileBuildID))
2163       return sys::path::filename(IDPair->first);
2164   }
2165   return NoneType();
2166 }
2167 
2168 std::error_code
2169 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2170   std::error_code EC;
2171   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2172   if (EC)
2173     return EC;
2174 
2175   bool WriteMemLocs = false;
2176 
2177   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2178     if (WriteMemLocs)
2179       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2180     else
2181       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2182     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2183             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2184   };
2185 
2186   uint64_t BranchValues = 0;
2187   uint64_t MemValues = 0;
2188 
2189   if (BAT)
2190     OutFile << "boltedcollection\n";
2191   if (opts::BasicAggregation) {
2192     OutFile << "no_lbr";
2193     for (const StringMapEntry<NoneType> &Entry : EventNames)
2194       OutFile << " " << Entry.getKey();
2195     OutFile << "\n";
2196 
2197     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2198       for (const SampleInfo &SI : Func.getValue().Data) {
2199         writeLocation(SI.Loc);
2200         OutFile << SI.Hits << "\n";
2201         ++BranchValues;
2202       }
2203     }
2204   } else {
2205     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2206       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2207         writeLocation(BI.From);
2208         writeLocation(BI.To);
2209         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2210         ++BranchValues;
2211       }
2212       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2213         // Do not output if source is a known symbol, since this was already
2214         // accounted for in the source function
2215         if (BI.From.IsSymbol)
2216           continue;
2217         writeLocation(BI.From);
2218         writeLocation(BI.To);
2219         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2220         ++BranchValues;
2221       }
2222     }
2223 
2224     WriteMemLocs = true;
2225     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2226       for (const MemInfo &MemEvent : Func.getValue().Data) {
2227         writeLocation(MemEvent.Offset);
2228         writeLocation(MemEvent.Addr);
2229         OutFile << MemEvent.Count << "\n";
2230         ++MemValues;
2231       }
2232     }
2233   }
2234 
2235   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2236          << " memory objects to " << OutputFilename << "\n";
2237 
2238   return std::error_code();
2239 }
2240 
// Dump the aggregator's state by forwarding to the DataReader implementation.
void DataAggregator::dump() const { DataReader::dump(); }
2242 
2243 void DataAggregator::dump(const LBREntry &LBR) const {
2244   Diag << "From: " << Twine::utohexstr(LBR.From)
2245        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2246        << "\n";
2247 }
2248 
2249 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2250   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2251   for (const LBREntry &LBR : Sample.LBR)
2252     dump(LBR);
2253 }
2254 
2255 void DataAggregator::dump(const PerfMemSample &Sample) const {
2256   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2257 }
2258