1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Process.h"
27 #include "llvm/Support/Program.h"
28 #include "llvm/Support/Regex.h"
29 #include "llvm/Support/Timer.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <map>
32 #include <unordered_map>
33 #include <utility>
34 
35 #define DEBUG_TYPE "aggregator"
36 
37 using namespace llvm;
38 using namespace bolt;
39 
40 namespace opts {
41 
42 static cl::opt<bool>
43 BasicAggregation("nl",
44   cl::desc("aggregate basic samples (without LBR info)"),
45   cl::init(false),
46   cl::ZeroOrMore,
47   cl::cat(AggregatorCategory));
48 
49 static cl::opt<bool>
50 FilterMemProfile("filter-mem-profile",
51   cl::desc("if processing a memory profile, filter out stack or heap accesses "
52            "that won't be useful for BOLT to reduce profile file size"),
53   cl::init(true),
54   cl::cat(AggregatorCategory));
55 
56 static cl::opt<unsigned long long>
57 FilterPID("pid",
58   cl::desc("only use samples from process with specified PID"),
59   cl::init(0),
60   cl::Optional,
61   cl::cat(AggregatorCategory));
62 
63 static cl::opt<bool>
64 IgnoreBuildID("ignore-build-id",
65   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
66   cl::init(false),
67   cl::cat(AggregatorCategory));
68 
69 static cl::opt<bool>
70 IgnoreInterruptLBR("ignore-interrupt-lbr",
71   cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
72   cl::init(true),
73   cl::ZeroOrMore,
74   cl::cat(AggregatorCategory));
75 
76 static cl::opt<unsigned long long>
77 MaxSamples("max-samples",
78   cl::init(-1ULL),
79   cl::desc("maximum number of samples to read from LBR profile"),
80   cl::Optional,
81   cl::Hidden,
82   cl::cat(AggregatorCategory));
83 
84 static cl::opt<bool>
85 ReadPreAggregated("pa",
86   cl::desc("skip perf and read data from a pre-aggregated file format"),
87   cl::init(false),
88   cl::ZeroOrMore,
89   cl::cat(AggregatorCategory));
90 
91 static cl::opt<bool>
92 TimeAggregator("time-aggr",
93   cl::desc("time BOLT aggregator"),
94   cl::init(false),
95   cl::ZeroOrMore,
96   cl::cat(AggregatorCategory));
97 
98 static cl::opt<bool>
99 UseEventPC("use-event-pc",
100   cl::desc("use event PC in combination with LBR sampling"),
101   cl::init(false),
102   cl::ZeroOrMore,
103   cl::cat(AggregatorCategory));
104 
105 static cl::opt<bool>
106 WriteAutoFDOData("autofdo",
107   cl::desc("generate autofdo textual data instead of bolt data"),
108   cl::init(false),
109   cl::ZeroOrMore,
110   cl::cat(AggregatorCategory));
111 
112 } // namespace opts
113 
namespace {

// Group name and description handed to NamedRegionTimer by the aggregator.
constexpr char TimerGroupName[] = "aggregator";
constexpr char TimerGroupDesc[] = "Aggregator";

} // end anonymous namespace
120 
// Namespace-scope definition of the static constexpr member. It carries no
// initializer here, so the value comes from the in-class declaration
// (presumably in DataAggregator.h -- not visible from this file).
constexpr uint64_t DataAggregator::KernelBaseAddr;
122 
123 DataAggregator::~DataAggregator() { deleteTempFiles(); }
124 
125 namespace {
126 void deleteTempFile(const std::string &FileName) {
127   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
128     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
129            << " with error " << Errc.message() << "\n";
130 }
131 }
132 
133 void DataAggregator::deleteTempFiles() {
134   for (std::string &FileName : TempFiles)
135     deleteTempFile(FileName);
136   TempFiles.clear();
137 }
138 
139 void DataAggregator::findPerfExecutable() {
140   Optional<std::string> PerfExecutable =
141       sys::Process::FindInEnvPath("PATH", "perf");
142   if (!PerfExecutable) {
143     outs() << "PERF2BOLT: No perf executable found!\n";
144     exit(1);
145   }
146   PerfPath = *PerfExecutable;
147 }
148 
149 void DataAggregator::start() {
150   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
151 
152   // Don't launch perf for pre-aggregated files
153   if (opts::ReadPreAggregated)
154     return;
155 
156   findPerfExecutable();
157 
158   if (opts::BasicAggregation)
159     launchPerfProcess("events without LBR",
160                       MainEventsPPI,
161                       "script -F pid,event,ip",
162                       /*Wait = */false);
163   else
164     launchPerfProcess("branch events",
165                       MainEventsPPI,
166                       "script -F pid,ip,brstack",
167                       /*Wait = */false);
168 
169   // Note: we launch script for mem events regardless of the option, as the
170   //       command fails fairly fast if mem events were not collected.
171   launchPerfProcess("mem events",
172                     MemEventsPPI,
173                     "script -F pid,event,addr,ip",
174                     /*Wait = */false);
175 
176   launchPerfProcess("process events",
177                     MMapEventsPPI,
178                     "script --show-mmap-events",
179                     /*Wait = */false);
180 
181   launchPerfProcess("task events",
182                     TaskEventsPPI,
183                     "script --show-task-events",
184                     /*Wait = */false);
185 }
186 
187 void DataAggregator::abort() {
188   if (opts::ReadPreAggregated)
189     return;
190 
191   std::string Error;
192 
193   // Kill subprocesses in case they are not finished
194   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
195   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
196   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
197   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
198 
199   deleteTempFiles();
200 
201   exit(1);
202 }
203 
204 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
205                                        const char *ArgsString, bool Wait) {
206   SmallVector<StringRef, 4> Argv;
207 
208   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
209   Argv.push_back(PerfPath.data());
210 
211   char *WritableArgsString = strdup(ArgsString);
212   char *Str = WritableArgsString;
213   do {
214     Argv.push_back(Str);
215     while (*Str && *Str != ' ')
216       ++Str;
217     if (!*Str)
218       break;
219     *Str++ = 0;
220   } while (true);
221 
222   Argv.push_back("-f");
223   Argv.push_back("-i");
224   Argv.push_back(Filename.c_str());
225 
226   if (std::error_code Errc =
227           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
228     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
229            << " with error " << Errc.message() << "\n";
230     exit(1);
231   }
232   TempFiles.push_back(PPI.StdoutPath.data());
233 
234   if (std::error_code Errc =
235           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
236     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
237            << " with error " << Errc.message() << "\n";
238     exit(1);
239   }
240   TempFiles.push_back(PPI.StderrPath.data());
241 
242   Optional<StringRef> Redirects[] = {
243       llvm::None,                        // Stdin
244       StringRef(PPI.StdoutPath.data()),  // Stdout
245       StringRef(PPI.StderrPath.data())}; // Stderr
246 
247   LLVM_DEBUG({
248     dbgs() << "Launching perf: ";
249     for (StringRef Arg : Argv)
250       dbgs() << Arg << " ";
251     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
252            << "\n";
253   });
254 
255   if (Wait)
256     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
257                                             /*envp*/ llvm::None, Redirects);
258   else
259     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
260                                 Redirects);
261 
262   free(WritableArgsString);
263 }
264 
265 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
266   PerfProcessInfo BuildIDProcessInfo;
267   launchPerfProcess("buildid list",
268                     BuildIDProcessInfo,
269                     "buildid-list",
270                     /*Wait = */true);
271 
272   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
273     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
274         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
275     StringRef ErrBuf = (*MB)->getBuffer();
276 
277     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
278            << '\n';
279     errs() << ErrBuf;
280     return;
281   }
282 
283   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
284       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
285   if (std::error_code EC = MB.getError()) {
286     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
287            << EC.message() << "\n";
288     return;
289   }
290 
291   FileBuf = std::move(*MB);
292   ParsingBuf = FileBuf->getBuffer();
293   if (ParsingBuf.empty()) {
294     errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
295               "data was recorded without it\n";
296     return;
297   }
298 
299   Col = 0;
300   Line = 1;
301   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
302   if (!FileName) {
303     errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
304               "This indicates the input binary supplied for data aggregation "
305               "is not the same recorded by perf when collecting profiling "
306               "data, or there were no samples recorded for the binary. "
307               "Use -ignore-build-id option to override.\n";
308     if (!opts::IgnoreBuildID)
309       abort();
310   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
311     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
312     BuildIDBinaryName = std::string(*FileName);
313   } else {
314     outs() << "PERF2BOLT: matched build-id and file name\n";
315   }
316 
317   return;
318 }
319 
320 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
321   if (opts::ReadPreAggregated)
322     return true;
323 
324   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
325   if (!FD)
326     return false;
327 
328   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
329 
330   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
331   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
332       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
333   if (!BytesRead || *BytesRead != 7)
334     return false;
335 
336   if (strncmp(Buf, "PERFILE", 7) == 0)
337     return true;
338   return false;
339 }
340 
341 void DataAggregator::parsePreAggregated() {
342   std::string Error;
343 
344   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
345       MemoryBuffer::getFileOrSTDIN(Filename);
346   if (std::error_code EC = MB.getError()) {
347     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
348            << EC.message() << "\n";
349     exit(1);
350   }
351 
352   FileBuf = std::move(*MB);
353   ParsingBuf = FileBuf->getBuffer();
354   Col = 0;
355   Line = 1;
356   if (parsePreAggregatedLBRSamples()) {
357     errs() << "PERF2BOLT: failed to parse samples\n";
358     exit(1);
359   }
360 }
361 
362 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
363   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
364   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
365                      TimerGroupDesc, opts::TimeAggregator);
366 
367   std::error_code EC;
368   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
369   if (EC)
370     return EC;
371 
372   // Format:
373   // number of unique traces
374   // from_1-to_1:count_1
375   // from_2-to_2:count_2
376   // ......
377   // from_n-to_n:count_n
378   // number of unique sample addresses
379   // addr_1:count_1
380   // addr_2:count_2
381   // ......
382   // addr_n:count_n
383   // number of unique LBR entries
384   // src_1->dst_1:count_1
385   // src_2->dst_2:count_2
386   // ......
387   // src_n->dst_n:count_n
388 
389   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
390 
391   // AutoFDO addresses are relative to the first allocated loadable program
392   // segment
393   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
394     if (Address < FirstAllocAddress)
395       return 0;
396     return Address - FirstAllocAddress;
397   };
398 
399   OutFile << FallthroughLBRs.size() << "\n";
400   for (const auto &AggrLBR : FallthroughLBRs) {
401     const Trace &Trace = AggrLBR.first;
402     const FTInfo &Info = AggrLBR.second;
403     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
404             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
405             << (Info.InternCount + Info.ExternCount) << "\n";
406   }
407 
408   OutFile << BasicSamples.size() << "\n";
409   for (const auto &Sample : BasicSamples) {
410     uint64_t PC = Sample.first;
411     uint64_t HitCount = Sample.second;
412     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
413   }
414 
415   OutFile << BranchLBRs.size() << "\n";
416   for (const auto &AggrLBR : BranchLBRs) {
417     const Trace &Trace = AggrLBR.first;
418     const BranchInfo &Info = AggrLBR.second;
419     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
420             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
421             << Info.TakenCount << "\n";
422   }
423 
424   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
425          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
426          << " unique branches to " << OutputFilename << "\n";
427 
428   return std::error_code();
429 }
430 
431 void DataAggregator::filterBinaryMMapInfo() {
432   if (opts::FilterPID) {
433     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
434     if (MMapInfoIter != BinaryMMapInfo.end()) {
435       MMapInfo MMap = MMapInfoIter->second;
436       BinaryMMapInfo.clear();
437       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
438     } else {
439       if (errs().has_colors())
440         errs().changeColor(raw_ostream::RED);
441       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
442              << opts::FilterPID << "\""
443              << " for binary \"" << BC->getFilename() << "\".";
444       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
445       errs() << " Profile for the following process is available:\n";
446       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
447         outs() << "  " << MMI.second.PID
448                << (MMI.second.Forked ? " (forked)\n" : "\n");
449 
450       if (errs().has_colors())
451         errs().resetColor();
452 
453       exit(1);
454     }
455   }
456 }
457 
458 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
459   this->BC = &BC;
460 
461   if (opts::ReadPreAggregated) {
462     parsePreAggregated();
463     return Error::success();
464   }
465 
466   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
467     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
468     processFileBuildID(*FileBuildID);
469   } else {
470     errs() << "BOLT-WARNING: build-id will not be checked because we could "
471               "not read one from input binary\n";
472   }
473 
474   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
475     std::string Error;
476     outs() << "PERF2BOLT: waiting for perf " << Name
477            << " collection to finish...\n";
478     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
479 
480     if (!Error.empty()) {
481       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
482       deleteTempFiles();
483       exit(1);
484     }
485 
486     if (PI.ReturnCode != 0) {
487       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
488           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
489       StringRef ErrBuf = (*ErrorMB)->getBuffer();
490 
491       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
492       errs() << ErrBuf;
493       deleteTempFiles();
494       exit(1);
495     }
496 
497     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
498         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
499     if (std::error_code EC = MB.getError()) {
500       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
501              << EC.message() << "\n";
502       deleteTempFiles();
503       exit(1);
504     }
505 
506     FileBuf = std::move(*MB);
507     ParsingBuf = FileBuf->getBuffer();
508     Col = 0;
509     Line = 1;
510   };
511 
512   if (opts::LinuxKernelMode) {
513     // Current MMap parsing logic does not work with linux kernel.
514     // MMap entries for linux kernel uses PERF_RECORD_MMAP
515     // format instead of typical PERF_RECORD_MMAP2 format.
516     // Since linux kernel address mapping is absolute (same as
517     // in the ELF file), we avoid parsing MMap in linux kernel mode.
518     // While generating optimized linux kernel binary, we may need
519     // to parse MMap entries.
520 
521     // In linux kernel mode, we analyze and optimize
522     // all linux kernel binary instructions, irrespective
523     // of whether they are due to system calls or due to
524     // interrupts. Therefore, we cannot ignore interrupt
525     // in Linux kernel mode.
526     opts::IgnoreInterruptLBR = false;
527   } else {
528     prepareToParse("mmap events", MMapEventsPPI);
529     if (parseMMapEvents())
530       errs() << "PERF2BOLT: failed to parse mmap events\n";
531   }
532 
533   prepareToParse("task events", TaskEventsPPI);
534   if (parseTaskEvents())
535     errs() << "PERF2BOLT: failed to parse task events\n";
536 
537   filterBinaryMMapInfo();
538   prepareToParse("events", MainEventsPPI);
539 
540   if (opts::HeatmapMode) {
541     if (std::error_code EC = printLBRHeatMap()) {
542       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
543       exit(1);
544     }
545     exit(0);
546   }
547 
548   if ((!opts::BasicAggregation && parseBranchEvents()) ||
549       (opts::BasicAggregation && parseBasicEvents()))
550     errs() << "PERF2BOLT: failed to parse samples\n";
551 
552   // We can finish early if the goal is just to generate data for autofdo
553   if (opts::WriteAutoFDOData) {
554     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
555       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
556 
557     deleteTempFiles();
558     exit(0);
559   }
560 
561   // Special handling for memory events
562   std::string Error;
563   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
564   if (PI.ReturnCode != 0) {
565     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
566         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
567     StringRef ErrBuf = (*MB)->getBuffer();
568 
569     deleteTempFiles();
570 
571     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
572                  "Cannot print 'addr' field.");
573     if (!NoData.match(ErrBuf)) {
574       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
575       errs() << ErrBuf;
576       exit(1);
577     }
578     return Error::success();
579   }
580 
581   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
582       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
583   if (std::error_code EC = MB.getError()) {
584     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
585            << EC.message() << "\n";
586     deleteTempFiles();
587     exit(1);
588   }
589 
590   FileBuf = std::move(*MB);
591   ParsingBuf = FileBuf->getBuffer();
592   Col = 0;
593   Line = 1;
594   if (const std::error_code EC = parseMemEvents())
595     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
596            << '\n';
597 
598   deleteTempFiles();
599 
600   return Error::success();
601 }
602 
603 Error DataAggregator::readProfile(BinaryContext &BC) {
604   processProfile(BC);
605 
606   for (auto &BFI : BC.getBinaryFunctions()) {
607     BinaryFunction &Function = BFI.second;
608     convertBranchData(Function);
609   }
610 
611   if (opts::AggregateOnly) {
612     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
613       report_error("cannot create output data file", EC);
614   }
615 
616   return Error::success();
617 }
618 
619 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
620   return Function.hasProfileAvailable();
621 }
622 
623 void DataAggregator::processProfile(BinaryContext &BC) {
624   if (opts::ReadPreAggregated)
625     processPreAggregated();
626   else if (opts::BasicAggregation)
627     processBasicEvents();
628   else
629     processBranchEvents();
630 
631   processMemEvents();
632 
633   // Mark all functions with registered events as having a valid profile.
634   for (auto &BFI : BC.getBinaryFunctions()) {
635     BinaryFunction &BF = BFI.second;
636     if (getBranchData(BF)) {
637       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
638                                                 : BinaryFunction::PF_LBR;
639       BF.markProfiled(Flags);
640     }
641   }
642 
643   // Release intermediate storage.
644   clear(BranchLBRs);
645   clear(FallthroughLBRs);
646   clear(AggregatedLBRs);
647   clear(BasicSamples);
648   clear(MemSamples);
649 }
650 
651 BinaryFunction *
652 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
653   if (!BC->containsAddress(Address))
654     return nullptr;
655 
656   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
657                                                 /*UseMaxSize=*/true);
658 }
659 
660 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
661                                           uint64_t Count) {
662   if (!BAT)
663     return Func.getOneName();
664 
665   const BinaryFunction *OrigFunc = &Func;
666   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
667     NumColdSamples += Count;
668     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
669     if (HotFunc)
670       OrigFunc = HotFunc;
671   }
672   // If it is a local function, prefer the name containing the file name where
673   // the local function was declared
674   for (StringRef AlternativeName : OrigFunc->getNames()) {
675     size_t FileNameIdx = AlternativeName.find('/');
676     // Confirm the alternative name has the pattern Symbol/FileName/1 before
677     // using it
678     if (FileNameIdx == StringRef::npos ||
679         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
680       continue;
681     return AlternativeName;
682   }
683   return OrigFunc->getOneName();
684 }
685 
686 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
687                               uint64_t Count) {
688   auto I = NamesToSamples.find(Func.getOneName());
689   if (I == NamesToSamples.end()) {
690     bool Success;
691     StringRef LocName = getLocationName(Func, Count);
692     std::tie(I, Success) = NamesToSamples.insert(
693         std::make_pair(Func.getOneName(),
694                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
695   }
696 
697   Address -= Func.getAddress();
698   if (BAT)
699     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
700 
701   I->second.bumpCount(Address, Count);
702   return true;
703 }
704 
705 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
706                                    uint64_t To, uint64_t Count,
707                                    uint64_t Mispreds) {
708   FuncBranchData *AggrData = getBranchData(Func);
709   if (!AggrData) {
710     AggrData = &NamesToBranches[Func.getOneName()];
711     AggrData->Name = getLocationName(Func, Count);
712     setBranchData(Func, AggrData);
713   }
714 
715   From -= Func.getAddress();
716   To -= Func.getAddress();
717   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
718                     << " @ " << Twine::utohexstr(From) << " -> "
719                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
720                     << '\n');
721   if (BAT) {
722     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
723     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
724     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
725                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
726                       << " -> " << Func.getPrintName() << " @ "
727                       << Twine::utohexstr(To) << '\n');
728   }
729 
730   AggrData->bumpBranchCount(From, To, Count, Mispreds);
731   return true;
732 }
733 
734 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
735                                    BinaryFunction *ToFunc, uint64_t From,
736                                    uint64_t To, uint64_t Count,
737                                    uint64_t Mispreds) {
738   FuncBranchData *FromAggrData = nullptr;
739   FuncBranchData *ToAggrData = nullptr;
740   StringRef SrcFunc;
741   StringRef DstFunc;
742   if (FromFunc) {
743     SrcFunc = getLocationName(*FromFunc, Count);
744     FromAggrData = getBranchData(*FromFunc);
745     if (!FromAggrData) {
746       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
747       FromAggrData->Name = SrcFunc;
748       setBranchData(*FromFunc, FromAggrData);
749     }
750     From -= FromFunc->getAddress();
751     if (BAT)
752       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
753 
754     recordExit(*FromFunc, From, Mispreds, Count);
755   }
756   if (ToFunc) {
757     DstFunc = getLocationName(*ToFunc, 0);
758     ToAggrData = getBranchData(*ToFunc);
759     if (!ToAggrData) {
760       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
761       ToAggrData->Name = DstFunc;
762       setBranchData(*ToFunc, ToAggrData);
763     }
764     To -= ToFunc->getAddress();
765     if (BAT)
766       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
767 
768     recordEntry(*ToFunc, To, Mispreds, Count);
769   }
770 
771   if (FromAggrData)
772     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
773                                 Count, Mispreds);
774   if (ToAggrData)
775     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
776                                Count, Mispreds);
777   return true;
778 }
779 
780 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
781                               uint64_t Mispreds) {
782   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
783   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
784   if (!FromFunc && !ToFunc)
785     return false;
786 
787   if (FromFunc == ToFunc) {
788     recordBranch(*FromFunc, From - FromFunc->getAddress(),
789                  To - FromFunc->getAddress(), Count, Mispreds);
790     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
791   }
792 
793   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
794 }
795 
796 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
797                              uint64_t Count) {
798   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
799   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
800   if (!FromFunc || !ToFunc) {
801     LLVM_DEBUG(
802         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
803                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
804                << " and ending in " << ToFunc->getPrintName() << " @ "
805                << ToFunc->getPrintName() << " @ "
806                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
807     NumLongRangeTraces += Count;
808     return false;
809   }
810   if (FromFunc != ToFunc) {
811     NumInvalidTraces += Count;
812     LLVM_DEBUG(
813         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
814                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
815                << " and ending in " << ToFunc->getPrintName() << " @ "
816                << ToFunc->getPrintName() << " @ "
817                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
818     return false;
819   }
820 
821   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
822       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
823           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
824   if (!FTs) {
825     LLVM_DEBUG(
826         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
827                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
828                << " and ending in " << ToFunc->getPrintName() << " @ "
829                << ToFunc->getPrintName() << " @ "
830                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
831     NumInvalidTraces += Count;
832     return false;
833   }
834 
835   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
836                     << FromFunc->getPrintName() << ":"
837                     << Twine::utohexstr(First.To) << " to "
838                     << Twine::utohexstr(Second.From) << ".\n");
839   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
840     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
841                   Pair.second + FromFunc->getAddress(), Count, false);
842 
843   return true;
844 }
845 
/// Record the fall-through path between two LBR entries on the CFG of \p BF.
///
/// The trace runs from the destination of \p FirstLBR to the source of
/// \p SecondLBR. Blocks are walked in BasicBlocksLayout order and \p Count is
/// added to the branch info of every edge between consecutive blocks. When
/// \p Branches is non-null, an (offset, next block offset) pair is appended
/// for each such edge.
///
/// Returns false if \p BF is not simple, if the trace start lies after its
/// end, if either end has no containing basic block, or if two consecutive
/// blocks on the path are not connected by a successor edge. Returns true
/// otherwise, including when both ends fall into the same block.
bool DataAggregator::recordTrace(
    BinaryFunction &BF,
    const LBREntry &FirstLBR,
    const LBREntry &SecondLBR,
    uint64_t Count,
    SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
  BinaryContext &BC = BF.getBinaryContext();

  if (!BF.isSimple())
    return false;

  assert(BF.hasCFG() && "can only record traces in CFG state");

  // Offsets of the trace within this function.
  const uint64_t From = FirstLBR.To - BF.getAddress();
  const uint64_t To = SecondLBR.From - BF.getAddress();

  // A fall-through trace can only move forward.
  if (From > To)
    return false;

  BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
  BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB)
    return false;

  // Adjust FromBB if the first LBR is a return from the last instruction in
  // the previous block (that instruction should be a call).
  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
    BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
    if (PrevBB->getSuccessor(FromBB->getLabel())) {
      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
      if (Instr && BC.MIB->isCall(*Instr))
        FromBB = PrevBB;
      else
        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
                          << '\n');
    } else {
      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
    }
  }

  // Fill out information for fall-through edges. The From and To could be
  // within the same basic block, e.g. when two call instructions are in the
  // same block. In this case we skip the processing.
  if (FromBB == ToBB)
    return true;

  // Process blocks in the original layout order.
  BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
  assert(BB == FromBB && "index mismatch");
  while (BB != ToBB) {
    BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");

    // Check for bad LBRs.
    if (!BB->getSuccessor(NextBB->getLabel())) {
      LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
                        << "  " << FirstLBR << '\n'
                        << "  " << SecondLBR << '\n');
      return false;
    }

    // Record fall-through jumps
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
    BI.Count += Count;

    if (Branches) {
      // The source of the recorded pair is the offset obtained from the
      // block's last non-pseudo instruction (0 is passed as the default), or
      // the block's own offset when there is no such instruction.
      const MCInst *Instr = BB->getLastNonPseudoInstr();
      uint64_t Offset = 0;
      if (Instr)
        Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
      else
        Offset = BB->getOffset();

      Branches->emplace_back(Offset, NextBB->getOffset());
    }

    BB = NextBB;
  }

  return true;
}
930 
931 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
932 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
933                                        const LBREntry &FirstLBR,
934                                        const LBREntry &SecondLBR,
935                                        uint64_t Count) const {
936   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
937 
938   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
939     return NoneType();
940 
941   return Res;
942 }
943 
944 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
945                                  uint64_t Count) const {
946   if (To > BF.getSize())
947     return false;
948 
949   if (!BF.hasProfile())
950     BF.ExecutionCount = 0;
951 
952   BinaryBasicBlock *EntryBB = nullptr;
953   if (To == 0) {
954     BF.ExecutionCount += Count;
955     if (!BF.empty())
956       EntryBB = &BF.front();
957   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
958     if (BB->isEntryPoint())
959       EntryBB = BB;
960   }
961 
962   if (EntryBB)
963     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
964 
965   return true;
966 }
967 
968 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
969                                 uint64_t Count) const {
970   if (!BF.isSimple() || From > BF.getSize())
971     return false;
972 
973   if (!BF.hasProfile())
974     BF.ExecutionCount = 0;
975 
976   return true;
977 }
978 
979 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
980   LBREntry Res;
981   ErrorOr<StringRef> FromStrRes = parseString('/');
982   if (std::error_code EC = FromStrRes.getError())
983     return EC;
984   StringRef OffsetStr = FromStrRes.get();
985   if (OffsetStr.getAsInteger(0, Res.From)) {
986     reportError("expected hexadecimal number with From address");
987     Diag << "Found: " << OffsetStr << "\n";
988     return make_error_code(llvm::errc::io_error);
989   }
990 
991   ErrorOr<StringRef> ToStrRes = parseString('/');
992   if (std::error_code EC = ToStrRes.getError())
993     return EC;
994   OffsetStr = ToStrRes.get();
995   if (OffsetStr.getAsInteger(0, Res.To)) {
996     reportError("expected hexadecimal number with To address");
997     Diag << "Found: " << OffsetStr << "\n";
998     return make_error_code(llvm::errc::io_error);
999   }
1000 
1001   ErrorOr<StringRef> MispredStrRes = parseString('/');
1002   if (std::error_code EC = MispredStrRes.getError())
1003     return EC;
1004   StringRef MispredStr = MispredStrRes.get();
1005   if (MispredStr.size() != 1 ||
1006       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1007     reportError("expected single char for mispred bit");
1008     Diag << "Found: " << MispredStr << "\n";
1009     return make_error_code(llvm::errc::io_error);
1010   }
1011   Res.Mispred = MispredStr[0] == 'M';
1012 
1013   static bool MispredWarning = true;
1014   if (MispredStr[0] == '-' && MispredWarning) {
1015     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1016     MispredWarning = false;
1017   }
1018 
1019   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1020   if (std::error_code EC = Rest.getError())
1021     return EC;
1022   if (Rest.get().size() < 5) {
1023     reportError("expected rest of LBR entry");
1024     Diag << "Found: " << Rest.get() << "\n";
1025     return make_error_code(llvm::errc::io_error);
1026   }
1027   return Res;
1028 }
1029 
1030 bool DataAggregator::checkAndConsumeFS() {
1031   if (ParsingBuf[0] != FieldSeparator)
1032     return false;
1033 
1034   ParsingBuf = ParsingBuf.drop_front(1);
1035   Col += 1;
1036   return true;
1037 }
1038 
1039 void DataAggregator::consumeRestOfLine() {
1040   size_t LineEnd = ParsingBuf.find_first_of('\n');
1041   if (LineEnd == StringRef::npos) {
1042     ParsingBuf = StringRef();
1043     Col = 0;
1044     Line += 1;
1045     return;
1046   }
1047   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1048   Col = 0;
1049   Line += 1;
1050 }
1051 
1052 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1053   PerfBranchSample Res;
1054 
1055   while (checkAndConsumeFS()) {
1056   }
1057 
1058   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1059   if (std::error_code EC = PIDRes.getError())
1060     return EC;
1061   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1062   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1063     consumeRestOfLine();
1064     return make_error_code(errc::no_such_process);
1065   }
1066 
1067   while (checkAndConsumeFS()) {
1068   }
1069 
1070   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1071   if (std::error_code EC = PCRes.getError())
1072     return EC;
1073   Res.PC = PCRes.get();
1074 
1075   if (checkAndConsumeNewLine())
1076     return Res;
1077 
1078   while (!checkAndConsumeNewLine()) {
1079     checkAndConsumeFS();
1080 
1081     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1082     if (std::error_code EC = LBRRes.getError())
1083       return EC;
1084     LBREntry LBR = LBRRes.get();
1085     if (ignoreKernelInterrupt(LBR))
1086       continue;
1087     if (!BC->HasFixedLoadAddress)
1088       adjustLBR(LBR, MMapInfoIter->second);
1089     Res.LBR.push_back(LBR);
1090   }
1091 
1092   return Res;
1093 }
1094 
1095 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1096   while (checkAndConsumeFS()) {
1097   }
1098 
1099   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1100   if (std::error_code EC = PIDRes.getError())
1101     return EC;
1102 
1103   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1104   if (MMapInfoIter == BinaryMMapInfo.end()) {
1105     consumeRestOfLine();
1106     return PerfBasicSample{StringRef(), 0};
1107   }
1108 
1109   while (checkAndConsumeFS()) {
1110   }
1111 
1112   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1113   if (std::error_code EC = Event.getError())
1114     return EC;
1115 
1116   while (checkAndConsumeFS()) {
1117   }
1118 
1119   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1120   if (std::error_code EC = AddrRes.getError())
1121     return EC;
1122 
1123   if (!checkAndConsumeNewLine()) {
1124     reportError("expected end of line");
1125     return make_error_code(llvm::errc::io_error);
1126   }
1127 
1128   uint64_t Address = *AddrRes;
1129   if (!BC->HasFixedLoadAddress)
1130     adjustAddress(Address, MMapInfoIter->second);
1131 
1132   return PerfBasicSample{Event.get(), Address};
1133 }
1134 
1135 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1136   PerfMemSample Res{0, 0};
1137 
1138   while (checkAndConsumeFS()) {
1139   }
1140 
1141   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1142   if (std::error_code EC = PIDRes.getError())
1143     return EC;
1144 
1145   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1146   if (MMapInfoIter == BinaryMMapInfo.end()) {
1147     consumeRestOfLine();
1148     return Res;
1149   }
1150 
1151   while (checkAndConsumeFS()) {
1152   }
1153 
1154   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1155   if (std::error_code EC = Event.getError())
1156     return EC;
1157   if (Event.get().find("mem-loads") == StringRef::npos) {
1158     consumeRestOfLine();
1159     return Res;
1160   }
1161 
1162   while (checkAndConsumeFS()) {
1163   }
1164 
1165   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1166   if (std::error_code EC = AddrRes.getError())
1167     return EC;
1168 
1169   while (checkAndConsumeFS()) {
1170   }
1171 
1172   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1173   if (std::error_code EC = PCRes.getError()) {
1174     consumeRestOfLine();
1175     return EC;
1176   }
1177 
1178   if (!checkAndConsumeNewLine()) {
1179     reportError("expected end of line");
1180     return make_error_code(llvm::errc::io_error);
1181   }
1182 
1183   uint64_t Address = *AddrRes;
1184   if (!BC->HasFixedLoadAddress)
1185     adjustAddress(Address, MMapInfoIter->second);
1186 
1187   return PerfMemSample{PCRes.get(), Address};
1188 }
1189 
1190 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1191   auto parseOffset = [this]() -> ErrorOr<Location> {
1192     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1193     if (std::error_code EC = Res.getError())
1194       return EC;
1195     return Location(Res.get());
1196   };
1197 
1198   size_t Sep = ParsingBuf.find_first_of(" \n");
1199   if (Sep == StringRef::npos)
1200     return parseOffset();
1201   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1202   if (LookAhead.find_first_of(":") == StringRef::npos)
1203     return parseOffset();
1204 
1205   ErrorOr<StringRef> BuildID = parseString(':');
1206   if (std::error_code EC = BuildID.getError())
1207     return EC;
1208   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1209   if (std::error_code EC = Offset.getError())
1210     return EC;
1211   return Location(true, BuildID.get(), Offset.get());
1212 }
1213 
1214 ErrorOr<DataAggregator::AggregatedLBREntry>
1215 DataAggregator::parseAggregatedLBREntry() {
1216   while (checkAndConsumeFS()) {
1217   }
1218 
1219   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1220   if (std::error_code EC = TypeOrErr.getError())
1221     return EC;
1222   auto Type = AggregatedLBREntry::BRANCH;
1223   if (TypeOrErr.get() == "B") {
1224     Type = AggregatedLBREntry::BRANCH;
1225   } else if (TypeOrErr.get() == "F") {
1226     Type = AggregatedLBREntry::FT;
1227   } else if (TypeOrErr.get() == "f") {
1228     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1229   } else {
1230     reportError("expected B, F or f");
1231     return make_error_code(llvm::errc::io_error);
1232   }
1233 
1234   while (checkAndConsumeFS()) {
1235   }
1236   ErrorOr<Location> From = parseLocationOrOffset();
1237   if (std::error_code EC = From.getError())
1238     return EC;
1239 
1240   while (checkAndConsumeFS()) {
1241   }
1242   ErrorOr<Location> To = parseLocationOrOffset();
1243   if (std::error_code EC = To.getError())
1244     return EC;
1245 
1246   while (checkAndConsumeFS()) {
1247   }
1248   ErrorOr<int64_t> Frequency =
1249       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1250   if (std::error_code EC = Frequency.getError())
1251     return EC;
1252 
1253   uint64_t Mispreds = 0;
1254   if (Type == AggregatedLBREntry::BRANCH) {
1255     while (checkAndConsumeFS()) {
1256     }
1257     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1258     if (std::error_code EC = MispredsOrErr.getError())
1259       return EC;
1260     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1261   }
1262 
1263   if (!checkAndConsumeNewLine()) {
1264     reportError("expected end of line");
1265     return make_error_code(llvm::errc::io_error);
1266   }
1267 
1268   return AggregatedLBREntry{From.get(), To.get(),
1269                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1270                             Type};
1271 }
1272 
1273 bool DataAggregator::hasData() {
1274   if (ParsingBuf.size() == 0)
1275     return false;
1276 
1277   return true;
1278 }
1279 
1280 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1281   return opts::IgnoreInterruptLBR &&
1282          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1283 }
1284 
1285 std::error_code DataAggregator::printLBRHeatMap() {
1286   outs() << "PERF2BOLT: parse branch events...\n";
1287   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1288                      TimerGroupDesc, opts::TimeAggregator);
1289 
1290   if (opts::LinuxKernelMode) {
1291     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1292     opts::HeatmapMinAddress = KernelBaseAddr;
1293   }
1294   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1295              opts::HeatmapMaxAddress);
1296   uint64_t NumTotalSamples = 0;
1297 
1298   while (hasData()) {
1299     if (opts::BasicAggregation) {
1300       ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1301       if (std::error_code EC = SampleRes.getError()) {
1302         if (EC == errc::no_such_process)
1303           continue;
1304         return EC;
1305       }
1306       PerfBasicSample &Sample = SampleRes.get();
1307       HM.registerAddress(Sample.PC);
1308       NumTotalSamples++;
1309     } else {
1310       ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1311       if (std::error_code EC = SampleRes.getError()) {
1312         if (EC == errc::no_such_process)
1313           continue;
1314         return EC;
1315       }
1316 
1317       PerfBranchSample &Sample = SampleRes.get();
1318 
1319       // LBRs are stored in reverse execution order. NextLBR refers to the next
1320       // executed branch record.
1321       const LBREntry *NextLBR = nullptr;
1322       for (const LBREntry &LBR : Sample.LBR) {
1323         if (NextLBR) {
1324           // Record fall-through trace.
1325           const uint64_t TraceFrom = LBR.To;
1326           const uint64_t TraceTo = NextLBR->From;
1327           ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1328         }
1329         NextLBR = &LBR;
1330       }
1331       if (!Sample.LBR.empty()) {
1332         HM.registerAddress(Sample.LBR.front().To);
1333         HM.registerAddress(Sample.LBR.back().From);
1334       }
1335       NumTotalSamples += Sample.LBR.size();
1336     }
1337   }
1338 
1339   if (!NumTotalSamples) {
1340     if (!opts::BasicAggregation) {
1341       errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1342                 "Cannot build heatmap. Use -nl for building heatmap from "
1343                 "basic events.\n";
1344     } else {
1345       errs() << "HEATMAP-ERROR: no samples detected in profile. "
1346                 "Cannot build heatmap.";
1347     }
1348     exit(1);
1349   }
1350 
1351   outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1352   outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1353 
1354   outs() << "HEATMAP: building heat map...\n";
1355 
1356   for (const auto &LBR : FallthroughLBRs) {
1357     const Trace &Trace = LBR.first;
1358     const FTInfo &Info = LBR.second;
1359     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1360   }
1361 
1362   if (HM.getNumInvalidRanges())
1363     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1364 
1365   if (!HM.size()) {
1366     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1367     exit(1);
1368   }
1369 
1370   HM.print(opts::OutputFilename);
1371   if (opts::OutputFilename == "-")
1372     HM.printCDF(opts::OutputFilename);
1373   else
1374     HM.printCDF(opts::OutputFilename + ".csv");
1375 
1376   return std::error_code();
1377 }
1378 
/// Parse branch samples from ParsingBuf and aggregate their LBR entries.
///
/// Each LBR entry is counted in BranchLBRs (taken and mispredicted counts),
/// and each pair of adjacent entries is counted as a fall-through trace in
/// FallthroughLBRs when both ends fall into the same function. Functions that
/// appear on either end of a recorded branch are marked as having a profile.
/// Parsing stops when the input is exhausted or opts::MaxSamples lines have
/// been consumed. A summary with diagnostics is printed at the end.
///
/// \returns a parse error other than errc::no_such_process, or success.
std::error_code DataAggregator::parseBranchEvents() {
  outs() << "PERF2BOLT: parse branch events...\n";
  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // NumTotalSamples counts every parsed line, including the ones skipped for
  // belonging to another process; NumSamples counts only the accepted ones.
  uint64_t NumTotalSamples = 0;
  uint64_t NumEntries = 0;
  uint64_t NumSamples = 0;
  uint64_t NumSamplesNoLBR = 0;
  uint64_t NumTraces = 0;
  bool NeedsSkylakeFix = false;

  while (hasData() && NumTotalSamples < opts::MaxSamples) {
    ++NumTotalSamples;

    ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
    if (std::error_code EC = SampleRes.getError()) {
      if (EC == errc::no_such_process)
        continue;
      return EC;
    }
    ++NumSamples;

    PerfBranchSample &Sample = SampleRes.get();
    if (opts::WriteAutoFDOData)
      ++BasicSamples[Sample.PC];

    if (Sample.LBR.empty()) {
      ++NumSamplesNoLBR;
      continue;
    }

    NumEntries += Sample.LBR.size();
    // Once triggered, the workaround stays enabled for all following samples.
    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
      errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
      NeedsSkylakeFix = true;
    }

    // LBRs are stored in reverse execution order. NextPC refers to the next
    // recorded executed PC.
    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
    uint32_t NumEntry = 0;
    for (const LBREntry &LBR : Sample.LBR) {
      ++NumEntry;
      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
      // sometimes record entry 32 as an exact copy of entry 31. This will cause
      // us to likely record an invalid trace and generate a stale function for
      // BAT mode (non BAT disassembles the function and is able to ignore this
      // trace at aggregation time). Drop first 2 entries (last two, in
      // chronological order)
      if (NeedsSkylakeFix && NumEntry <= 2)
        continue;
      if (NextPC) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextPC;
        const BinaryFunction *TraceBF =
            getBinaryFunctionContainingAddress(TraceFrom);
        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
          // The trace is internal when the branch that started it originated
          // in the same function, external otherwise.
          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
          if (TraceBF->containsAddress(LBR.From))
            ++Info.InternCount;
          else
            ++Info.ExternCount;
        } else {
          // Both ends in known but different functions: invalid trace.
          // At least one end outside of any known function: out of range.
          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
            LLVM_DEBUG(dbgs()
                       << "Invalid trace starting in "
                       << TraceBF->getPrintName() << " @ "
                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
                       << " and ending @ " << Twine::utohexstr(TraceTo)
                       << '\n');
            ++NumInvalidTraces;
          } else {
            LLVM_DEBUG(dbgs()
                       << "Out of range trace starting in "
                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
                       << Twine::utohexstr(
                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
                       << " and ending in "
                       << (getBinaryFunctionContainingAddress(TraceTo)
                               ? getBinaryFunctionContainingAddress(TraceTo)
                                     ->getPrintName()
                               : "None")
                       << " @ "
                       << Twine::utohexstr(
                              TraceTo -
                              (getBinaryFunctionContainingAddress(TraceTo)
                                   ? getBinaryFunctionContainingAddress(TraceTo)
                                         ->getAddress()
                                   : 0))
                       << '\n');
            ++NumLongRangeTraces;
          }
        }
        ++NumTraces;
      }
      NextPC = LBR.From;

      // Addresses outside of any known function are collapsed to 0; a branch
      // with both ends unknown is not recorded at all.
      uint64_t From = LBR.From;
      if (!getBinaryFunctionContainingAddress(From))
        From = 0;
      uint64_t To = LBR.To;
      if (!getBinaryFunctionContainingAddress(To))
        To = 0;
      if (!From && !To)
        continue;
      BranchInfo &Info = BranchLBRs[Trace(From, To)];
      ++Info.TakenCount;
      Info.MispredCount += LBR.Mispred;
    }
  }

  // Mark every function touched by a recorded branch as having profile data.
  for (const auto &LBR : BranchLBRs) {
    const Trace &Trace = LBR.first;
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
      BF->setHasProfileAvailable();
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
      BF->setHasProfileAvailable();
  }

  // Prints " (<Percent>%)", colored red above T2, yellow above T1 and green
  // otherwise when the stream supports colors.
  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
    OS << " (";
    if (OS.has_colors()) {
      if (Percent > T2)
        OS.changeColor(raw_ostream::RED);
      else if (Percent > T1)
        OS.changeColor(raw_ostream::YELLOW);
      else
        OS.changeColor(raw_ostream::GREEN);
    }
    OS << format("%.1f%%", Percent);
    if (OS.has_colors())
      OS.resetColor();
    OS << ")";
  };

  outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
         << " LBR entries\n";
  if (NumTotalSamples) {
    if (NumSamples && NumSamplesNoLBR == NumSamples) {
      // Note: we don't know if perf2bolt is being used to parse memory samples
      // at this point. In this case, it is OK to parse zero LBRs.
      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
                "LBR. Record profile with perf record -j any or run perf2bolt "
                "in no-LBR mode with -nl (the performance improvement in -nl "
                "mode may be limited)\n";
    } else {
      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
      printColored(outs(), PercentIgnored, 20, 50);
      outs() << " were ignored\n";
      if (PercentIgnored > 50.0f)
        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
                  "were attributed to the input binary\n";
    }
  }
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;
  float Perc = 0.0f;
  if (NumTraces > 0) {
    Perc = NumInvalidTraces * 100.0f / NumTraces;
    printColored(outs(), Perc, 5, 10);
  }
  outs() << "\n";
  if (Perc > 10.0f)
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance.\n\n";

  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;
  if (NumTraces > 0)
    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
  outs() << "\n";

  if (NumColdSamples > 0) {
    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << "PERF2BOLT: " << NumColdSamples
           << format(" (%.1f%%)", ColdSamples)
           << " samples recorded in cold regions of split functions.\n";
    if (ColdSamples > 5.0f)
      outs()
          << "WARNING: The BOLT-processed binary where samples were collected "
             "likely used bad data or your service observed a large shift in "
             "profile. You may want to audit this.\n";
  }

  return std::error_code();
}
1571 
1572 void DataAggregator::processBranchEvents() {
1573   outs() << "PERF2BOLT: processing branch events...\n";
1574   NamedRegionTimer T("processBranch", "Processing branch events",
1575                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1576 
1577   for (const auto &AggrLBR : FallthroughLBRs) {
1578     const Trace &Loc = AggrLBR.first;
1579     const FTInfo &Info = AggrLBR.second;
1580     LBREntry First{Loc.From, Loc.From, false};
1581     LBREntry Second{Loc.To, Loc.To, false};
1582     if (Info.InternCount)
1583       doTrace(First, Second, Info.InternCount);
1584     if (Info.ExternCount) {
1585       First.From = 0;
1586       doTrace(First, Second, Info.ExternCount);
1587     }
1588   }
1589 
1590   for (const auto &AggrLBR : BranchLBRs) {
1591     const Trace &Loc = AggrLBR.first;
1592     const BranchInfo &Info = AggrLBR.second;
1593     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1594   }
1595 }
1596 
1597 std::error_code DataAggregator::parseBasicEvents() {
1598   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1599   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1600                      TimerGroupDesc, opts::TimeAggregator);
1601   while (hasData()) {
1602     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1603     if (std::error_code EC = Sample.getError())
1604       return EC;
1605 
1606     if (!Sample->PC)
1607       continue;
1608 
1609     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1610       BF->setHasProfileAvailable();
1611 
1612     ++BasicSamples[Sample->PC];
1613     EventNames.insert(Sample->EventName);
1614   }
1615 
1616   return std::error_code();
1617 }
1618 
1619 void DataAggregator::processBasicEvents() {
1620   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1621   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1622                      TimerGroupDesc, opts::TimeAggregator);
1623   uint64_t OutOfRangeSamples = 0;
1624   uint64_t NumSamples = 0;
1625   for (auto &Sample : BasicSamples) {
1626     const uint64_t PC = Sample.first;
1627     const uint64_t HitCount = Sample.second;
1628     NumSamples += HitCount;
1629     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1630     if (!Func) {
1631       OutOfRangeSamples += HitCount;
1632       continue;
1633     }
1634 
1635     doSample(*Func, PC, HitCount);
1636   }
1637   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1638 
1639   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1640          << OutOfRangeSamples;
1641   float Perc = 0.0f;
1642   if (NumSamples > 0) {
1643     outs() << " (";
1644     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1645     if (outs().has_colors()) {
1646       if (Perc > 60.0f)
1647         outs().changeColor(raw_ostream::RED);
1648       else if (Perc > 40.0f)
1649         outs().changeColor(raw_ostream::YELLOW);
1650       else
1651         outs().changeColor(raw_ostream::GREEN);
1652     }
1653     outs() << format("%.1f%%", Perc);
1654     if (outs().has_colors())
1655       outs().resetColor();
1656     outs() << ")";
1657   }
1658   outs() << "\n";
1659   if (Perc > 80.0f)
1660     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1661               "binary is probably not the same binary used during profiling "
1662               "collection. The generated data may be ineffective for improving "
1663               "performance.\n\n";
1664 }
1665 
1666 std::error_code DataAggregator::parseMemEvents() {
1667   outs() << "PERF2BOLT: parsing memory events...\n";
1668   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1669                      TimerGroupDesc, opts::TimeAggregator);
1670   while (hasData()) {
1671     ErrorOr<PerfMemSample> Sample = parseMemSample();
1672     if (std::error_code EC = Sample.getError())
1673       return EC;
1674 
1675     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1676       BF->setHasProfileAvailable();
1677 
1678     MemSamples.emplace_back(std::move(Sample.get()));
1679   }
1680 
1681   return std::error_code();
1682 }
1683 
1684 void DataAggregator::processMemEvents() {
1685   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1686                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1687   for (const PerfMemSample &Sample : MemSamples) {
1688     uint64_t PC = Sample.PC;
1689     uint64_t Addr = Sample.Addr;
1690     StringRef FuncName;
1691     StringRef MemName;
1692 
1693     // Try to resolve symbol for PC
1694     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1695     if (!Func) {
1696       LLVM_DEBUG(if (PC != 0) {
1697         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1698                << Twine::utohexstr(Addr) << "\n";
1699       });
1700       continue;
1701     }
1702 
1703     FuncName = Func->getOneName();
1704     PC -= Func->getAddress();
1705 
1706     // Try to resolve symbol for memory load
1707     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1708       MemName = BD->getName();
1709       Addr -= BD->getAddress();
1710     } else if (opts::FilterMemProfile) {
1711       // Filter out heap/stack accesses
1712       continue;
1713     }
1714 
1715     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1716     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1717 
1718     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1719     setMemData(*Func, MemData);
1720     MemData->update(FuncLoc, AddrLoc);
1721     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1722   }
1723 }
1724 
1725 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1726   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1727   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1728                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1729   while (hasData()) {
1730     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1731     if (std::error_code EC = AggrEntry.getError())
1732       return EC;
1733 
1734     if (BinaryFunction *BF =
1735             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1736       BF->setHasProfileAvailable();
1737     if (BinaryFunction *BF =
1738             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1739       BF->setHasProfileAvailable();
1740 
1741     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1742   }
1743 
1744   return std::error_code();
1745 }
1746 
1747 void DataAggregator::processPreAggregated() {
1748   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1749   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1750                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1751 
1752   uint64_t NumTraces = 0;
1753   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1754     switch (AggrEntry.EntryType) {
1755     case AggregatedLBREntry::BRANCH:
1756       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1757                AggrEntry.Mispreds);
1758       break;
1759     case AggregatedLBREntry::FT:
1760     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1761       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1762                          ? AggrEntry.From.Offset
1763                          : 0,
1764                      AggrEntry.From.Offset, false};
1765       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1766       doTrace(First, Second, AggrEntry.Count);
1767       NumTraces += AggrEntry.Count;
1768       break;
1769     }
1770     }
1771   }
1772 
1773   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1774          << " aggregated LBR entries\n";
1775   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1776          << NumInvalidTraces;
1777   float Perc = 0.0f;
1778   if (NumTraces > 0) {
1779     outs() << " (";
1780     Perc = NumInvalidTraces * 100.0f / NumTraces;
1781     if (outs().has_colors()) {
1782       if (Perc > 10.0f)
1783         outs().changeColor(raw_ostream::RED);
1784       else if (Perc > 5.0f)
1785         outs().changeColor(raw_ostream::YELLOW);
1786       else
1787         outs().changeColor(raw_ostream::GREEN);
1788     }
1789     outs() << format("%.1f%%", Perc);
1790     if (outs().has_colors())
1791       outs().resetColor();
1792     outs() << ")";
1793   }
1794   outs() << "\n";
1795   if (Perc > 10.0f)
1796     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1797               "binary is probably not the same binary used during profiling "
1798               "collection. The generated data may be ineffective for improving "
1799               "performance.\n\n";
1800 
1801   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1802          << NumLongRangeTraces;
1803   if (NumTraces > 0)
1804     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1805   outs() << "\n";
1806 }
1807 
1808 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1809   size_t LineEnd = ParsingBuf.find_first_of("\n");
1810   if (LineEnd == StringRef::npos) {
1811     reportError("expected rest of line");
1812     Diag << "Found: " << ParsingBuf << "\n";
1813     return NoneType();
1814   }
1815   StringRef Line = ParsingBuf.substr(0, LineEnd);
1816 
1817   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1818   if (Pos == StringRef::npos)
1819     return NoneType();
1820   Line = Line.drop_front(Pos);
1821 
1822   // Line:
1823   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1824   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1825   int32_t PID;
1826   if (PIDStr.getAsInteger(10, PID)) {
1827     reportError("expected PID");
1828     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1829     return NoneType();
1830   }
1831 
1832   return PID;
1833 }
1834 
1835 namespace {
1836 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1837   const StringRef SecTimeStr = TimeStr.split('.').first;
1838   const StringRef USecTimeStr = TimeStr.split('.').second;
1839   uint64_t SecTime;
1840   uint64_t USecTime;
1841   if (SecTimeStr.getAsInteger(10, SecTime) ||
1842       USecTimeStr.getAsInteger(10, USecTime))
1843     return NoneType();
1844   return SecTime * 1000000ULL + USecTime;
1845 }
1846 }
1847 
1848 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1849   while (checkAndConsumeFS()) {
1850   }
1851 
1852   size_t LineEnd = ParsingBuf.find_first_of("\n");
1853   if (LineEnd == StringRef::npos) {
1854     reportError("expected rest of line");
1855     Diag << "Found: " << ParsingBuf << "\n";
1856     return NoneType();
1857   }
1858   StringRef Line = ParsingBuf.substr(0, LineEnd);
1859 
1860   size_t Pos = Line.find("PERF_RECORD_FORK");
1861   if (Pos == StringRef::npos) {
1862     consumeRestOfLine();
1863     return NoneType();
1864   }
1865 
1866   ForkInfo FI;
1867 
1868   const StringRef TimeStr =
1869       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1870   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1871     FI.Time = *TimeRes;
1872   }
1873 
1874   Line = Line.drop_front(Pos);
1875 
1876   // Line:
1877   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1878   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1879   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1880     reportError("expected PID");
1881     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1882     return NoneType();
1883   }
1884 
1885   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1886   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1887     reportError("expected PID");
1888     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1889     return NoneType();
1890   }
1891 
1892   consumeRestOfLine();
1893 
1894   return FI;
1895 }
1896 
1897 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1898 DataAggregator::parseMMapEvent() {
1899   while (checkAndConsumeFS()) {
1900   }
1901 
1902   MMapInfo ParsedInfo;
1903 
1904   size_t LineEnd = ParsingBuf.find_first_of("\n");
1905   if (LineEnd == StringRef::npos) {
1906     reportError("expected rest of line");
1907     Diag << "Found: " << ParsingBuf << "\n";
1908     return make_error_code(llvm::errc::io_error);
1909   }
1910   StringRef Line = ParsingBuf.substr(0, LineEnd);
1911 
1912   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1913   if (Pos == StringRef::npos) {
1914     consumeRestOfLine();
1915     return std::make_pair(StringRef(), ParsedInfo);
1916   }
1917 
1918   // Line:
1919   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1920 
1921   const StringRef TimeStr =
1922       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1923   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1924     ParsedInfo.Time = *TimeRes;
1925 
1926   Line = Line.drop_front(Pos);
1927 
1928   // Line:
1929   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1930 
1931   StringRef FileName = Line.rsplit(FieldSeparator).second;
1932   if (FileName.startswith("//") || FileName.startswith("[")) {
1933     consumeRestOfLine();
1934     return std::make_pair(StringRef(), ParsedInfo);
1935   }
1936   FileName = sys::path::filename(FileName);
1937 
1938   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1939   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1940     reportError("expected PID");
1941     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1942     return make_error_code(llvm::errc::io_error);
1943   }
1944 
1945   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1946   if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1947     reportError("expected base address");
1948     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1949     return make_error_code(llvm::errc::io_error);
1950   }
1951 
1952   const StringRef SizeStr = Line.split('(').second.split(')').first;
1953   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1954     reportError("expected mmaped size");
1955     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1956     return make_error_code(llvm::errc::io_error);
1957   }
1958 
1959   const StringRef OffsetStr =
1960       Line.split('@').second.ltrim().split(FieldSeparator).first;
1961   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1962     reportError("expected mmaped page-aligned offset");
1963     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1964     return make_error_code(llvm::errc::io_error);
1965   }
1966 
1967   consumeRestOfLine();
1968 
1969   return std::make_pair(FileName, ParsedInfo);
1970 }
1971 
1972 std::error_code DataAggregator::parseMMapEvents() {
1973   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1974   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1975                      TimerGroupDesc, opts::TimeAggregator);
1976 
1977   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1978   while (hasData()) {
1979     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1980     if (std::error_code EC = FileMMapInfoRes.getError())
1981       return EC;
1982 
1983     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1984     if (FileMMapInfo.second.PID == -1)
1985       continue;
1986 
1987     // Consider only the first mapping of the file for any given PID
1988     bool PIDExists = false;
1989     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1990     for (auto MI = Range.first; MI != Range.second; ++MI) {
1991       if (MI->second.PID == FileMMapInfo.second.PID) {
1992         PIDExists = true;
1993         break;
1994       }
1995     }
1996     if (PIDExists)
1997       continue;
1998 
1999     GlobalMMapInfo.insert(FileMMapInfo);
2000   }
2001 
2002   LLVM_DEBUG({
2003     dbgs() << "FileName -> mmap info:\n";
2004     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
2005       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
2006              << Twine::utohexstr(Pair.second.MMapAddress) << ", "
2007              << Twine::utohexstr(Pair.second.Size) << " @ "
2008              << Twine::utohexstr(Pair.second.Offset) << "]\n";
2009   });
2010 
2011   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2012   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2013     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2014            << "\" for profile matching\n";
2015     NameToUse = BuildIDBinaryName;
2016   }
2017 
2018   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2019   for (auto I = Range.first; I != Range.second; ++I) {
2020     MMapInfo &MMapInfo = I->second;
2021     if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2022       // Check that the binary mapping matches one of the segments.
2023       bool MatchFound = false;
2024       for (auto &KV : BC->SegmentMapInfo) {
2025         SegmentInfo &SegInfo = KV.second;
2026         // The mapping is page-aligned and hence the MMapAddress could be
2027         // different from the segment start address. We cannot know the page
2028         // size of the mapping, but we know it should not exceed the segment
2029         // alignment value. Hence we are performing an approximate check.
2030         if (SegInfo.Address >= MMapInfo.MMapAddress &&
2031             SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
2032           MatchFound = true;
2033           break;
2034         }
2035       }
2036       if (!MatchFound) {
2037         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2038                << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2039         continue;
2040       }
2041     }
2042 
2043     // Set base address for shared objects.
2044     if (!BC->HasFixedLoadAddress) {
2045       Optional<uint64_t> BaseAddress =
2046           BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2047       if (!BaseAddress) {
2048         errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2049                   "binary when memory mapped at 0x"
2050                << Twine::utohexstr(MMapInfo.MMapAddress)
2051                << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2052                << ". Ignoring profile data for this mapping\n";
2053         continue;
2054       } else {
2055         MMapInfo.BaseAddress = *BaseAddress;
2056       }
2057     }
2058 
2059     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2060   }
2061 
2062   if (BinaryMMapInfo.empty()) {
2063     if (errs().has_colors())
2064       errs().changeColor(raw_ostream::RED);
2065     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2066            << BC->getFilename() << "\".";
2067     if (!GlobalMMapInfo.empty()) {
2068       errs() << " Profile for the following binary name(s) is available:\n";
2069       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2070            I = GlobalMMapInfo.upper_bound(I->first))
2071         errs() << "  " << I->first << '\n';
2072       errs() << "Please rename the input binary.\n";
2073     } else {
2074       errs() << " Failed to extract any binary name from a profile.\n";
2075     }
2076     if (errs().has_colors())
2077       errs().resetColor();
2078 
2079     exit(1);
2080   }
2081 
2082   return std::error_code();
2083 }
2084 
2085 std::error_code DataAggregator::parseTaskEvents() {
2086   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2087   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2088                      TimerGroupDesc, opts::TimeAggregator);
2089 
2090   while (hasData()) {
2091     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2092       // Remove forked child that ran execve
2093       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2094       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2095         BinaryMMapInfo.erase(MMapInfoIter);
2096       consumeRestOfLine();
2097       continue;
2098     }
2099 
2100     Optional<ForkInfo> ForkInfo = parseForkEvent();
2101     if (!ForkInfo)
2102       continue;
2103 
2104     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2105       continue;
2106 
2107     if (ForkInfo->Time == 0) {
2108       // Process was forked and mmaped before perf ran. In this case the child
2109       // should have its own mmap entry unless it was execve'd.
2110       continue;
2111     }
2112 
2113     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2114     if (MMapInfoIter == BinaryMMapInfo.end())
2115       continue;
2116 
2117     MMapInfo MMapInfo = MMapInfoIter->second;
2118     MMapInfo.PID = ForkInfo->ChildPID;
2119     MMapInfo.Forked = true;
2120     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2121   }
2122 
2123   outs() << "PERF2BOLT: input binary is associated with "
2124          << BinaryMMapInfo.size() << " PID(s)\n";
2125 
2126   LLVM_DEBUG({
2127     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2128       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2129              << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
2130              << Twine::utohexstr(MMI.second.Size) << ")\n";
2131   });
2132 
2133   return std::error_code();
2134 }
2135 
2136 Optional<std::pair<StringRef, StringRef>>
2137 DataAggregator::parseNameBuildIDPair() {
2138   while (checkAndConsumeFS()) {
2139   }
2140 
2141   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2142   if (std::error_code EC = BuildIDStr.getError())
2143     return NoneType();
2144 
2145   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2146   if (std::error_code EC = NameStr.getError())
2147     return NoneType();
2148 
2149   consumeRestOfLine();
2150   return std::make_pair(NameStr.get(), BuildIDStr.get());
2151 }
2152 
2153 Optional<StringRef>
2154 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2155   while (hasData()) {
2156     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2157     if (!IDPair)
2158       return NoneType();
2159 
2160     if (IDPair->second.startswith(FileBuildID))
2161       return sys::path::filename(IDPair->first);
2162   }
2163   return NoneType();
2164 }
2165 
2166 std::error_code
2167 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2168   std::error_code EC;
2169   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2170   if (EC)
2171     return EC;
2172 
2173   bool WriteMemLocs = false;
2174 
2175   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2176     if (WriteMemLocs)
2177       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2178     else
2179       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2180     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2181             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2182   };
2183 
2184   uint64_t BranchValues = 0;
2185   uint64_t MemValues = 0;
2186 
2187   if (BAT)
2188     OutFile << "boltedcollection\n";
2189   if (opts::BasicAggregation) {
2190     OutFile << "no_lbr";
2191     for (const StringMapEntry<NoneType> &Entry : EventNames)
2192       OutFile << " " << Entry.getKey();
2193     OutFile << "\n";
2194 
2195     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2196       for (const SampleInfo &SI : Func.getValue().Data) {
2197         writeLocation(SI.Loc);
2198         OutFile << SI.Hits << "\n";
2199         ++BranchValues;
2200       }
2201     }
2202   } else {
2203     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2204       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2205         writeLocation(BI.From);
2206         writeLocation(BI.To);
2207         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2208         ++BranchValues;
2209       }
2210       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2211         // Do not output if source is a known symbol, since this was already
2212         // accounted for in the source function
2213         if (BI.From.IsSymbol)
2214           continue;
2215         writeLocation(BI.From);
2216         writeLocation(BI.To);
2217         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2218         ++BranchValues;
2219       }
2220     }
2221 
2222     WriteMemLocs = true;
2223     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2224       for (const MemInfo &MemEvent : Func.getValue().Data) {
2225         writeLocation(MemEvent.Offset);
2226         writeLocation(MemEvent.Addr);
2227         OutFile << MemEvent.Count << "\n";
2228         ++MemValues;
2229       }
2230     }
2231   }
2232 
2233   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2234          << " memory objects to " << OutputFilename << "\n";
2235 
2236   return std::error_code();
2237 }
2238 
2239 void DataAggregator::dump() const { DataReader::dump(); }
2240 
2241 void DataAggregator::dump(const LBREntry &LBR) const {
2242   Diag << "From: " << Twine::utohexstr(LBR.From)
2243        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2244        << "\n";
2245 }
2246 
2247 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2248   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2249   for (const LBREntry &LBR : Sample.LBR)
2250     dump(LBR);
2251 }
2252 
2253 void DataAggregator::dump(const PerfMemSample &Sample) const {
2254   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2255 }
2256