1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Process.h"
27 #include "llvm/Support/Program.h"
28 #include "llvm/Support/Regex.h"
29 #include "llvm/Support/Timer.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <map>
32 #include <unordered_map>
33 #include <utility>
34 
35 #define DEBUG_TYPE "aggregator"
36 
37 using namespace llvm;
38 using namespace bolt;
39 
40 namespace opts {
41 
42 static cl::opt<bool>
43 BasicAggregation("nl",
44   cl::desc("aggregate basic samples (without LBR info)"),
45   cl::init(false),
46   cl::ZeroOrMore,
47   cl::cat(AggregatorCategory));
48 
49 static cl::opt<bool>
50 FilterMemProfile("filter-mem-profile",
51   cl::desc("if processing a memory profile, filter out stack or heap accesses "
52            "that won't be useful for BOLT to reduce profile file size"),
53   cl::init(true),
54   cl::cat(AggregatorCategory));
55 
56 static cl::opt<unsigned long long>
57 FilterPID("pid",
58   cl::desc("only use samples from process with specified PID"),
59   cl::init(0),
60   cl::Optional,
61   cl::cat(AggregatorCategory));
62 
63 static cl::opt<bool>
64 IgnoreBuildID("ignore-build-id",
65   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
66   cl::init(false),
67   cl::cat(AggregatorCategory));
68 
69 static cl::opt<bool>
70 IgnoreInterruptLBR("ignore-interrupt-lbr",
71   cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
72   cl::init(true),
73   cl::ZeroOrMore,
74   cl::cat(AggregatorCategory));
75 
76 static cl::opt<unsigned long long>
77 MaxSamples("max-samples",
78   cl::init(-1ULL),
79   cl::desc("maximum number of samples to read from LBR profile"),
80   cl::Optional,
81   cl::Hidden,
82   cl::cat(AggregatorCategory));
83 
84 static cl::opt<bool>
85 ReadPreAggregated("pa",
86   cl::desc("skip perf and read data from a pre-aggregated file format"),
87   cl::init(false),
88   cl::ZeroOrMore,
89   cl::cat(AggregatorCategory));
90 
91 static cl::opt<bool>
92 TimeAggregator("time-aggr",
93   cl::desc("time BOLT aggregator"),
94   cl::init(false),
95   cl::ZeroOrMore,
96   cl::cat(AggregatorCategory));
97 
98 static cl::opt<bool>
99 UseEventPC("use-event-pc",
100   cl::desc("use event PC in combination with LBR sampling"),
101   cl::init(false),
102   cl::ZeroOrMore,
103   cl::cat(AggregatorCategory));
104 
105 static cl::opt<bool>
106 WriteAutoFDOData("autofdo",
107   cl::desc("generate autofdo textual data instead of bolt data"),
108   cl::init(false),
109   cl::ZeroOrMore,
110   cl::cat(AggregatorCategory));
111 
112 } // namespace opts
113 
namespace {

// Timer group identifier and human-readable description handed to
// NamedRegionTimer by the aggregator.
constexpr char TimerGroupName[] = "aggregator";
constexpr char TimerGroupDesc[] = "Aggregator";

} // end anonymous namespace
120 
// Out-of-class definition of the constexpr static member. No initializer is
// given here, so the value must come from the in-class declaration.
constexpr uint64_t DataAggregator::KernelBaseAddr;
122 
123 DataAggregator::~DataAggregator() { deleteTempFiles(); }
124 
125 namespace {
126 void deleteTempFile(const std::string &FileName) {
127   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
128     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
129            << " with error " << Errc.message() << "\n";
130 }
131 }
132 
133 void DataAggregator::deleteTempFiles() {
134   for (std::string &FileName : TempFiles)
135     deleteTempFile(FileName);
136   TempFiles.clear();
137 }
138 
139 void DataAggregator::findPerfExecutable() {
140   Optional<std::string> PerfExecutable =
141       sys::Process::FindInEnvPath("PATH", "perf");
142   if (!PerfExecutable) {
143     outs() << "PERF2BOLT: No perf executable found!\n";
144     exit(1);
145   }
146   PerfPath = *PerfExecutable;
147 }
148 
149 void DataAggregator::start() {
150   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
151 
152   // Don't launch perf for pre-aggregated files
153   if (opts::ReadPreAggregated)
154     return;
155 
156   findPerfExecutable();
157 
158   if (opts::BasicAggregation)
159     launchPerfProcess("events without LBR",
160                       MainEventsPPI,
161                       "script -F pid,event,ip",
162                       /*Wait = */false);
163   else
164     launchPerfProcess("branch events",
165                       MainEventsPPI,
166                       "script -F pid,ip,brstack",
167                       /*Wait = */false);
168 
169   // Note: we launch script for mem events regardless of the option, as the
170   //       command fails fairly fast if mem events were not collected.
171   launchPerfProcess("mem events",
172                     MemEventsPPI,
173                     "script -F pid,event,addr,ip",
174                     /*Wait = */false);
175 
176   launchPerfProcess("process events",
177                     MMapEventsPPI,
178                     "script --show-mmap-events",
179                     /*Wait = */false);
180 
181   launchPerfProcess("task events",
182                     TaskEventsPPI,
183                     "script --show-task-events",
184                     /*Wait = */false);
185 }
186 
187 void DataAggregator::abort() {
188   if (opts::ReadPreAggregated)
189     return;
190 
191   std::string Error;
192 
193   // Kill subprocesses in case they are not finished
194   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
195   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
196   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
197   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
198 
199   deleteTempFiles();
200 
201   exit(1);
202 }
203 
204 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
205                                        const char *ArgsString, bool Wait) {
206   SmallVector<StringRef, 4> Argv;
207 
208   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
209   Argv.push_back(PerfPath.data());
210 
211   char *WritableArgsString = strdup(ArgsString);
212   char *Str = WritableArgsString;
213   do {
214     Argv.push_back(Str);
215     while (*Str && *Str != ' ')
216       ++Str;
217     if (!*Str)
218       break;
219     *Str++ = 0;
220   } while (true);
221 
222   Argv.push_back("-f");
223   Argv.push_back("-i");
224   Argv.push_back(Filename.c_str());
225 
226   if (std::error_code Errc =
227           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
228     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
229            << " with error " << Errc.message() << "\n";
230     exit(1);
231   }
232   TempFiles.push_back(PPI.StdoutPath.data());
233 
234   if (std::error_code Errc =
235           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
236     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
237            << " with error " << Errc.message() << "\n";
238     exit(1);
239   }
240   TempFiles.push_back(PPI.StderrPath.data());
241 
242   Optional<StringRef> Redirects[] = {
243       llvm::None,                        // Stdin
244       StringRef(PPI.StdoutPath.data()),  // Stdout
245       StringRef(PPI.StderrPath.data())}; // Stderr
246 
247   LLVM_DEBUG({
248     dbgs() << "Launching perf: ";
249     for (StringRef Arg : Argv)
250       dbgs() << Arg << " ";
251     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
252            << "\n";
253   });
254 
255   if (Wait)
256     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
257                                             /*envp*/ llvm::None, Redirects);
258   else
259     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
260                                 Redirects);
261 
262   free(WritableArgsString);
263 }
264 
265 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
266   PerfProcessInfo BuildIDProcessInfo;
267   launchPerfProcess("buildid list",
268                     BuildIDProcessInfo,
269                     "buildid-list",
270                     /*Wait = */true);
271 
272   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
273     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
274         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
275     StringRef ErrBuf = (*MB)->getBuffer();
276 
277     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
278            << '\n';
279     errs() << ErrBuf;
280     return;
281   }
282 
283   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
284       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
285   if (std::error_code EC = MB.getError()) {
286     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
287            << EC.message() << "\n";
288     return;
289   }
290 
291   FileBuf = std::move(*MB);
292   ParsingBuf = FileBuf->getBuffer();
293   if (ParsingBuf.empty()) {
294     errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
295               "data was recorded without it\n";
296     return;
297   }
298 
299   Col = 0;
300   Line = 1;
301   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
302   if (!FileName) {
303     errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
304               "This indicates the input binary supplied for data aggregation "
305               "is not the same recorded by perf when collecting profiling "
306               "data, or there were no samples recorded for the binary. "
307               "Use -ignore-build-id option to override.\n";
308     if (!opts::IgnoreBuildID)
309       abort();
310   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
311     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
312     BuildIDBinaryName = std::string(*FileName);
313   } else {
314     outs() << "PERF2BOLT: matched build-id and file name\n";
315   }
316 
317   return;
318 }
319 
320 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
321   if (opts::ReadPreAggregated)
322     return true;
323 
324   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
325   if (!FD)
326     return false;
327 
328   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
329 
330   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
331   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
332       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
333   if (!BytesRead || *BytesRead != 7)
334     return false;
335 
336   if (strncmp(Buf, "PERFILE", 7) == 0)
337     return true;
338   return false;
339 }
340 
341 void DataAggregator::parsePreAggregated() {
342   std::string Error;
343 
344   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
345       MemoryBuffer::getFileOrSTDIN(Filename);
346   if (std::error_code EC = MB.getError()) {
347     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
348            << EC.message() << "\n";
349     exit(1);
350   }
351 
352   FileBuf = std::move(*MB);
353   ParsingBuf = FileBuf->getBuffer();
354   Col = 0;
355   Line = 1;
356   if (parsePreAggregatedLBRSamples()) {
357     errs() << "PERF2BOLT: failed to parse samples\n";
358     exit(1);
359   }
360 }
361 
362 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
363   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
364   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
365                      TimerGroupDesc, opts::TimeAggregator);
366 
367   std::error_code EC;
368   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
369   if (EC)
370     return EC;
371 
372   // Format:
373   // number of unique traces
374   // from_1-to_1:count_1
375   // from_2-to_2:count_2
376   // ......
377   // from_n-to_n:count_n
378   // number of unique sample addresses
379   // addr_1:count_1
380   // addr_2:count_2
381   // ......
382   // addr_n:count_n
383   // number of unique LBR entries
384   // src_1->dst_1:count_1
385   // src_2->dst_2:count_2
386   // ......
387   // src_n->dst_n:count_n
388 
389   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
390 
391   // AutoFDO addresses are relative to the first allocated loadable program
392   // segment
393   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
394     if (Address < FirstAllocAddress)
395       return 0;
396     return Address - FirstAllocAddress;
397   };
398 
399   OutFile << FallthroughLBRs.size() << "\n";
400   for (const auto &AggrLBR : FallthroughLBRs) {
401     const Trace &Trace = AggrLBR.first;
402     const FTInfo &Info = AggrLBR.second;
403     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
404             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
405             << (Info.InternCount + Info.ExternCount) << "\n";
406   }
407 
408   OutFile << BasicSamples.size() << "\n";
409   for (const auto &Sample : BasicSamples) {
410     uint64_t PC = Sample.first;
411     uint64_t HitCount = Sample.second;
412     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
413   }
414 
415   OutFile << BranchLBRs.size() << "\n";
416   for (const auto &AggrLBR : BranchLBRs) {
417     const Trace &Trace = AggrLBR.first;
418     const BranchInfo &Info = AggrLBR.second;
419     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
420             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
421             << Info.TakenCount << "\n";
422   }
423 
424   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
425          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
426          << " unique branches to " << OutputFilename << "\n";
427 
428   return std::error_code();
429 }
430 
431 void DataAggregator::filterBinaryMMapInfo() {
432   if (opts::FilterPID) {
433     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
434     if (MMapInfoIter != BinaryMMapInfo.end()) {
435       MMapInfo MMap = MMapInfoIter->second;
436       BinaryMMapInfo.clear();
437       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
438     } else {
439       if (errs().has_colors())
440         errs().changeColor(raw_ostream::RED);
441       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
442              << opts::FilterPID << "\""
443              << " for binary \"" << BC->getFilename() << "\".";
444       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
445       errs() << " Profile for the following process is available:\n";
446       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
447         outs() << "  " << MMI.second.PID
448                << (MMI.second.Forked ? " (forked)\n" : "\n");
449 
450       if (errs().has_colors())
451         errs().resetColor();
452 
453       exit(1);
454     }
455   }
456 }
457 
458 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
459   this->BC = &BC;
460 
461   if (opts::ReadPreAggregated) {
462     parsePreAggregated();
463     return Error::success();
464   }
465 
466   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
467     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
468     processFileBuildID(*FileBuildID);
469   } else {
470     errs() << "BOLT-WARNING: build-id will not be checked because we could "
471               "not read one from input binary\n";
472   }
473 
474   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
475     std::string Error;
476     outs() << "PERF2BOLT: waiting for perf " << Name
477            << " collection to finish...\n";
478     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
479 
480     if (!Error.empty()) {
481       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
482       deleteTempFiles();
483       exit(1);
484     }
485 
486     if (PI.ReturnCode != 0) {
487       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
488           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
489       StringRef ErrBuf = (*ErrorMB)->getBuffer();
490 
491       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
492       errs() << ErrBuf;
493       deleteTempFiles();
494       exit(1);
495     }
496 
497     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
498         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
499     if (std::error_code EC = MB.getError()) {
500       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
501              << EC.message() << "\n";
502       deleteTempFiles();
503       exit(1);
504     }
505 
506     FileBuf = std::move(*MB);
507     ParsingBuf = FileBuf->getBuffer();
508     Col = 0;
509     Line = 1;
510   };
511 
512   if (opts::LinuxKernelMode) {
513     // Current MMap parsing logic does not work with linux kernel.
514     // MMap entries for linux kernel uses PERF_RECORD_MMAP
515     // format instead of typical PERF_RECORD_MMAP2 format.
516     // Since linux kernel address mapping is absolute (same as
517     // in the ELF file), we avoid parsing MMap in linux kernel mode.
518     // While generating optimized linux kernel binary, we may need
519     // to parse MMap entries.
520 
521     // In linux kernel mode, we analyze and optimize
522     // all linux kernel binary instructions, irrespective
523     // of whether they are due to system calls or due to
524     // interrupts. Therefore, we cannot ignore interrupt
525     // in Linux kernel mode.
526     opts::IgnoreInterruptLBR = false;
527   } else {
528     prepareToParse("mmap events", MMapEventsPPI);
529     if (parseMMapEvents())
530       errs() << "PERF2BOLT: failed to parse mmap events\n";
531   }
532 
533   prepareToParse("task events", TaskEventsPPI);
534   if (parseTaskEvents())
535     errs() << "PERF2BOLT: failed to parse task events\n";
536 
537   filterBinaryMMapInfo();
538   prepareToParse("events", MainEventsPPI);
539 
540   if (opts::HeatmapMode) {
541     if (std::error_code EC = printLBRHeatMap()) {
542       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
543       exit(1);
544     }
545     exit(0);
546   }
547 
548   if ((!opts::BasicAggregation && parseBranchEvents()) ||
549       (opts::BasicAggregation && parseBasicEvents()))
550     errs() << "PERF2BOLT: failed to parse samples\n";
551 
552   // We can finish early if the goal is just to generate data for autofdo
553   if (opts::WriteAutoFDOData) {
554     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
555       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
556 
557     deleteTempFiles();
558     exit(0);
559   }
560 
561   // Special handling for memory events
562   std::string Error;
563   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
564   if (PI.ReturnCode != 0) {
565     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
566         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
567     StringRef ErrBuf = (*MB)->getBuffer();
568 
569     deleteTempFiles();
570 
571     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
572                  "Cannot print 'addr' field.");
573     if (!NoData.match(ErrBuf)) {
574       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
575       errs() << ErrBuf;
576       exit(1);
577     }
578     return Error::success();
579   }
580 
581   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
582       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
583   if (std::error_code EC = MB.getError()) {
584     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
585            << EC.message() << "\n";
586     deleteTempFiles();
587     exit(1);
588   }
589 
590   FileBuf = std::move(*MB);
591   ParsingBuf = FileBuf->getBuffer();
592   Col = 0;
593   Line = 1;
594   if (const std::error_code EC = parseMemEvents())
595     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
596            << '\n';
597 
598   deleteTempFiles();
599 
600   return Error::success();
601 }
602 
603 Error DataAggregator::readProfile(BinaryContext &BC) {
604   processProfile(BC);
605 
606   for (auto &BFI : BC.getBinaryFunctions()) {
607     BinaryFunction &Function = BFI.second;
608     convertBranchData(Function);
609   }
610 
611   if (opts::AggregateOnly) {
612     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
613       report_error("cannot create output data file", EC);
614   }
615 
616   return Error::success();
617 }
618 
619 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
620   return Function.hasProfileAvailable();
621 }
622 
623 void DataAggregator::processProfile(BinaryContext &BC) {
624   if (opts::ReadPreAggregated)
625     processPreAggregated();
626   else if (opts::BasicAggregation)
627     processBasicEvents();
628   else
629     processBranchEvents();
630 
631   processMemEvents();
632 
633   // Mark all functions with registered events as having a valid profile.
634   for (auto &BFI : BC.getBinaryFunctions()) {
635     BinaryFunction &BF = BFI.second;
636     if (getBranchData(BF)) {
637       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
638                                                 : BinaryFunction::PF_LBR;
639       BF.markProfiled(Flags);
640     }
641   }
642 
643   // Release intermediate storage.
644   clear(BranchLBRs);
645   clear(FallthroughLBRs);
646   clear(AggregatedLBRs);
647   clear(BasicSamples);
648   clear(MemSamples);
649 }
650 
651 BinaryFunction *
652 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
653   if (!BC->containsAddress(Address))
654     return nullptr;
655 
656   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
657                                                 /*UseMaxSize=*/true);
658 }
659 
660 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
661                                           uint64_t Count) {
662   if (!BAT)
663     return Func.getOneName();
664 
665   const BinaryFunction *OrigFunc = &Func;
666   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
667     NumColdSamples += Count;
668     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
669     if (HotFunc)
670       OrigFunc = HotFunc;
671   }
672   // If it is a local function, prefer the name containing the file name where
673   // the local function was declared
674   for (StringRef AlternativeName : OrigFunc->getNames()) {
675     size_t FileNameIdx = AlternativeName.find('/');
676     // Confirm the alternative name has the pattern Symbol/FileName/1 before
677     // using it
678     if (FileNameIdx == StringRef::npos ||
679         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
680       continue;
681     return AlternativeName;
682   }
683   return OrigFunc->getOneName();
684 }
685 
686 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
687                               uint64_t Count) {
688   auto I = NamesToSamples.find(Func.getOneName());
689   if (I == NamesToSamples.end()) {
690     bool Success;
691     StringRef LocName = getLocationName(Func, Count);
692     std::tie(I, Success) = NamesToSamples.insert(
693         std::make_pair(Func.getOneName(),
694                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
695   }
696 
697   Address -= Func.getAddress();
698   if (BAT)
699     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
700 
701   I->second.bumpCount(Address, Count);
702   return true;
703 }
704 
705 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
706                                    uint64_t To, uint64_t Count,
707                                    uint64_t Mispreds) {
708   FuncBranchData *AggrData = getBranchData(Func);
709   if (!AggrData) {
710     AggrData = &NamesToBranches[Func.getOneName()];
711     AggrData->Name = getLocationName(Func, Count);
712     setBranchData(Func, AggrData);
713   }
714 
715   From -= Func.getAddress();
716   To -= Func.getAddress();
717   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
718                     << " @ " << Twine::utohexstr(From) << " -> "
719                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
720                     << '\n');
721   if (BAT) {
722     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
723     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
724     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
725                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
726                       << " -> " << Func.getPrintName() << " @ "
727                       << Twine::utohexstr(To) << '\n');
728   }
729 
730   AggrData->bumpBranchCount(From, To, Count, Mispreds);
731   return true;
732 }
733 
734 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
735                                    BinaryFunction *ToFunc, uint64_t From,
736                                    uint64_t To, uint64_t Count,
737                                    uint64_t Mispreds) {
738   FuncBranchData *FromAggrData = nullptr;
739   FuncBranchData *ToAggrData = nullptr;
740   StringRef SrcFunc;
741   StringRef DstFunc;
742   if (FromFunc) {
743     SrcFunc = getLocationName(*FromFunc, Count);
744     FromAggrData = getBranchData(*FromFunc);
745     if (!FromAggrData) {
746       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
747       FromAggrData->Name = SrcFunc;
748       setBranchData(*FromFunc, FromAggrData);
749     }
750     From -= FromFunc->getAddress();
751     if (BAT)
752       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
753 
754     recordExit(*FromFunc, From, Mispreds, Count);
755   }
756   if (ToFunc) {
757     DstFunc = getLocationName(*ToFunc, 0);
758     ToAggrData = getBranchData(*ToFunc);
759     if (!ToAggrData) {
760       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
761       ToAggrData->Name = DstFunc;
762       setBranchData(*ToFunc, ToAggrData);
763     }
764     To -= ToFunc->getAddress();
765     if (BAT)
766       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
767 
768     recordEntry(*ToFunc, To, Mispreds, Count);
769   }
770 
771   if (FromAggrData)
772     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
773                                 Count, Mispreds);
774   if (ToAggrData)
775     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
776                                Count, Mispreds);
777   return true;
778 }
779 
780 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
781                               uint64_t Mispreds) {
782   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
783   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
784   if (!FromFunc && !ToFunc)
785     return false;
786 
787   if (FromFunc == ToFunc) {
788     recordBranch(*FromFunc, From - FromFunc->getAddress(),
789                  To - FromFunc->getAddress(), Count, Mispreds);
790     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
791   }
792 
793   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
794 }
795 
796 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
797                              uint64_t Count) {
798   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
799   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
800   if (!FromFunc || !ToFunc) {
801     LLVM_DEBUG(
802         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
803                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
804                << " and ending in " << ToFunc->getPrintName() << " @ "
805                << ToFunc->getPrintName() << " @ "
806                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
807     NumLongRangeTraces += Count;
808     return false;
809   }
810   if (FromFunc != ToFunc) {
811     NumInvalidTraces += Count;
812     LLVM_DEBUG(
813         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
814                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
815                << " and ending in " << ToFunc->getPrintName() << " @ "
816                << ToFunc->getPrintName() << " @ "
817                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
818     return false;
819   }
820 
821   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
822       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
823           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
824   if (!FTs) {
825     LLVM_DEBUG(
826         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
827                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
828                << " and ending in " << ToFunc->getPrintName() << " @ "
829                << ToFunc->getPrintName() << " @ "
830                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
831     NumInvalidTraces += Count;
832     return false;
833   }
834 
835   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
836                     << FromFunc->getPrintName() << ":"
837                     << Twine::utohexstr(First.To) << " to "
838                     << Twine::utohexstr(Second.From) << ".\n");
839   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
840     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
841                   Pair.second + FromFunc->getAddress(), Count, false);
842 
843   return true;
844 }
845 
846 bool DataAggregator::recordTrace(
847     BinaryFunction &BF,
848     const LBREntry &FirstLBR,
849     const LBREntry &SecondLBR,
850     uint64_t Count,
851     SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
852   BinaryContext &BC = BF.getBinaryContext();
853 
854   if (!BF.isSimple())
855     return false;
856 
857   assert(BF.hasCFG() && "can only record traces in CFG state");
858 
859   // Offsets of the trace within this function.
860   const uint64_t From = FirstLBR.To - BF.getAddress();
861   const uint64_t To = SecondLBR.From - BF.getAddress();
862 
863   if (From > To)
864     return false;
865 
866   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
867   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
868 
869   if (!FromBB || !ToBB)
870     return false;
871 
872   // Adjust FromBB if the first LBR is a return from the last instruction in
873   // the previous block (that instruction should be a call).
874   if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
875       !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
876     BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
877     if (PrevBB->getSuccessor(FromBB->getLabel())) {
878       const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
879       if (Instr && BC.MIB->isCall(*Instr))
880         FromBB = PrevBB;
881       else
882         LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
883                           << '\n');
884     } else {
885       LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
886     }
887   }
888 
889   // Fill out information for fall-through edges. The From and To could be
890   // within the same basic block, e.g. when two call instructions are in the
891   // same block. In this case we skip the processing.
892   if (FromBB == ToBB)
893     return true;
894 
895   // Process blocks in the original layout order.
896   BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
897   assert(BB == FromBB && "index mismatch");
898   while (BB != ToBB) {
899     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
900     assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
901 
902     // Check for bad LBRs.
903     if (!BB->getSuccessor(NextBB->getLabel())) {
904       LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
905                         << "  " << FirstLBR << '\n'
906                         << "  " << SecondLBR << '\n');
907       return false;
908     }
909 
910     // Record fall-through jumps
911     BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
912     BI.Count += Count;
913 
914     if (Branches) {
915       const MCInst *Instr = BB->getLastNonPseudoInstr();
916       uint64_t Offset = 0;
917       if (Instr)
918         Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
919       else
920         Offset = BB->getOffset();
921 
922       Branches->emplace_back(Offset, NextBB->getOffset());
923     }
924 
925     BB = NextBB;
926   }
927 
928   return true;
929 }
930 
931 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
932 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
933                                        const LBREntry &FirstLBR,
934                                        const LBREntry &SecondLBR,
935                                        uint64_t Count) const {
936   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
937 
938   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
939     return NoneType();
940 
941   return Res;
942 }
943 
944 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
945                                  uint64_t Count) const {
946   if (To > BF.getSize())
947     return false;
948 
949   if (!BF.hasProfile())
950     BF.ExecutionCount = 0;
951 
952   BinaryBasicBlock *EntryBB = nullptr;
953   if (To == 0) {
954     BF.ExecutionCount += Count;
955     if (!BF.empty())
956       EntryBB = &BF.front();
957   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
958     if (BB->isEntryPoint())
959       EntryBB = BB;
960   }
961 
962   if (EntryBB)
963     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
964 
965   return true;
966 }
967 
968 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
969                                 uint64_t Count) const {
970   if (!BF.isSimple() || From > BF.getSize())
971     return false;
972 
973   if (!BF.hasProfile())
974     BF.ExecutionCount = 0;
975 
976   return true;
977 }
978 
979 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
980   LBREntry Res;
981   ErrorOr<StringRef> FromStrRes = parseString('/');
982   if (std::error_code EC = FromStrRes.getError())
983     return EC;
984   StringRef OffsetStr = FromStrRes.get();
985   if (OffsetStr.getAsInteger(0, Res.From)) {
986     reportError("expected hexadecimal number with From address");
987     Diag << "Found: " << OffsetStr << "\n";
988     return make_error_code(llvm::errc::io_error);
989   }
990 
991   ErrorOr<StringRef> ToStrRes = parseString('/');
992   if (std::error_code EC = ToStrRes.getError())
993     return EC;
994   OffsetStr = ToStrRes.get();
995   if (OffsetStr.getAsInteger(0, Res.To)) {
996     reportError("expected hexadecimal number with To address");
997     Diag << "Found: " << OffsetStr << "\n";
998     return make_error_code(llvm::errc::io_error);
999   }
1000 
1001   ErrorOr<StringRef> MispredStrRes = parseString('/');
1002   if (std::error_code EC = MispredStrRes.getError())
1003     return EC;
1004   StringRef MispredStr = MispredStrRes.get();
1005   if (MispredStr.size() != 1 ||
1006       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1007     reportError("expected single char for mispred bit");
1008     Diag << "Found: " << MispredStr << "\n";
1009     return make_error_code(llvm::errc::io_error);
1010   }
1011   Res.Mispred = MispredStr[0] == 'M';
1012 
1013   static bool MispredWarning = true;
1014   if (MispredStr[0] == '-' && MispredWarning) {
1015     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1016     MispredWarning = false;
1017   }
1018 
1019   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1020   if (std::error_code EC = Rest.getError())
1021     return EC;
1022   if (Rest.get().size() < 5) {
1023     reportError("expected rest of LBR entry");
1024     Diag << "Found: " << Rest.get() << "\n";
1025     return make_error_code(llvm::errc::io_error);
1026   }
1027   return Res;
1028 }
1029 
1030 bool DataAggregator::checkAndConsumeFS() {
1031   if (ParsingBuf[0] != FieldSeparator)
1032     return false;
1033 
1034   ParsingBuf = ParsingBuf.drop_front(1);
1035   Col += 1;
1036   return true;
1037 }
1038 
1039 void DataAggregator::consumeRestOfLine() {
1040   size_t LineEnd = ParsingBuf.find_first_of('\n');
1041   if (LineEnd == StringRef::npos) {
1042     ParsingBuf = StringRef();
1043     Col = 0;
1044     Line += 1;
1045     return;
1046   }
1047   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1048   Col = 0;
1049   Line += 1;
1050 }
1051 
1052 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1053   PerfBranchSample Res;
1054 
1055   while (checkAndConsumeFS()) {
1056   }
1057 
1058   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1059   if (std::error_code EC = PIDRes.getError())
1060     return EC;
1061   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1062   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1063     consumeRestOfLine();
1064     return make_error_code(errc::no_such_process);
1065   }
1066 
1067   while (checkAndConsumeFS()) {
1068   }
1069 
1070   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1071   if (std::error_code EC = PCRes.getError())
1072     return EC;
1073   Res.PC = PCRes.get();
1074 
1075   if (checkAndConsumeNewLine())
1076     return Res;
1077 
1078   while (!checkAndConsumeNewLine()) {
1079     checkAndConsumeFS();
1080 
1081     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1082     if (std::error_code EC = LBRRes.getError())
1083       return EC;
1084     LBREntry LBR = LBRRes.get();
1085     if (ignoreKernelInterrupt(LBR))
1086       continue;
1087     if (!BC->HasFixedLoadAddress)
1088       adjustLBR(LBR, MMapInfoIter->second);
1089     Res.LBR.push_back(LBR);
1090   }
1091 
1092   return Res;
1093 }
1094 
1095 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1096   while (checkAndConsumeFS()) {
1097   }
1098 
1099   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1100   if (std::error_code EC = PIDRes.getError())
1101     return EC;
1102 
1103   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1104   if (MMapInfoIter == BinaryMMapInfo.end()) {
1105     consumeRestOfLine();
1106     return PerfBasicSample{StringRef(), 0};
1107   }
1108 
1109   while (checkAndConsumeFS()) {
1110   }
1111 
1112   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1113   if (std::error_code EC = Event.getError())
1114     return EC;
1115 
1116   while (checkAndConsumeFS()) {
1117   }
1118 
1119   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1120   if (std::error_code EC = AddrRes.getError())
1121     return EC;
1122 
1123   if (!checkAndConsumeNewLine()) {
1124     reportError("expected end of line");
1125     return make_error_code(llvm::errc::io_error);
1126   }
1127 
1128   uint64_t Address = *AddrRes;
1129   if (!BC->HasFixedLoadAddress)
1130     adjustAddress(Address, MMapInfoIter->second);
1131 
1132   return PerfBasicSample{Event.get(), Address};
1133 }
1134 
1135 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1136   PerfMemSample Res{0, 0};
1137 
1138   while (checkAndConsumeFS()) {
1139   }
1140 
1141   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1142   if (std::error_code EC = PIDRes.getError())
1143     return EC;
1144 
1145   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1146   if (MMapInfoIter == BinaryMMapInfo.end()) {
1147     consumeRestOfLine();
1148     return Res;
1149   }
1150 
1151   while (checkAndConsumeFS()) {
1152   }
1153 
1154   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1155   if (std::error_code EC = Event.getError())
1156     return EC;
1157   if (Event.get().find("mem-loads") == StringRef::npos) {
1158     consumeRestOfLine();
1159     return Res;
1160   }
1161 
1162   while (checkAndConsumeFS()) {
1163   }
1164 
1165   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1166   if (std::error_code EC = AddrRes.getError())
1167     return EC;
1168 
1169   while (checkAndConsumeFS()) {
1170   }
1171 
1172   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1173   if (std::error_code EC = PCRes.getError()) {
1174     consumeRestOfLine();
1175     return EC;
1176   }
1177 
1178   if (!checkAndConsumeNewLine()) {
1179     reportError("expected end of line");
1180     return make_error_code(llvm::errc::io_error);
1181   }
1182 
1183   uint64_t Address = *AddrRes;
1184   if (!BC->HasFixedLoadAddress)
1185     adjustAddress(Address, MMapInfoIter->second);
1186 
1187   return PerfMemSample{PCRes.get(), Address};
1188 }
1189 
1190 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1191   auto parseOffset = [this]() -> ErrorOr<Location> {
1192     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1193     if (std::error_code EC = Res.getError())
1194       return EC;
1195     return Location(Res.get());
1196   };
1197 
1198   size_t Sep = ParsingBuf.find_first_of(" \n");
1199   if (Sep == StringRef::npos)
1200     return parseOffset();
1201   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1202   if (LookAhead.find_first_of(":") == StringRef::npos)
1203     return parseOffset();
1204 
1205   ErrorOr<StringRef> BuildID = parseString(':');
1206   if (std::error_code EC = BuildID.getError())
1207     return EC;
1208   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1209   if (std::error_code EC = Offset.getError())
1210     return EC;
1211   return Location(true, BuildID.get(), Offset.get());
1212 }
1213 
1214 ErrorOr<DataAggregator::AggregatedLBREntry>
1215 DataAggregator::parseAggregatedLBREntry() {
1216   while (checkAndConsumeFS()) {
1217   }
1218 
1219   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1220   if (std::error_code EC = TypeOrErr.getError())
1221     return EC;
1222   auto Type = AggregatedLBREntry::BRANCH;
1223   if (TypeOrErr.get() == "B") {
1224     Type = AggregatedLBREntry::BRANCH;
1225   } else if (TypeOrErr.get() == "F") {
1226     Type = AggregatedLBREntry::FT;
1227   } else if (TypeOrErr.get() == "f") {
1228     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1229   } else {
1230     reportError("expected B, F or f");
1231     return make_error_code(llvm::errc::io_error);
1232   }
1233 
1234   while (checkAndConsumeFS()) {
1235   }
1236   ErrorOr<Location> From = parseLocationOrOffset();
1237   if (std::error_code EC = From.getError())
1238     return EC;
1239 
1240   while (checkAndConsumeFS()) {
1241   }
1242   ErrorOr<Location> To = parseLocationOrOffset();
1243   if (std::error_code EC = To.getError())
1244     return EC;
1245 
1246   while (checkAndConsumeFS()) {
1247   }
1248   ErrorOr<int64_t> Frequency =
1249       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1250   if (std::error_code EC = Frequency.getError())
1251     return EC;
1252 
1253   uint64_t Mispreds = 0;
1254   if (Type == AggregatedLBREntry::BRANCH) {
1255     while (checkAndConsumeFS()) {
1256     }
1257     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1258     if (std::error_code EC = MispredsOrErr.getError())
1259       return EC;
1260     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1261   }
1262 
1263   if (!checkAndConsumeNewLine()) {
1264     reportError("expected end of line");
1265     return make_error_code(llvm::errc::io_error);
1266   }
1267 
1268   return AggregatedLBREntry{From.get(), To.get(),
1269                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1270                             Type};
1271 }
1272 
1273 bool DataAggregator::hasData() {
1274   if (ParsingBuf.size() == 0)
1275     return false;
1276 
1277   return true;
1278 }
1279 
1280 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1281   return opts::IgnoreInterruptLBR &&
1282          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1283 }
1284 
1285 std::error_code DataAggregator::printLBRHeatMap() {
1286   outs() << "PERF2BOLT: parse branch events...\n";
1287   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1288                      TimerGroupDesc, opts::TimeAggregator);
1289 
1290   if (opts::LinuxKernelMode) {
1291     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1292     opts::HeatmapMinAddress = KernelBaseAddr;
1293   }
1294   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1295              opts::HeatmapMaxAddress);
1296   uint64_t NumTotalSamples = 0;
1297 
1298   while (hasData()) {
1299     ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1300     if (std::error_code EC = SampleRes.getError()) {
1301       if (EC == errc::no_such_process)
1302         continue;
1303       return EC;
1304     }
1305 
1306     PerfBranchSample &Sample = SampleRes.get();
1307 
1308     // LBRs are stored in reverse execution order. NextLBR refers to the next
1309     // executed branch record.
1310     const LBREntry *NextLBR = nullptr;
1311     for (const LBREntry &LBR : Sample.LBR) {
1312       if (NextLBR) {
1313         // Record fall-through trace.
1314         const uint64_t TraceFrom = LBR.To;
1315         const uint64_t TraceTo = NextLBR->From;
1316         ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1317       }
1318       NextLBR = &LBR;
1319     }
1320     if (!Sample.LBR.empty()) {
1321       HM.registerAddress(Sample.LBR.front().To);
1322       HM.registerAddress(Sample.LBR.back().From);
1323     }
1324     NumTotalSamples += Sample.LBR.size();
1325   }
1326 
1327   if (!NumTotalSamples) {
1328     errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1329               "Cannot build heatmap.\n";
1330     exit(1);
1331   }
1332 
1333   outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1334   outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1335 
1336   outs() << "HEATMAP: building heat map...\n";
1337 
1338   for (const auto &LBR : FallthroughLBRs) {
1339     const Trace &Trace = LBR.first;
1340     const FTInfo &Info = LBR.second;
1341     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1342   }
1343 
1344   if (HM.getNumInvalidRanges())
1345     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1346 
1347   if (!HM.size()) {
1348     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1349     exit(1);
1350   }
1351 
1352   HM.print(opts::OutputFilename);
1353   if (opts::OutputFilename == "-")
1354     HM.printCDF(opts::OutputFilename);
1355   else
1356     HM.printCDF(opts::OutputFilename + ".csv");
1357 
1358   return std::error_code();
1359 }
1360 
// Parse branch samples from the input buffer and aggregate them into
// FallthroughLBRs (traces between consecutive LBR entries) and BranchLBRs
// (taken branches), then print summary statistics.
//
// At most opts::MaxSamples lines are parsed. Samples rejected with
// errc::no_such_process are skipped; any other parse error aborts the
// function and is returned. Functions that appear at either end of a recorded
// branch are marked as having profile available.
std::error_code DataAggregator::parseBranchEvents() {
  outs() << "PERF2BOLT: parse branch events...\n";
  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // NumTotalSamples counts every parse attempt (including skipped samples);
  // NumSamples counts only samples that parsed successfully.
  uint64_t NumTotalSamples = 0;
  uint64_t NumEntries = 0;
  uint64_t NumSamples = 0;
  uint64_t NumSamplesNoLBR = 0;
  uint64_t NumTraces = 0;
  // Once set, the workaround stays enabled for all remaining samples.
  bool NeedsSkylakeFix = false;

  while (hasData() && NumTotalSamples < opts::MaxSamples) {
    ++NumTotalSamples;

    ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
    if (std::error_code EC = SampleRes.getError()) {
      // Sample belongs to a process we are not interested in.
      if (EC == errc::no_such_process)
        continue;
      return EC;
    }
    ++NumSamples;

    PerfBranchSample &Sample = SampleRes.get();
    if (opts::WriteAutoFDOData)
      ++BasicSamples[Sample.PC];

    if (Sample.LBR.empty()) {
      ++NumSamplesNoLBR;
      continue;
    }

    NumEntries += Sample.LBR.size();
    // A full 32-entry LBR stack in BAT mode enables the workaround below.
    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
      errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
      NeedsSkylakeFix = true;
    }

    // LBRs are stored in reverse execution order. NextPC refers to the next
    // recorded executed PC.
    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
    uint32_t NumEntry = 0;
    for (const LBREntry &LBR : Sample.LBR) {
      ++NumEntry;
      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
      // sometimes record entry 32 as an exact copy of entry 31. This will cause
      // us to likely record an invalid trace and generate a stale function for
      // BAT mode (non BAT disassembles the function and is able to ignore this
      // trace at aggregation time). Drop first 2 entries (last two, in
      // chronological order)
      if (NeedsSkylakeFix && NumEntry <= 2)
        continue;
      if (NextPC) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextPC;
        const BinaryFunction *TraceBF =
            getBinaryFunctionContainingAddress(TraceFrom);
        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
          // Both ends are inside one function. The trace is "internal" when
          // the branch leading into it also originated in that function.
          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
          if (TraceBF->containsAddress(LBR.From))
            ++Info.InternCount;
          else
            ++Info.ExternCount;
        } else {
          // Ends in two different known functions: invalid trace. Otherwise at
          // least one end is in an unknown region: out-of-range trace.
          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
            LLVM_DEBUG(dbgs()
                       << "Invalid trace starting in "
                       << TraceBF->getPrintName() << " @ "
                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
                       << " and ending @ " << Twine::utohexstr(TraceTo)
                       << '\n');
            ++NumInvalidTraces;
          } else {
            LLVM_DEBUG(dbgs()
                       << "Out of range trace starting in "
                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
                       << Twine::utohexstr(
                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
                       << " and ending in "
                       << (getBinaryFunctionContainingAddress(TraceTo)
                               ? getBinaryFunctionContainingAddress(TraceTo)
                                     ->getPrintName()
                               : "None")
                       << " @ "
                       << Twine::utohexstr(
                              TraceTo -
                              (getBinaryFunctionContainingAddress(TraceTo)
                                   ? getBinaryFunctionContainingAddress(TraceTo)
                                         ->getAddress()
                                   : 0))
                       << '\n');
            ++NumLongRangeTraces;
          }
        }
        ++NumTraces;
      }
      NextPC = LBR.From;

      // Record the branch itself. An end that falls outside every known
      // function is replaced by 0; branches with both ends unknown are dropped.
      uint64_t From = LBR.From;
      if (!getBinaryFunctionContainingAddress(From))
        From = 0;
      uint64_t To = LBR.To;
      if (!getBinaryFunctionContainingAddress(To))
        To = 0;
      if (!From && !To)
        continue;
      BranchInfo &Info = BranchLBRs[Trace(From, To)];
      ++Info.TakenCount;
      Info.MispredCount += LBR.Mispred;
    }
  }

  // Flag every function touched by a recorded branch as having profile data.
  for (const auto &LBR : BranchLBRs) {
    const Trace &Trace = LBR.first;
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
      BF->setHasProfileAvailable();
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
      BF->setHasProfileAvailable();
  }

  // Print " (NN.N%)", colored green up to T1, yellow up to T2 and red above
  // T2 when the stream supports colors.
  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
    OS << " (";
    if (OS.has_colors()) {
      if (Percent > T2)
        OS.changeColor(raw_ostream::RED);
      else if (Percent > T1)
        OS.changeColor(raw_ostream::YELLOW);
      else
        OS.changeColor(raw_ostream::GREEN);
    }
    OS << format("%.1f%%", Percent);
    if (OS.has_colors())
      OS.resetColor();
    OS << ")";
  };

  outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
         << " LBR entries\n";
  if (NumTotalSamples) {
    if (NumSamples && NumSamplesNoLBR == NumSamples) {
      // Note: we don't know if perf2bolt is being used to parse memory samples
      // at this point. In this case, it is OK to parse zero LBRs.
      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
                "LBR. Record profile with perf record -j any or run perf2bolt "
                "in no-LBR mode with -nl (the performance improvement in -nl "
                "mode may be limited)\n";
    } else {
      // Ignored samples are the parse attempts that did not yield a sample.
      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
      printColored(outs(), PercentIgnored, 20, 50);
      outs() << " were ignored\n";
      if (PercentIgnored > 50.0f)
        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
                  "were attributed to the input binary\n";
    }
  }
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;
  float Perc = 0.0f;
  if (NumTraces > 0) {
    Perc = NumInvalidTraces * 100.0f / NumTraces;
    printColored(outs(), Perc, 5, 10);
  }
  outs() << "\n";
  if (Perc > 10.0f)
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance.\n\n";

  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;
  if (NumTraces > 0)
    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
  outs() << "\n";

  // NumColdSamples is not modified in this function; it is only reported here.
  if (NumColdSamples > 0) {
    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << "PERF2BOLT: " << NumColdSamples
           << format(" (%.1f%%)", ColdSamples)
           << " samples recorded in cold regions of split functions.\n";
    if (ColdSamples > 5.0f)
      outs()
          << "WARNING: The BOLT-processed binary where samples were collected "
             "likely used bad data or your service observed a large shift in "
             "profile. You may want to audit this.\n";
  }

  return std::error_code();
}
1553 
1554 void DataAggregator::processBranchEvents() {
1555   outs() << "PERF2BOLT: processing branch events...\n";
1556   NamedRegionTimer T("processBranch", "Processing branch events",
1557                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1558 
1559   for (const auto &AggrLBR : FallthroughLBRs) {
1560     const Trace &Loc = AggrLBR.first;
1561     const FTInfo &Info = AggrLBR.second;
1562     LBREntry First{Loc.From, Loc.From, false};
1563     LBREntry Second{Loc.To, Loc.To, false};
1564     if (Info.InternCount)
1565       doTrace(First, Second, Info.InternCount);
1566     if (Info.ExternCount) {
1567       First.From = 0;
1568       doTrace(First, Second, Info.ExternCount);
1569     }
1570   }
1571 
1572   for (const auto &AggrLBR : BranchLBRs) {
1573     const Trace &Loc = AggrLBR.first;
1574     const BranchInfo &Info = AggrLBR.second;
1575     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1576   }
1577 }
1578 
1579 std::error_code DataAggregator::parseBasicEvents() {
1580   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1581   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1582                      TimerGroupDesc, opts::TimeAggregator);
1583   while (hasData()) {
1584     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1585     if (std::error_code EC = Sample.getError())
1586       return EC;
1587 
1588     if (!Sample->PC)
1589       continue;
1590 
1591     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1592       BF->setHasProfileAvailable();
1593 
1594     ++BasicSamples[Sample->PC];
1595     EventNames.insert(Sample->EventName);
1596   }
1597 
1598   return std::error_code();
1599 }
1600 
1601 void DataAggregator::processBasicEvents() {
1602   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1603   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1604                      TimerGroupDesc, opts::TimeAggregator);
1605   uint64_t OutOfRangeSamples = 0;
1606   uint64_t NumSamples = 0;
1607   for (auto &Sample : BasicSamples) {
1608     const uint64_t PC = Sample.first;
1609     const uint64_t HitCount = Sample.second;
1610     NumSamples += HitCount;
1611     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1612     if (!Func) {
1613       OutOfRangeSamples += HitCount;
1614       continue;
1615     }
1616 
1617     doSample(*Func, PC, HitCount);
1618   }
1619   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1620 
1621   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1622          << OutOfRangeSamples;
1623   float Perc = 0.0f;
1624   if (NumSamples > 0) {
1625     outs() << " (";
1626     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1627     if (outs().has_colors()) {
1628       if (Perc > 60.0f)
1629         outs().changeColor(raw_ostream::RED);
1630       else if (Perc > 40.0f)
1631         outs().changeColor(raw_ostream::YELLOW);
1632       else
1633         outs().changeColor(raw_ostream::GREEN);
1634     }
1635     outs() << format("%.1f%%", Perc);
1636     if (outs().has_colors())
1637       outs().resetColor();
1638     outs() << ")";
1639   }
1640   outs() << "\n";
1641   if (Perc > 80.0f)
1642     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1643               "binary is probably not the same binary used during profiling "
1644               "collection. The generated data may be ineffective for improving "
1645               "performance.\n\n";
1646 }
1647 
1648 std::error_code DataAggregator::parseMemEvents() {
1649   outs() << "PERF2BOLT: parsing memory events...\n";
1650   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1651                      TimerGroupDesc, opts::TimeAggregator);
1652   while (hasData()) {
1653     ErrorOr<PerfMemSample> Sample = parseMemSample();
1654     if (std::error_code EC = Sample.getError())
1655       return EC;
1656 
1657     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1658       BF->setHasProfileAvailable();
1659 
1660     MemSamples.emplace_back(std::move(Sample.get()));
1661   }
1662 
1663   return std::error_code();
1664 }
1665 
1666 void DataAggregator::processMemEvents() {
1667   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1668                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1669   for (const PerfMemSample &Sample : MemSamples) {
1670     uint64_t PC = Sample.PC;
1671     uint64_t Addr = Sample.Addr;
1672     StringRef FuncName;
1673     StringRef MemName;
1674 
1675     // Try to resolve symbol for PC
1676     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1677     if (!Func) {
1678       LLVM_DEBUG(if (PC != 0) {
1679         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1680                << Twine::utohexstr(Addr) << "\n";
1681       });
1682       continue;
1683     }
1684 
1685     FuncName = Func->getOneName();
1686     PC -= Func->getAddress();
1687 
1688     // Try to resolve symbol for memory load
1689     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1690       MemName = BD->getName();
1691       Addr -= BD->getAddress();
1692     } else if (opts::FilterMemProfile) {
1693       // Filter out heap/stack accesses
1694       continue;
1695     }
1696 
1697     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1698     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1699 
1700     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1701     setMemData(*Func, MemData);
1702     MemData->update(FuncLoc, AddrLoc);
1703     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1704   }
1705 }
1706 
1707 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1708   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1709   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1710                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1711   while (hasData()) {
1712     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1713     if (std::error_code EC = AggrEntry.getError())
1714       return EC;
1715 
1716     if (BinaryFunction *BF =
1717             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1718       BF->setHasProfileAvailable();
1719     if (BinaryFunction *BF =
1720             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1721       BF->setHasProfileAvailable();
1722 
1723     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1724   }
1725 
1726   return std::error_code();
1727 }
1728 
1729 void DataAggregator::processPreAggregated() {
1730   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1731   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1732                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1733 
1734   uint64_t NumTraces = 0;
1735   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1736     switch (AggrEntry.EntryType) {
1737     case AggregatedLBREntry::BRANCH:
1738       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1739                AggrEntry.Mispreds);
1740       break;
1741     case AggregatedLBREntry::FT:
1742     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1743       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1744                          ? AggrEntry.From.Offset
1745                          : 0,
1746                      AggrEntry.From.Offset, false};
1747       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1748       doTrace(First, Second, AggrEntry.Count);
1749       NumTraces += AggrEntry.Count;
1750       break;
1751     }
1752     }
1753   }
1754 
1755   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1756          << " aggregated LBR entries\n";
1757   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1758          << NumInvalidTraces;
1759   float Perc = 0.0f;
1760   if (NumTraces > 0) {
1761     outs() << " (";
1762     Perc = NumInvalidTraces * 100.0f / NumTraces;
1763     if (outs().has_colors()) {
1764       if (Perc > 10.0f)
1765         outs().changeColor(raw_ostream::RED);
1766       else if (Perc > 5.0f)
1767         outs().changeColor(raw_ostream::YELLOW);
1768       else
1769         outs().changeColor(raw_ostream::GREEN);
1770     }
1771     outs() << format("%.1f%%", Perc);
1772     if (outs().has_colors())
1773       outs().resetColor();
1774     outs() << ")";
1775   }
1776   outs() << "\n";
1777   if (Perc > 10.0f)
1778     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1779               "binary is probably not the same binary used during profiling "
1780               "collection. The generated data may be ineffective for improving "
1781               "performance.\n\n";
1782 
1783   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1784          << NumLongRangeTraces;
1785   if (NumTraces > 0)
1786     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1787   outs() << "\n";
1788 }
1789 
1790 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1791   size_t LineEnd = ParsingBuf.find_first_of("\n");
1792   if (LineEnd == StringRef::npos) {
1793     reportError("expected rest of line");
1794     Diag << "Found: " << ParsingBuf << "\n";
1795     return NoneType();
1796   }
1797   StringRef Line = ParsingBuf.substr(0, LineEnd);
1798 
1799   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1800   if (Pos == StringRef::npos)
1801     return NoneType();
1802   Line = Line.drop_front(Pos);
1803 
1804   // Line:
1805   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1806   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1807   int32_t PID;
1808   if (PIDStr.getAsInteger(10, PID)) {
1809     reportError("expected PID");
1810     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1811     return NoneType();
1812   }
1813 
1814   return PID;
1815 }
1816 
1817 namespace {
1818 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1819   const StringRef SecTimeStr = TimeStr.split('.').first;
1820   const StringRef USecTimeStr = TimeStr.split('.').second;
1821   uint64_t SecTime;
1822   uint64_t USecTime;
1823   if (SecTimeStr.getAsInteger(10, SecTime) ||
1824       USecTimeStr.getAsInteger(10, USecTime))
1825     return NoneType();
1826   return SecTime * 1000000ULL + USecTime;
1827 }
1828 }
1829 
1830 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1831   while (checkAndConsumeFS()) {
1832   }
1833 
1834   size_t LineEnd = ParsingBuf.find_first_of("\n");
1835   if (LineEnd == StringRef::npos) {
1836     reportError("expected rest of line");
1837     Diag << "Found: " << ParsingBuf << "\n";
1838     return NoneType();
1839   }
1840   StringRef Line = ParsingBuf.substr(0, LineEnd);
1841 
1842   size_t Pos = Line.find("PERF_RECORD_FORK");
1843   if (Pos == StringRef::npos) {
1844     consumeRestOfLine();
1845     return NoneType();
1846   }
1847 
1848   ForkInfo FI;
1849 
1850   const StringRef TimeStr =
1851       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1852   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1853     FI.Time = *TimeRes;
1854   }
1855 
1856   Line = Line.drop_front(Pos);
1857 
1858   // Line:
1859   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1860   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1861   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1862     reportError("expected PID");
1863     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1864     return NoneType();
1865   }
1866 
1867   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1868   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1869     reportError("expected PID");
1870     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1871     return NoneType();
1872   }
1873 
1874   consumeRestOfLine();
1875 
1876   return FI;
1877 }
1878 
1879 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1880 DataAggregator::parseMMapEvent() {
1881   while (checkAndConsumeFS()) {
1882   }
1883 
1884   MMapInfo ParsedInfo;
1885 
1886   size_t LineEnd = ParsingBuf.find_first_of("\n");
1887   if (LineEnd == StringRef::npos) {
1888     reportError("expected rest of line");
1889     Diag << "Found: " << ParsingBuf << "\n";
1890     return make_error_code(llvm::errc::io_error);
1891   }
1892   StringRef Line = ParsingBuf.substr(0, LineEnd);
1893 
1894   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1895   if (Pos == StringRef::npos) {
1896     consumeRestOfLine();
1897     return std::make_pair(StringRef(), ParsedInfo);
1898   }
1899 
1900   // Line:
1901   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1902 
1903   const StringRef TimeStr =
1904       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1905   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1906     ParsedInfo.Time = *TimeRes;
1907 
1908   Line = Line.drop_front(Pos);
1909 
1910   // Line:
1911   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1912 
1913   StringRef FileName = Line.rsplit(FieldSeparator).second;
1914   if (FileName.startswith("//") || FileName.startswith("[")) {
1915     consumeRestOfLine();
1916     return std::make_pair(StringRef(), ParsedInfo);
1917   }
1918   FileName = sys::path::filename(FileName);
1919 
1920   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1921   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1922     reportError("expected PID");
1923     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1924     return make_error_code(llvm::errc::io_error);
1925   }
1926 
1927   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1928   if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) {
1929     reportError("expected base address");
1930     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1931     return make_error_code(llvm::errc::io_error);
1932   }
1933 
1934   const StringRef SizeStr = Line.split('(').second.split(')').first;
1935   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1936     reportError("expected mmaped size");
1937     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1938     return make_error_code(llvm::errc::io_error);
1939   }
1940 
1941   const StringRef OffsetStr =
1942       Line.split('@').second.ltrim().split(FieldSeparator).first;
1943   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1944     reportError("expected mmaped page-aligned offset");
1945     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1946     return make_error_code(llvm::errc::io_error);
1947   }
1948 
1949   consumeRestOfLine();
1950 
1951   return std::make_pair(FileName, ParsedInfo);
1952 }
1953 
1954 std::error_code DataAggregator::parseMMapEvents() {
1955   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1956   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1957                      TimerGroupDesc, opts::TimeAggregator);
1958 
1959   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1960   while (hasData()) {
1961     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1962     if (std::error_code EC = FileMMapInfoRes.getError())
1963       return EC;
1964 
1965     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1966     if (FileMMapInfo.second.PID == -1)
1967       continue;
1968 
1969     // Consider only the first mapping of the file for any given PID
1970     bool PIDExists = false;
1971     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1972     for (auto MI = Range.first; MI != Range.second; ++MI) {
1973       if (MI->second.PID == FileMMapInfo.second.PID) {
1974         PIDExists = true;
1975         break;
1976       }
1977     }
1978     if (PIDExists)
1979       continue;
1980 
1981     GlobalMMapInfo.insert(FileMMapInfo);
1982   }
1983 
1984   LLVM_DEBUG({
1985     dbgs() << "FileName -> mmap info:\n";
1986     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
1987       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
1988              << Twine::utohexstr(Pair.second.BaseAddress) << ", "
1989              << Twine::utohexstr(Pair.second.Size) << " @ "
1990              << Twine::utohexstr(Pair.second.Offset) << "]\n";
1991   });
1992 
1993   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
1994   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1995     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1996            << "\" for profile matching\n";
1997     NameToUse = BuildIDBinaryName;
1998   }
1999 
2000   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2001   for (auto I = Range.first; I != Range.second; ++I) {
2002     const MMapInfo &MMapInfo = I->second;
2003     if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) {
2004       // Check that the binary mapping matches one of the segments.
2005       bool MatchFound = false;
2006       for (auto &KV : BC->SegmentMapInfo) {
2007         SegmentInfo &SegInfo = KV.second;
2008         // The mapping is page-aligned and hence the BaseAddress could be
2009         // different from the segment start address. We cannot know the page
2010         // size of the mapping, but we know it should not exceed the segment
2011         // alignment value. Hence we are performing an approximate check.
2012         if (SegInfo.Address >= MMapInfo.BaseAddress &&
2013             SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) {
2014           MatchFound = true;
2015           break;
2016         }
2017       }
2018       if (!MatchFound) {
2019         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2020                << " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n';
2021         continue;
2022       }
2023     }
2024 
2025     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2026   }
2027 
2028   if (BinaryMMapInfo.empty()) {
2029     if (errs().has_colors())
2030       errs().changeColor(raw_ostream::RED);
2031     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2032            << BC->getFilename() << "\".";
2033     if (!GlobalMMapInfo.empty()) {
2034       errs() << " Profile for the following binary name(s) is available:\n";
2035       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2036            I = GlobalMMapInfo.upper_bound(I->first))
2037         errs() << "  " << I->first << '\n';
2038       errs() << "Please rename the input binary.\n";
2039     } else {
2040       errs() << " Failed to extract any binary name from a profile.\n";
2041     }
2042     if (errs().has_colors())
2043       errs().resetColor();
2044 
2045     exit(1);
2046   }
2047 
2048   return std::error_code();
2049 }
2050 
2051 std::error_code DataAggregator::parseTaskEvents() {
2052   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2053   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2054                      TimerGroupDesc, opts::TimeAggregator);
2055 
2056   while (hasData()) {
2057     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2058       // Remove forked child that ran execve
2059       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2060       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2061         BinaryMMapInfo.erase(MMapInfoIter);
2062       consumeRestOfLine();
2063       continue;
2064     }
2065 
2066     Optional<ForkInfo> ForkInfo = parseForkEvent();
2067     if (!ForkInfo)
2068       continue;
2069 
2070     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2071       continue;
2072 
2073     if (ForkInfo->Time == 0) {
2074       // Process was forked and mmaped before perf ran. In this case the child
2075       // should have its own mmap entry unless it was execve'd.
2076       continue;
2077     }
2078 
2079     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2080     if (MMapInfoIter == BinaryMMapInfo.end())
2081       continue;
2082 
2083     MMapInfo MMapInfo = MMapInfoIter->second;
2084     MMapInfo.PID = ForkInfo->ChildPID;
2085     MMapInfo.Forked = true;
2086     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2087   }
2088 
2089   outs() << "PERF2BOLT: input binary is associated with "
2090          << BinaryMMapInfo.size() << " PID(s)\n";
2091 
2092   LLVM_DEBUG({
2093     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2094       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2095              << ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x"
2096              << Twine::utohexstr(MMI.second.Size) << ")\n";
2097   });
2098 
2099   return std::error_code();
2100 }
2101 
2102 Optional<std::pair<StringRef, StringRef>>
2103 DataAggregator::parseNameBuildIDPair() {
2104   while (checkAndConsumeFS()) {
2105   }
2106 
2107   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2108   if (std::error_code EC = BuildIDStr.getError())
2109     return NoneType();
2110 
2111   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2112   if (std::error_code EC = NameStr.getError())
2113     return NoneType();
2114 
2115   consumeRestOfLine();
2116   return std::make_pair(NameStr.get(), BuildIDStr.get());
2117 }
2118 
2119 Optional<StringRef>
2120 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2121   while (hasData()) {
2122     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2123     if (!IDPair)
2124       return NoneType();
2125 
2126     if (IDPair->second.startswith(FileBuildID))
2127       return sys::path::filename(IDPair->first);
2128   }
2129   return NoneType();
2130 }
2131 
2132 std::error_code
2133 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2134   std::error_code EC;
2135   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2136   if (EC)
2137     return EC;
2138 
2139   bool WriteMemLocs = false;
2140 
2141   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2142     if (WriteMemLocs)
2143       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2144     else
2145       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2146     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2147             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2148   };
2149 
2150   uint64_t BranchValues = 0;
2151   uint64_t MemValues = 0;
2152 
2153   if (BAT)
2154     OutFile << "boltedcollection\n";
2155   if (opts::BasicAggregation) {
2156     OutFile << "no_lbr";
2157     for (const StringMapEntry<NoneType> &Entry : EventNames)
2158       OutFile << " " << Entry.getKey();
2159     OutFile << "\n";
2160 
2161     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2162       for (const SampleInfo &SI : Func.getValue().Data) {
2163         writeLocation(SI.Loc);
2164         OutFile << SI.Hits << "\n";
2165         ++BranchValues;
2166       }
2167     }
2168   } else {
2169     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2170       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2171         writeLocation(BI.From);
2172         writeLocation(BI.To);
2173         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2174         ++BranchValues;
2175       }
2176       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2177         // Do not output if source is a known symbol, since this was already
2178         // accounted for in the source function
2179         if (BI.From.IsSymbol)
2180           continue;
2181         writeLocation(BI.From);
2182         writeLocation(BI.To);
2183         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2184         ++BranchValues;
2185       }
2186     }
2187 
2188     WriteMemLocs = true;
2189     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2190       for (const MemInfo &MemEvent : Func.getValue().Data) {
2191         writeLocation(MemEvent.Offset);
2192         writeLocation(MemEvent.Addr);
2193         OutFile << MemEvent.Count << "\n";
2194         ++MemValues;
2195       }
2196     }
2197   }
2198 
2199   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2200          << " memory objects to " << OutputFilename << "\n";
2201 
2202   return std::error_code();
2203 }
2204 
2205 void DataAggregator::dump() const { DataReader::dump(); }
2206 
2207 void DataAggregator::dump(const LBREntry &LBR) const {
2208   Diag << "From: " << Twine::utohexstr(LBR.From)
2209        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2210        << "\n";
2211 }
2212 
2213 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2214   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2215   for (const LBREntry &LBR : Sample.LBR)
2216     dump(LBR);
2217 }
2218 
2219 void DataAggregator::dump(const PerfMemSample &Sample) const {
2220   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2221 }
2222