1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/Process.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/Regex.h"
28 #include "llvm/Support/Timer.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <map>
31 #include <unordered_map>
32 #include <utility>
33 
34 #define DEBUG_TYPE "aggregator"
35 
36 using namespace llvm;
37 using namespace bolt;
38 
39 namespace opts {
40 
41 static cl::opt<bool>
42 BasicAggregation("nl",
43   cl::desc("aggregate basic samples (without LBR info)"),
44   cl::init(false),
45   cl::ZeroOrMore,
46   cl::cat(AggregatorCategory));
47 
48 static cl::opt<bool>
49 FilterMemProfile("filter-mem-profile",
50   cl::desc("if processing a memory profile, filter out stack or heap accesses "
51            "that won't be useful for BOLT to reduce profile file size"),
52   cl::init(true),
53   cl::cat(AggregatorCategory));
54 
55 static cl::opt<unsigned long long>
56 FilterPID("pid",
57   cl::desc("only use samples from process with specified PID"),
58   cl::init(0),
59   cl::Optional,
60   cl::cat(AggregatorCategory));
61 
62 static cl::opt<bool>
63 IgnoreBuildID("ignore-build-id",
64   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
65   cl::init(false),
66   cl::cat(AggregatorCategory));
67 
68 static cl::opt<bool>
69 IgnoreInterruptLBR("ignore-interrupt-lbr",
70   cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
71   cl::init(true),
72   cl::ZeroOrMore,
73   cl::cat(AggregatorCategory));
74 
75 static cl::opt<unsigned long long>
76 MaxSamples("max-samples",
77   cl::init(-1ULL),
78   cl::desc("maximum number of samples to read from LBR profile"),
79   cl::Optional,
80   cl::Hidden,
81   cl::cat(AggregatorCategory));
82 
83 static cl::opt<bool>
84 ReadPreAggregated("pa",
85   cl::desc("skip perf and read data from a pre-aggregated file format"),
86   cl::init(false),
87   cl::ZeroOrMore,
88   cl::cat(AggregatorCategory));
89 
90 static cl::opt<bool>
91 TimeAggregator("time-aggr",
92   cl::desc("time BOLT aggregator"),
93   cl::init(false),
94   cl::ZeroOrMore,
95   cl::cat(AggregatorCategory));
96 
97 static cl::opt<bool>
98 UseEventPC("use-event-pc",
99   cl::desc("use event PC in combination with LBR sampling"),
100   cl::init(false),
101   cl::ZeroOrMore,
102   cl::cat(AggregatorCategory));
103 
104 static cl::opt<bool>
105 WriteAutoFDOData("autofdo",
106   cl::desc("generate autofdo textual data instead of bolt data"),
107   cl::init(false),
108   cl::ZeroOrMore,
109   cl::cat(AggregatorCategory));
110 
111 } // namespace opts
112 
namespace {

// Group name and description handed to NamedRegionTimer by this file.
constexpr char TimerGroupName[] = "aggregator";
constexpr char TimerGroupDesc[] = "Aggregator";

} // namespace
119 
// Namespace-scope definition of the static constexpr member declared in the
// class. NOTE(review): presumably kept for pre-C++17 language modes, where an
// odr-used static constexpr member needs an out-of-class definition - confirm.
constexpr uint64_t DataAggregator::KernelBaseAddr;
121 
122 DataAggregator::~DataAggregator() { deleteTempFiles(); }
123 
124 namespace {
125 void deleteTempFile(const std::string &FileName) {
126   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
127     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
128            << " with error " << Errc.message() << "\n";
129 }
130 }
131 
132 void DataAggregator::deleteTempFiles() {
133   for (std::string &FileName : TempFiles)
134     deleteTempFile(FileName);
135   TempFiles.clear();
136 }
137 
138 void DataAggregator::findPerfExecutable() {
139   Optional<std::string> PerfExecutable =
140       sys::Process::FindInEnvPath("PATH", "perf");
141   if (!PerfExecutable) {
142     outs() << "PERF2BOLT: No perf executable found!\n";
143     exit(1);
144   }
145   PerfPath = *PerfExecutable;
146 }
147 
148 void DataAggregator::start() {
149   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
150 
151   // Don't launch perf for pre-aggregated files
152   if (opts::ReadPreAggregated)
153     return;
154 
155   findPerfExecutable();
156 
157   if (opts::BasicAggregation)
158     launchPerfProcess("events without LBR",
159                       MainEventsPPI,
160                       "script -F pid,event,ip",
161                       /*Wait = */false);
162   else
163     launchPerfProcess("branch events",
164                       MainEventsPPI,
165                       "script -F pid,ip,brstack",
166                       /*Wait = */false);
167 
168   // Note: we launch script for mem events regardless of the option, as the
169   //       command fails fairly fast if mem events were not collected.
170   launchPerfProcess("mem events",
171                     MemEventsPPI,
172                     "script -F pid,event,addr,ip",
173                     /*Wait = */false);
174 
175   launchPerfProcess("process events",
176                     MMapEventsPPI,
177                     "script --show-mmap-events",
178                     /*Wait = */false);
179 
180   launchPerfProcess("task events",
181                     TaskEventsPPI,
182                     "script --show-task-events",
183                     /*Wait = */false);
184 }
185 
186 void DataAggregator::abort() {
187   if (opts::ReadPreAggregated)
188     return;
189 
190   std::string Error;
191 
192   // Kill subprocesses in case they are not finished
193   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
194   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
195   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
196   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
197 
198   deleteTempFiles();
199 
200   exit(1);
201 }
202 
203 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
204                                        const char *ArgsString, bool Wait) {
205   SmallVector<StringRef, 4> Argv;
206 
207   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
208   Argv.push_back(PerfPath.data());
209 
210   char *WritableArgsString = strdup(ArgsString);
211   char *Str = WritableArgsString;
212   do {
213     Argv.push_back(Str);
214     while (*Str && *Str != ' ')
215       ++Str;
216     if (!*Str)
217       break;
218     *Str++ = 0;
219   } while (true);
220 
221   Argv.push_back("-f");
222   Argv.push_back("-i");
223   Argv.push_back(Filename.c_str());
224 
225   if (std::error_code Errc =
226           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
227     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
228            << " with error " << Errc.message() << "\n";
229     exit(1);
230   }
231   TempFiles.push_back(PPI.StdoutPath.data());
232 
233   if (std::error_code Errc =
234           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
235     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
236            << " with error " << Errc.message() << "\n";
237     exit(1);
238   }
239   TempFiles.push_back(PPI.StderrPath.data());
240 
241   Optional<StringRef> Redirects[] = {
242       llvm::None,                        // Stdin
243       StringRef(PPI.StdoutPath.data()),  // Stdout
244       StringRef(PPI.StderrPath.data())}; // Stderr
245 
246   LLVM_DEBUG({
247     dbgs() << "Launching perf: ";
248     for (StringRef Arg : Argv)
249       dbgs() << Arg << " ";
250     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
251            << "\n";
252   });
253 
254   if (Wait)
255     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
256                                             /*envp*/ llvm::None, Redirects);
257   else
258     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
259                                 Redirects);
260 
261   free(WritableArgsString);
262 }
263 
264 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
265   PerfProcessInfo BuildIDProcessInfo;
266   launchPerfProcess("buildid list",
267                     BuildIDProcessInfo,
268                     "buildid-list",
269                     /*Wait = */true);
270 
271   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
272     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
273         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
274     StringRef ErrBuf = (*MB)->getBuffer();
275 
276     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
277            << '\n';
278     errs() << ErrBuf;
279     return;
280   }
281 
282   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
283       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
284   if (std::error_code EC = MB.getError()) {
285     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
286            << EC.message() << "\n";
287     return;
288   }
289 
290   FileBuf = std::move(*MB);
291   ParsingBuf = FileBuf->getBuffer();
292   if (ParsingBuf.empty()) {
293     errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
294               "data was recorded without it\n";
295     return;
296   }
297 
298   Col = 0;
299   Line = 1;
300   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
301   if (!FileName) {
302     errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
303               "This indicates the input binary supplied for data aggregation "
304               "is not the same recorded by perf when collecting profiling "
305               "data, or there were no samples recorded for the binary. "
306               "Use -ignore-build-id option to override.\n";
307     if (!opts::IgnoreBuildID)
308       abort();
309   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
310     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
311     BuildIDBinaryName = std::string(*FileName);
312   } else {
313     outs() << "PERF2BOLT: matched build-id and file name\n";
314   }
315 
316   return;
317 }
318 
319 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
320   if (opts::ReadPreAggregated)
321     return true;
322 
323   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
324   if (!FD)
325     return false;
326 
327   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
328 
329   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
330   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
331       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
332   if (!BytesRead || *BytesRead != 7)
333     return false;
334 
335   if (strncmp(Buf, "PERFILE", 7) == 0)
336     return true;
337   return false;
338 }
339 
340 void DataAggregator::parsePreAggregated() {
341   std::string Error;
342 
343   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
344       MemoryBuffer::getFileOrSTDIN(Filename);
345   if (std::error_code EC = MB.getError()) {
346     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
347            << EC.message() << "\n";
348     exit(1);
349   }
350 
351   FileBuf = std::move(*MB);
352   ParsingBuf = FileBuf->getBuffer();
353   Col = 0;
354   Line = 1;
355   if (parsePreAggregatedLBRSamples()) {
356     errs() << "PERF2BOLT: failed to parse samples\n";
357     exit(1);
358   }
359 }
360 
361 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
362   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
363   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
364                      TimerGroupDesc, opts::TimeAggregator);
365 
366   std::error_code EC;
367   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
368   if (EC)
369     return EC;
370 
371   // Format:
372   // number of unique traces
373   // from_1-to_1:count_1
374   // from_2-to_2:count_2
375   // ......
376   // from_n-to_n:count_n
377   // number of unique sample addresses
378   // addr_1:count_1
379   // addr_2:count_2
380   // ......
381   // addr_n:count_n
382   // number of unique LBR entries
383   // src_1->dst_1:count_1
384   // src_2->dst_2:count_2
385   // ......
386   // src_n->dst_n:count_n
387 
388   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
389 
390   // AutoFDO addresses are relative to the first allocated loadable program
391   // segment
392   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
393     if (Address < FirstAllocAddress)
394       return 0;
395     return Address - FirstAllocAddress;
396   };
397 
398   OutFile << FallthroughLBRs.size() << "\n";
399   for (const auto &AggrLBR : FallthroughLBRs) {
400     const Trace &Trace = AggrLBR.first;
401     const FTInfo &Info = AggrLBR.second;
402     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
403             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
404             << (Info.InternCount + Info.ExternCount) << "\n";
405   }
406 
407   OutFile << BasicSamples.size() << "\n";
408   for (const auto &Sample : BasicSamples) {
409     uint64_t PC = Sample.first;
410     uint64_t HitCount = Sample.second;
411     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
412   }
413 
414   OutFile << BranchLBRs.size() << "\n";
415   for (const auto &AggrLBR : BranchLBRs) {
416     const Trace &Trace = AggrLBR.first;
417     const BranchInfo &Info = AggrLBR.second;
418     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
419             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
420             << Info.TakenCount << "\n";
421   }
422 
423   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
424          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
425          << " unique branches to " << OutputFilename << "\n";
426 
427   return std::error_code();
428 }
429 
430 void DataAggregator::filterBinaryMMapInfo() {
431   if (opts::FilterPID) {
432     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
433     if (MMapInfoIter != BinaryMMapInfo.end()) {
434       MMapInfo MMap = MMapInfoIter->second;
435       BinaryMMapInfo.clear();
436       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
437     } else {
438       if (errs().has_colors())
439         errs().changeColor(raw_ostream::RED);
440       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
441              << opts::FilterPID << "\""
442              << " for binary \"" << BC->getFilename() << "\".";
443       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
444       errs() << " Profile for the following process is available:\n";
445       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
446         outs() << "  " << MMI.second.PID
447                << (MMI.second.Forked ? " (forked)\n" : "\n");
448 
449       if (errs().has_colors())
450         errs().resetColor();
451 
452       exit(1);
453     }
454   }
455 }
456 
457 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
458   this->BC = &BC;
459 
460   if (opts::ReadPreAggregated) {
461     parsePreAggregated();
462     return Error::success();
463   }
464 
465   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
466     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
467     processFileBuildID(*FileBuildID);
468   } else {
469     errs() << "BOLT-WARNING: build-id will not be checked because we could "
470               "not read one from input binary\n";
471   }
472 
473   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
474     std::string Error;
475     outs() << "PERF2BOLT: waiting for perf " << Name
476            << " collection to finish...\n";
477     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
478 
479     if (!Error.empty()) {
480       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
481       deleteTempFiles();
482       exit(1);
483     }
484 
485     if (PI.ReturnCode != 0) {
486       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
487           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
488       StringRef ErrBuf = (*ErrorMB)->getBuffer();
489 
490       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
491       errs() << ErrBuf;
492       deleteTempFiles();
493       exit(1);
494     }
495 
496     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
497         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
498     if (std::error_code EC = MB.getError()) {
499       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
500              << EC.message() << "\n";
501       deleteTempFiles();
502       exit(1);
503     }
504 
505     FileBuf = std::move(*MB);
506     ParsingBuf = FileBuf->getBuffer();
507     Col = 0;
508     Line = 1;
509   };
510 
511   if (opts::LinuxKernelMode) {
512     // Current MMap parsing logic does not work with linux kernel.
513     // MMap entries for linux kernel uses PERF_RECORD_MMAP
514     // format instead of typical PERF_RECORD_MMAP2 format.
515     // Since linux kernel address mapping is absolute (same as
516     // in the ELF file), we avoid parsing MMap in linux kernel mode.
517     // While generating optimized linux kernel binary, we may need
518     // to parse MMap entries.
519 
520     // In linux kernel mode, we analyze and optimize
521     // all linux kernel binary instructions, irrespective
522     // of whether they are due to system calls or due to
523     // interrupts. Therefore, we cannot ignore interrupt
524     // in Linux kernel mode.
525     opts::IgnoreInterruptLBR = false;
526   } else {
527     prepareToParse("mmap events", MMapEventsPPI);
528     if (parseMMapEvents())
529       errs() << "PERF2BOLT: failed to parse mmap events\n";
530   }
531 
532   prepareToParse("task events", TaskEventsPPI);
533   if (parseTaskEvents())
534     errs() << "PERF2BOLT: failed to parse task events\n";
535 
536   filterBinaryMMapInfo();
537   prepareToParse("events", MainEventsPPI);
538 
539   if (opts::HeatmapMode) {
540     if (std::error_code EC = printLBRHeatMap()) {
541       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
542       exit(1);
543     }
544     exit(0);
545   }
546 
547   if ((!opts::BasicAggregation && parseBranchEvents()) ||
548       (opts::BasicAggregation && parseBasicEvents()))
549     errs() << "PERF2BOLT: failed to parse samples\n";
550 
551   // We can finish early if the goal is just to generate data for autofdo
552   if (opts::WriteAutoFDOData) {
553     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
554       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
555 
556     deleteTempFiles();
557     exit(0);
558   }
559 
560   // Special handling for memory events
561   std::string Error;
562   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
563   if (PI.ReturnCode != 0) {
564     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
565         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
566     StringRef ErrBuf = (*MB)->getBuffer();
567 
568     deleteTempFiles();
569 
570     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
571                  "Cannot print 'addr' field.");
572     if (!NoData.match(ErrBuf)) {
573       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
574       errs() << ErrBuf;
575       exit(1);
576     }
577     return Error::success();
578   }
579 
580   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
581       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
582   if (std::error_code EC = MB.getError()) {
583     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
584            << EC.message() << "\n";
585     deleteTempFiles();
586     exit(1);
587   }
588 
589   FileBuf = std::move(*MB);
590   ParsingBuf = FileBuf->getBuffer();
591   Col = 0;
592   Line = 1;
593   if (const std::error_code EC = parseMemEvents())
594     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
595            << '\n';
596 
597   deleteTempFiles();
598 
599   return Error::success();
600 }
601 
602 Error DataAggregator::readProfile(BinaryContext &BC) {
603   processProfile(BC);
604 
605   for (auto &BFI : BC.getBinaryFunctions()) {
606     BinaryFunction &Function = BFI.second;
607     convertBranchData(Function);
608   }
609 
610   if (opts::AggregateOnly) {
611     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
612       report_error("cannot create output data file", EC);
613   }
614 
615   return Error::success();
616 }
617 
618 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
619   return Function.hasProfileAvailable();
620 }
621 
622 void DataAggregator::processProfile(BinaryContext &BC) {
623   if (opts::ReadPreAggregated)
624     processPreAggregated();
625   else if (opts::BasicAggregation)
626     processBasicEvents();
627   else
628     processBranchEvents();
629 
630   processMemEvents();
631 
632   // Mark all functions with registered events as having a valid profile.
633   for (auto &BFI : BC.getBinaryFunctions()) {
634     BinaryFunction &BF = BFI.second;
635     if (getBranchData(BF)) {
636       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
637                                                 : BinaryFunction::PF_LBR;
638       BF.markProfiled(Flags);
639     }
640   }
641 
642   // Release intermediate storage.
643   clear(BranchLBRs);
644   clear(FallthroughLBRs);
645   clear(AggregatedLBRs);
646   clear(BasicSamples);
647   clear(MemSamples);
648 }
649 
650 BinaryFunction *
651 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
652   if (!BC->containsAddress(Address))
653     return nullptr;
654 
655   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
656                                                 /*UseMaxSize=*/true);
657 }
658 
659 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
660                                           uint64_t Count) {
661   if (!BAT)
662     return Func.getOneName();
663 
664   const BinaryFunction *OrigFunc = &Func;
665   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
666     NumColdSamples += Count;
667     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
668     if (HotFunc)
669       OrigFunc = HotFunc;
670   }
671   // If it is a local function, prefer the name containing the file name where
672   // the local function was declared
673   for (StringRef AlternativeName : OrigFunc->getNames()) {
674     size_t FileNameIdx = AlternativeName.find('/');
675     // Confirm the alternative name has the pattern Symbol/FileName/1 before
676     // using it
677     if (FileNameIdx == StringRef::npos ||
678         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
679       continue;
680     return AlternativeName;
681   }
682   return OrigFunc->getOneName();
683 }
684 
685 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
686                               uint64_t Count) {
687   auto I = NamesToSamples.find(Func.getOneName());
688   if (I == NamesToSamples.end()) {
689     bool Success;
690     StringRef LocName = getLocationName(Func, Count);
691     std::tie(I, Success) = NamesToSamples.insert(
692         std::make_pair(Func.getOneName(),
693                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
694   }
695 
696   Address -= Func.getAddress();
697   if (BAT)
698     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
699 
700   I->second.bumpCount(Address, Count);
701   return true;
702 }
703 
704 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
705                                    uint64_t To, uint64_t Count,
706                                    uint64_t Mispreds) {
707   FuncBranchData *AggrData = getBranchData(Func);
708   if (!AggrData) {
709     AggrData = &NamesToBranches[Func.getOneName()];
710     AggrData->Name = getLocationName(Func, Count);
711     setBranchData(Func, AggrData);
712   }
713 
714   From -= Func.getAddress();
715   To -= Func.getAddress();
716   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
717                     << " @ " << Twine::utohexstr(From) << " -> "
718                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
719                     << '\n');
720   if (BAT) {
721     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
722     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
723     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
724                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
725                       << " -> " << Func.getPrintName() << " @ "
726                       << Twine::utohexstr(To) << '\n');
727   }
728 
729   AggrData->bumpBranchCount(From, To, Count, Mispreds);
730   return true;
731 }
732 
733 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
734                                    BinaryFunction *ToFunc, uint64_t From,
735                                    uint64_t To, uint64_t Count,
736                                    uint64_t Mispreds) {
737   FuncBranchData *FromAggrData = nullptr;
738   FuncBranchData *ToAggrData = nullptr;
739   StringRef SrcFunc;
740   StringRef DstFunc;
741   if (FromFunc) {
742     SrcFunc = getLocationName(*FromFunc, Count);
743     FromAggrData = getBranchData(*FromFunc);
744     if (!FromAggrData) {
745       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
746       FromAggrData->Name = SrcFunc;
747       setBranchData(*FromFunc, FromAggrData);
748     }
749     From -= FromFunc->getAddress();
750     if (BAT)
751       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
752 
753     recordExit(*FromFunc, From, Mispreds, Count);
754   }
755   if (ToFunc) {
756     DstFunc = getLocationName(*ToFunc, 0);
757     ToAggrData = getBranchData(*ToFunc);
758     if (!ToAggrData) {
759       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
760       ToAggrData->Name = DstFunc;
761       setBranchData(*ToFunc, ToAggrData);
762     }
763     To -= ToFunc->getAddress();
764     if (BAT)
765       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
766 
767     recordEntry(*ToFunc, To, Mispreds, Count);
768   }
769 
770   if (FromAggrData)
771     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
772                                 Count, Mispreds);
773   if (ToAggrData)
774     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
775                                Count, Mispreds);
776   return true;
777 }
778 
779 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
780                               uint64_t Mispreds) {
781   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
782   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
783   if (!FromFunc && !ToFunc)
784     return false;
785 
786   if (FromFunc == ToFunc) {
787     recordBranch(*FromFunc, From - FromFunc->getAddress(),
788                  To - FromFunc->getAddress(), Count, Mispreds);
789     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
790   }
791 
792   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
793 }
794 
795 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
796                              uint64_t Count) {
797   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
798   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
799   if (!FromFunc || !ToFunc) {
800     LLVM_DEBUG(
801         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
802                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
803                << " and ending in " << ToFunc->getPrintName() << " @ "
804                << ToFunc->getPrintName() << " @ "
805                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
806     NumLongRangeTraces += Count;
807     return false;
808   }
809   if (FromFunc != ToFunc) {
810     NumInvalidTraces += Count;
811     LLVM_DEBUG(
812         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
813                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
814                << " and ending in " << ToFunc->getPrintName() << " @ "
815                << ToFunc->getPrintName() << " @ "
816                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
817     return false;
818   }
819 
820   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
821       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
822           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
823   if (!FTs) {
824     LLVM_DEBUG(
825         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
826                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
827                << " and ending in " << ToFunc->getPrintName() << " @ "
828                << ToFunc->getPrintName() << " @ "
829                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
830     NumInvalidTraces += Count;
831     return false;
832   }
833 
834   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
835                     << FromFunc->getPrintName() << ":"
836                     << Twine::utohexstr(First.To) << " to "
837                     << Twine::utohexstr(Second.From) << ".\n");
838   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
839     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
840                   Pair.second + FromFunc->getAddress(), Count, false);
841 
842   return true;
843 }
844 
/// Walk the basic blocks of \p BF between the target of \p FirstLBR and the
/// source of \p SecondLBR, adding \p Count to the branch info of every
/// fall-through edge along the way.
///
/// If \p Branches is non-null, one (offset, next block offset) pair is
/// appended to it for every edge visited.
///
/// Returns false if \p BF is not simple, if the trace start lies after its
/// end, if either end does not map to a basic block, or if a block on the
/// path has no successor edge to the next block in layout. Returns true
/// otherwise, including when both ends fall in the same block (nothing is
/// recorded in that case).
///
/// Although the method is const, it updates branch counts stored in the
/// basic blocks of \p BF.
bool DataAggregator::recordTrace(
    BinaryFunction &BF,
    const LBREntry &FirstLBR,
    const LBREntry &SecondLBR,
    uint64_t Count,
    SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
  BinaryContext &BC = BF.getBinaryContext();

  if (!BF.isSimple())
    return false;

  assert(BF.hasCFG() && "can only record traces in CFG state");

  // Offsets of the trace within this function.
  const uint64_t From = FirstLBR.To - BF.getAddress();
  const uint64_t To = SecondLBR.From - BF.getAddress();

  // A trace must run forward in address order.
  if (From > To)
    return false;

  BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
  BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB)
    return false;

  // Adjust FromBB if the first LBR is a return from the last instruction in
  // the previous block (that instruction should be a call).
  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
    // NOTE(review): assumes FromBB is not the first block in
    // BasicBlocksLayout here, so that index - 1 is valid - confirm.
    BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
    if (PrevBB->getSuccessor(FromBB->getLabel())) {
      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
      if (Instr && BC.MIB->isCall(*Instr))
        FromBB = PrevBB;
      else
        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
                          << '\n');
    } else {
      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
    }
  }

  // Fill out information for fall-through edges. The From and To could be
  // within the same basic block, e.g. when two call instructions are in the
  // same block. In this case we skip the processing.
  if (FromBB == ToBB)
    return true;

  // Process blocks in the original layout order.
  BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
  assert(BB == FromBB && "index mismatch");
  while (BB != ToBB) {
    // NOTE(review): relies on ToBB being reachable by stepping forward through
    // BasicBlocksLayout, otherwise index + 1 runs past the end - confirm.
    BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");

    // Check for bad LBRs.
    if (!BB->getSuccessor(NextBB->getLabel())) {
      LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
                        << "  " << FirstLBR << '\n'
                        << "  " << SecondLBR << '\n');
      return false;
    }

    // Record fall-through jumps
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
    BI.Count += Count;

    if (Branches) {
      // Use the "Offset" annotation of the block's last non-pseudo
      // instruction as the branch source, or the block offset when the block
      // has no such instruction.
      const MCInst *Instr = BB->getLastNonPseudoInstr();
      uint64_t Offset = 0;
      if (Instr)
        Offset = BC.MIB->getAnnotationWithDefault<uint32_t>(*Instr, "Offset");
      else
        Offset = BB->getOffset();

      Branches->emplace_back(Offset, NextBB->getOffset());
    }

    BB = NextBB;
  }

  return true;
}
929 
930 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
931 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
932                                        const LBREntry &FirstLBR,
933                                        const LBREntry &SecondLBR,
934                                        uint64_t Count) const {
935   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
936 
937   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
938     return NoneType();
939 
940   return Res;
941 }
942 
943 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
944                                  uint64_t Count) const {
945   if (To > BF.getSize())
946     return false;
947 
948   if (!BF.hasProfile())
949     BF.ExecutionCount = 0;
950 
951   BinaryBasicBlock *EntryBB = nullptr;
952   if (To == 0) {
953     BF.ExecutionCount += Count;
954     if (!BF.empty())
955       EntryBB = &BF.front();
956   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
957     if (BB->isEntryPoint())
958       EntryBB = BB;
959   }
960 
961   if (EntryBB)
962     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
963 
964   return true;
965 }
966 
967 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
968                                 uint64_t Count) const {
969   if (!BF.isSimple() || From > BF.getSize())
970     return false;
971 
972   if (!BF.hasProfile())
973     BF.ExecutionCount = 0;
974 
975   return true;
976 }
977 
978 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
979   LBREntry Res;
980   ErrorOr<StringRef> FromStrRes = parseString('/');
981   if (std::error_code EC = FromStrRes.getError())
982     return EC;
983   StringRef OffsetStr = FromStrRes.get();
984   if (OffsetStr.getAsInteger(0, Res.From)) {
985     reportError("expected hexadecimal number with From address");
986     Diag << "Found: " << OffsetStr << "\n";
987     return make_error_code(llvm::errc::io_error);
988   }
989 
990   ErrorOr<StringRef> ToStrRes = parseString('/');
991   if (std::error_code EC = ToStrRes.getError())
992     return EC;
993   OffsetStr = ToStrRes.get();
994   if (OffsetStr.getAsInteger(0, Res.To)) {
995     reportError("expected hexadecimal number with To address");
996     Diag << "Found: " << OffsetStr << "\n";
997     return make_error_code(llvm::errc::io_error);
998   }
999 
1000   ErrorOr<StringRef> MispredStrRes = parseString('/');
1001   if (std::error_code EC = MispredStrRes.getError())
1002     return EC;
1003   StringRef MispredStr = MispredStrRes.get();
1004   if (MispredStr.size() != 1 ||
1005       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1006     reportError("expected single char for mispred bit");
1007     Diag << "Found: " << MispredStr << "\n";
1008     return make_error_code(llvm::errc::io_error);
1009   }
1010   Res.Mispred = MispredStr[0] == 'M';
1011 
1012   static bool MispredWarning = true;
1013   if (MispredStr[0] == '-' && MispredWarning) {
1014     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1015     MispredWarning = false;
1016   }
1017 
1018   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1019   if (std::error_code EC = Rest.getError())
1020     return EC;
1021   if (Rest.get().size() < 5) {
1022     reportError("expected rest of LBR entry");
1023     Diag << "Found: " << Rest.get() << "\n";
1024     return make_error_code(llvm::errc::io_error);
1025   }
1026   return Res;
1027 }
1028 
1029 bool DataAggregator::checkAndConsumeFS() {
1030   if (ParsingBuf[0] != FieldSeparator)
1031     return false;
1032 
1033   ParsingBuf = ParsingBuf.drop_front(1);
1034   Col += 1;
1035   return true;
1036 }
1037 
1038 void DataAggregator::consumeRestOfLine() {
1039   size_t LineEnd = ParsingBuf.find_first_of('\n');
1040   if (LineEnd == StringRef::npos) {
1041     ParsingBuf = StringRef();
1042     Col = 0;
1043     Line += 1;
1044     return;
1045   }
1046   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1047   Col = 0;
1048   Line += 1;
1049 }
1050 
1051 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1052   PerfBranchSample Res;
1053 
1054   while (checkAndConsumeFS()) {
1055   }
1056 
1057   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1058   if (std::error_code EC = PIDRes.getError())
1059     return EC;
1060   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1061   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1062     consumeRestOfLine();
1063     return make_error_code(errc::no_such_process);
1064   }
1065 
1066   while (checkAndConsumeFS()) {
1067   }
1068 
1069   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1070   if (std::error_code EC = PCRes.getError())
1071     return EC;
1072   Res.PC = PCRes.get();
1073 
1074   if (checkAndConsumeNewLine())
1075     return Res;
1076 
1077   while (!checkAndConsumeNewLine()) {
1078     checkAndConsumeFS();
1079 
1080     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1081     if (std::error_code EC = LBRRes.getError())
1082       return EC;
1083     LBREntry LBR = LBRRes.get();
1084     if (ignoreKernelInterrupt(LBR))
1085       continue;
1086     if (!BC->HasFixedLoadAddress)
1087       adjustLBR(LBR, MMapInfoIter->second);
1088     Res.LBR.push_back(LBR);
1089   }
1090 
1091   return Res;
1092 }
1093 
1094 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1095   while (checkAndConsumeFS()) {
1096   }
1097 
1098   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1099   if (std::error_code EC = PIDRes.getError())
1100     return EC;
1101 
1102   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1103   if (MMapInfoIter == BinaryMMapInfo.end()) {
1104     consumeRestOfLine();
1105     return PerfBasicSample{StringRef(), 0};
1106   }
1107 
1108   while (checkAndConsumeFS()) {
1109   }
1110 
1111   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1112   if (std::error_code EC = Event.getError())
1113     return EC;
1114 
1115   while (checkAndConsumeFS()) {
1116   }
1117 
1118   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1119   if (std::error_code EC = AddrRes.getError())
1120     return EC;
1121 
1122   if (!checkAndConsumeNewLine()) {
1123     reportError("expected end of line");
1124     return make_error_code(llvm::errc::io_error);
1125   }
1126 
1127   uint64_t Address = *AddrRes;
1128   if (!BC->HasFixedLoadAddress)
1129     adjustAddress(Address, MMapInfoIter->second);
1130 
1131   return PerfBasicSample{Event.get(), Address};
1132 }
1133 
1134 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1135   PerfMemSample Res{0, 0};
1136 
1137   while (checkAndConsumeFS()) {
1138   }
1139 
1140   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1141   if (std::error_code EC = PIDRes.getError())
1142     return EC;
1143 
1144   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1145   if (MMapInfoIter == BinaryMMapInfo.end()) {
1146     consumeRestOfLine();
1147     return Res;
1148   }
1149 
1150   while (checkAndConsumeFS()) {
1151   }
1152 
1153   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1154   if (std::error_code EC = Event.getError())
1155     return EC;
1156   if (Event.get().find("mem-loads") == StringRef::npos) {
1157     consumeRestOfLine();
1158     return Res;
1159   }
1160 
1161   while (checkAndConsumeFS()) {
1162   }
1163 
1164   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1165   if (std::error_code EC = AddrRes.getError())
1166     return EC;
1167 
1168   while (checkAndConsumeFS()) {
1169   }
1170 
1171   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1172   if (std::error_code EC = PCRes.getError()) {
1173     consumeRestOfLine();
1174     return EC;
1175   }
1176 
1177   if (!checkAndConsumeNewLine()) {
1178     reportError("expected end of line");
1179     return make_error_code(llvm::errc::io_error);
1180   }
1181 
1182   uint64_t Address = *AddrRes;
1183   if (!BC->HasFixedLoadAddress)
1184     adjustAddress(Address, MMapInfoIter->second);
1185 
1186   return PerfMemSample{PCRes.get(), Address};
1187 }
1188 
1189 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1190   auto parseOffset = [this]() -> ErrorOr<Location> {
1191     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1192     if (std::error_code EC = Res.getError())
1193       return EC;
1194     return Location(Res.get());
1195   };
1196 
1197   size_t Sep = ParsingBuf.find_first_of(" \n");
1198   if (Sep == StringRef::npos)
1199     return parseOffset();
1200   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1201   if (LookAhead.find_first_of(":") == StringRef::npos)
1202     return parseOffset();
1203 
1204   ErrorOr<StringRef> BuildID = parseString(':');
1205   if (std::error_code EC = BuildID.getError())
1206     return EC;
1207   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1208   if (std::error_code EC = Offset.getError())
1209     return EC;
1210   return Location(true, BuildID.get(), Offset.get());
1211 }
1212 
1213 ErrorOr<DataAggregator::AggregatedLBREntry>
1214 DataAggregator::parseAggregatedLBREntry() {
1215   while (checkAndConsumeFS()) {
1216   }
1217 
1218   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1219   if (std::error_code EC = TypeOrErr.getError())
1220     return EC;
1221   auto Type = AggregatedLBREntry::BRANCH;
1222   if (TypeOrErr.get() == "B") {
1223     Type = AggregatedLBREntry::BRANCH;
1224   } else if (TypeOrErr.get() == "F") {
1225     Type = AggregatedLBREntry::FT;
1226   } else if (TypeOrErr.get() == "f") {
1227     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1228   } else {
1229     reportError("expected B, F or f");
1230     return make_error_code(llvm::errc::io_error);
1231   }
1232 
1233   while (checkAndConsumeFS()) {
1234   }
1235   ErrorOr<Location> From = parseLocationOrOffset();
1236   if (std::error_code EC = From.getError())
1237     return EC;
1238 
1239   while (checkAndConsumeFS()) {
1240   }
1241   ErrorOr<Location> To = parseLocationOrOffset();
1242   if (std::error_code EC = To.getError())
1243     return EC;
1244 
1245   while (checkAndConsumeFS()) {
1246   }
1247   ErrorOr<int64_t> Frequency =
1248       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1249   if (std::error_code EC = Frequency.getError())
1250     return EC;
1251 
1252   uint64_t Mispreds = 0;
1253   if (Type == AggregatedLBREntry::BRANCH) {
1254     while (checkAndConsumeFS()) {
1255     }
1256     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1257     if (std::error_code EC = MispredsOrErr.getError())
1258       return EC;
1259     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1260   }
1261 
1262   if (!checkAndConsumeNewLine()) {
1263     reportError("expected end of line");
1264     return make_error_code(llvm::errc::io_error);
1265   }
1266 
1267   return AggregatedLBREntry{From.get(), To.get(),
1268                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1269                             Type};
1270 }
1271 
1272 bool DataAggregator::hasData() {
1273   if (ParsingBuf.size() == 0)
1274     return false;
1275 
1276   return true;
1277 }
1278 
1279 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1280   return opts::IgnoreInterruptLBR &&
1281          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1282 }
1283 
1284 std::error_code DataAggregator::printLBRHeatMap() {
1285   outs() << "PERF2BOLT: parse branch events...\n";
1286   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1287                      TimerGroupDesc, opts::TimeAggregator);
1288 
1289   if (opts::LinuxKernelMode) {
1290     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1291     opts::HeatmapMinAddress = KernelBaseAddr;
1292   }
1293   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1294              opts::HeatmapMaxAddress);
1295   uint64_t NumTotalSamples = 0;
1296 
1297   while (hasData()) {
1298     ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1299     if (std::error_code EC = SampleRes.getError()) {
1300       if (EC == errc::no_such_process)
1301         continue;
1302       return EC;
1303     }
1304 
1305     PerfBranchSample &Sample = SampleRes.get();
1306 
1307     // LBRs are stored in reverse execution order. NextLBR refers to the next
1308     // executed branch record.
1309     const LBREntry *NextLBR = nullptr;
1310     for (const LBREntry &LBR : Sample.LBR) {
1311       if (NextLBR) {
1312         // Record fall-through trace.
1313         const uint64_t TraceFrom = LBR.To;
1314         const uint64_t TraceTo = NextLBR->From;
1315         ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1316       }
1317       NextLBR = &LBR;
1318     }
1319     if (!Sample.LBR.empty()) {
1320       HM.registerAddress(Sample.LBR.front().To);
1321       HM.registerAddress(Sample.LBR.back().From);
1322     }
1323     NumTotalSamples += Sample.LBR.size();
1324   }
1325 
1326   if (!NumTotalSamples) {
1327     errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1328               "Cannot build heatmap.\n";
1329     exit(1);
1330   }
1331 
1332   outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1333   outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1334 
1335   outs() << "HEATMAP: building heat map...\n";
1336 
1337   for (const auto &LBR : FallthroughLBRs) {
1338     const Trace &Trace = LBR.first;
1339     const FTInfo &Info = LBR.second;
1340     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1341   }
1342 
1343   if (HM.getNumInvalidRanges())
1344     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1345 
1346   if (!HM.size()) {
1347     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1348     exit(1);
1349   }
1350 
1351   HM.print(opts::HeatmapFile);
1352   if (opts::HeatmapFile == "-")
1353     HM.printCDF(opts::HeatmapFile);
1354   else
1355     HM.printCDF(opts::HeatmapFile + ".csv");
1356 
1357   return std::error_code();
1358 }
1359 
/// Parse branch samples from the input, aggregate them into FallthroughLBRs
/// and BranchLBRs, mark the functions they touch as having profile data, and
/// print summary statistics.
///
/// At most opts::MaxSamples lines are read. Samples rejected with
/// errc::no_such_process count towards the total but are otherwise skipped;
/// any other parse error is returned to the caller.
std::error_code DataAggregator::parseBranchEvents() {
  outs() << "PERF2BOLT: parse branch events...\n";
  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // NumTotalSamples counts every attempted sample, NumSamples only those that
  // parsed successfully.
  uint64_t NumTotalSamples = 0;
  uint64_t NumEntries = 0;
  uint64_t NumSamples = 0;
  uint64_t NumSamplesNoLBR = 0;
  uint64_t NumTraces = 0;
  bool NeedsSkylakeFix = false;

  while (hasData() && NumTotalSamples < opts::MaxSamples) {
    ++NumTotalSamples;

    ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
    if (std::error_code EC = SampleRes.getError()) {
      if (EC == errc::no_such_process)
        continue;
      return EC;
    }
    ++NumSamples;

    PerfBranchSample &Sample = SampleRes.get();
    if (opts::WriteAutoFDOData)
      ++BasicSamples[Sample.PC];

    if (Sample.LBR.empty()) {
      ++NumSamplesNoLBR;
      continue;
    }

    NumEntries += Sample.LBR.size();
    // Once enabled, the workaround stays on for all following samples.
    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
      errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
      NeedsSkylakeFix = true;
    }

    // LBRs are stored in reverse execution order. NextPC refers to the next
    // recorded executed PC.
    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
    uint32_t NumEntry = 0;
    for (const LBREntry &LBR : Sample.LBR) {
      ++NumEntry;
      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
      // sometimes records entry 32 as an exact copy of entry 31. This will
      // cause us to likely record an invalid trace and generate a stale
      // function for BAT mode (non BAT disassembles the function and is able
      // to ignore this trace at aggregation time). Drop first 2 entries (last
      // two, in chronological order)
      if (NeedsSkylakeFix && NumEntry <= 2)
        continue;
      if (NextPC) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextPC;
        const BinaryFunction *TraceBF =
            getBinaryFunctionContainingAddress(TraceFrom);
        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
          // The trace stays within one function. It counts as internal when
          // the branch leading into it also originated in that function, and
          // as external otherwise.
          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
          if (TraceBF->containsAddress(LBR.From))
            ++Info.InternCount;
          else
            ++Info.ExternCount;
        } else {
          // The trace starts and ends in two different known functions:
          // invalid. Otherwise at least one end lies outside of any known
          // function: out of range.
          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
            LLVM_DEBUG(dbgs()
                       << "Invalid trace starting in "
                       << TraceBF->getPrintName() << " @ "
                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
                       << " and ending @ " << Twine::utohexstr(TraceTo)
                       << '\n');
            ++NumInvalidTraces;
          } else {
            LLVM_DEBUG(dbgs()
                       << "Out of range trace starting in "
                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
                       << Twine::utohexstr(
                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
                       << " and ending in "
                       << (getBinaryFunctionContainingAddress(TraceTo)
                               ? getBinaryFunctionContainingAddress(TraceTo)
                                     ->getPrintName()
                               : "None")
                       << " @ "
                       << Twine::utohexstr(
                              TraceTo -
                              (getBinaryFunctionContainingAddress(TraceTo)
                                   ? getBinaryFunctionContainingAddress(TraceTo)
                                         ->getAddress()
                                   : 0))
                       << '\n');
            ++NumLongRangeTraces;
          }
        }
        ++NumTraces;
      }
      NextPC = LBR.From;

      // Record the taken branch itself. An end that lies outside of any known
      // function is replaced with 0; branches with both ends unknown are
      // dropped.
      uint64_t From = LBR.From;
      if (!getBinaryFunctionContainingAddress(From))
        From = 0;
      uint64_t To = LBR.To;
      if (!getBinaryFunctionContainingAddress(To))
        To = 0;
      if (!From && !To)
        continue;
      BranchInfo &Info = BranchLBRs[Trace(From, To)];
      ++Info.TakenCount;
      Info.MispredCount += LBR.Mispred;
    }
  }

  // Flag every function touched by a recorded branch as having profile data.
  for (const auto &LBR : BranchLBRs) {
    const Trace &Trace = LBR.first;
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
      BF->setHasProfileAvailable();
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
      BF->setHasProfileAvailable();
  }

  // Prints " (<Percent>%)", colored red above T2, yellow above T1 and green
  // otherwise when the stream supports colors.
  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
    OS << " (";
    if (OS.has_colors()) {
      if (Percent > T2)
        OS.changeColor(raw_ostream::RED);
      else if (Percent > T1)
        OS.changeColor(raw_ostream::YELLOW);
      else
        OS.changeColor(raw_ostream::GREEN);
    }
    OS << format("%.1f%%", Percent);
    if (OS.has_colors())
      OS.resetColor();
    OS << ")";
  };

  outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
         << " LBR entries\n";
  if (NumTotalSamples) {
    if (NumSamples && NumSamplesNoLBR == NumSamples) {
      // Note: we don't know if perf2bolt is being used to parse memory samples
      // at this point. In this case, it is OK to parse zero LBRs.
      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
                "LBR. Record profile with perf record -j any or run perf2bolt "
                "in no-LBR mode with -nl (the performance improvement in -nl "
                "mode may be limited)\n";
    } else {
      // Ignored samples are those that were read but did not parse into a
      // sample for this binary.
      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
      printColored(outs(), PercentIgnored, 20, 50);
      outs() << " were ignored\n";
      if (PercentIgnored > 50.0f)
        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
                  "were attributed to the input binary\n";
    }
  }
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;
  float Perc = 0.0f;
  if (NumTraces > 0) {
    Perc = NumInvalidTraces * 100.0f / NumTraces;
    printColored(outs(), Perc, 5, 10);
  }
  outs() << "\n";
  if (Perc > 10.0f)
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance.\n\n";

  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;
  if (NumTraces > 0)
    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
  outs() << "\n";

  if (NumColdSamples > 0) {
    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << "PERF2BOLT: " << NumColdSamples
           << format(" (%.1f%%)", ColdSamples)
           << " samples recorded in cold regions of split functions.\n";
    if (ColdSamples > 5.0f)
      outs()
          << "WARNING: The BOLT-processed binary where samples were collected "
             "likely used bad data or your service observed a large shift in "
             "profile. You may want to audit this.\n";
  }

  return std::error_code();
}
1552 
1553 void DataAggregator::processBranchEvents() {
1554   outs() << "PERF2BOLT: processing branch events...\n";
1555   NamedRegionTimer T("processBranch", "Processing branch events",
1556                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1557 
1558   for (const auto &AggrLBR : FallthroughLBRs) {
1559     const Trace &Loc = AggrLBR.first;
1560     const FTInfo &Info = AggrLBR.second;
1561     LBREntry First{Loc.From, Loc.From, false};
1562     LBREntry Second{Loc.To, Loc.To, false};
1563     if (Info.InternCount)
1564       doTrace(First, Second, Info.InternCount);
1565     if (Info.ExternCount) {
1566       First.From = 0;
1567       doTrace(First, Second, Info.ExternCount);
1568     }
1569   }
1570 
1571   for (const auto &AggrLBR : BranchLBRs) {
1572     const Trace &Loc = AggrLBR.first;
1573     const BranchInfo &Info = AggrLBR.second;
1574     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1575   }
1576 }
1577 
1578 std::error_code DataAggregator::parseBasicEvents() {
1579   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1580   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1581                      TimerGroupDesc, opts::TimeAggregator);
1582   while (hasData()) {
1583     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1584     if (std::error_code EC = Sample.getError())
1585       return EC;
1586 
1587     if (!Sample->PC)
1588       continue;
1589 
1590     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1591       BF->setHasProfileAvailable();
1592 
1593     ++BasicSamples[Sample->PC];
1594     EventNames.insert(Sample->EventName);
1595   }
1596 
1597   return std::error_code();
1598 }
1599 
1600 void DataAggregator::processBasicEvents() {
1601   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1602   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1603                      TimerGroupDesc, opts::TimeAggregator);
1604   uint64_t OutOfRangeSamples = 0;
1605   uint64_t NumSamples = 0;
1606   for (auto &Sample : BasicSamples) {
1607     const uint64_t PC = Sample.first;
1608     const uint64_t HitCount = Sample.second;
1609     NumSamples += HitCount;
1610     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1611     if (!Func) {
1612       OutOfRangeSamples += HitCount;
1613       continue;
1614     }
1615 
1616     doSample(*Func, PC, HitCount);
1617   }
1618   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1619 
1620   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1621          << OutOfRangeSamples;
1622   float Perc = 0.0f;
1623   if (NumSamples > 0) {
1624     outs() << " (";
1625     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1626     if (outs().has_colors()) {
1627       if (Perc > 60.0f)
1628         outs().changeColor(raw_ostream::RED);
1629       else if (Perc > 40.0f)
1630         outs().changeColor(raw_ostream::YELLOW);
1631       else
1632         outs().changeColor(raw_ostream::GREEN);
1633     }
1634     outs() << format("%.1f%%", Perc);
1635     if (outs().has_colors())
1636       outs().resetColor();
1637     outs() << ")";
1638   }
1639   outs() << "\n";
1640   if (Perc > 80.0f)
1641     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1642               "binary is probably not the same binary used during profiling "
1643               "collection. The generated data may be ineffective for improving "
1644               "performance.\n\n";
1645 }
1646 
1647 std::error_code DataAggregator::parseMemEvents() {
1648   outs() << "PERF2BOLT: parsing memory events...\n";
1649   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1650                      TimerGroupDesc, opts::TimeAggregator);
1651   while (hasData()) {
1652     ErrorOr<PerfMemSample> Sample = parseMemSample();
1653     if (std::error_code EC = Sample.getError())
1654       return EC;
1655 
1656     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1657       BF->setHasProfileAvailable();
1658 
1659     MemSamples.emplace_back(std::move(Sample.get()));
1660   }
1661 
1662   return std::error_code();
1663 }
1664 
1665 void DataAggregator::processMemEvents() {
1666   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1667                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1668   for (const PerfMemSample &Sample : MemSamples) {
1669     uint64_t PC = Sample.PC;
1670     uint64_t Addr = Sample.Addr;
1671     StringRef FuncName;
1672     StringRef MemName;
1673 
1674     // Try to resolve symbol for PC
1675     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1676     if (!Func) {
1677       LLVM_DEBUG(if (PC != 0) {
1678         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1679                << Twine::utohexstr(Addr) << "\n";
1680       });
1681       continue;
1682     }
1683 
1684     FuncName = Func->getOneName();
1685     PC -= Func->getAddress();
1686 
1687     // Try to resolve symbol for memory load
1688     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1689       MemName = BD->getName();
1690       Addr -= BD->getAddress();
1691     } else if (opts::FilterMemProfile) {
1692       // Filter out heap/stack accesses
1693       continue;
1694     }
1695 
1696     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1697     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1698 
1699     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1700     setMemData(*Func, MemData);
1701     MemData->update(FuncLoc, AddrLoc);
1702     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1703   }
1704 }
1705 
1706 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1707   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1708   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1709                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1710   while (hasData()) {
1711     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1712     if (std::error_code EC = AggrEntry.getError())
1713       return EC;
1714 
1715     if (BinaryFunction *BF =
1716             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1717       BF->setHasProfileAvailable();
1718     if (BinaryFunction *BF =
1719             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1720       BF->setHasProfileAvailable();
1721 
1722     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1723   }
1724 
1725   return std::error_code();
1726 }
1727 
1728 void DataAggregator::processPreAggregated() {
1729   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1730   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1731                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1732 
1733   uint64_t NumTraces = 0;
1734   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1735     switch (AggrEntry.EntryType) {
1736     case AggregatedLBREntry::BRANCH:
1737       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1738                AggrEntry.Mispreds);
1739       break;
1740     case AggregatedLBREntry::FT:
1741     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1742       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1743                          ? AggrEntry.From.Offset
1744                          : 0,
1745                      AggrEntry.From.Offset, false};
1746       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1747       doTrace(First, Second, AggrEntry.Count);
1748       NumTraces += AggrEntry.Count;
1749       break;
1750     }
1751     }
1752   }
1753 
1754   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1755          << " aggregated LBR entries\n";
1756   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1757          << NumInvalidTraces;
1758   float Perc = 0.0f;
1759   if (NumTraces > 0) {
1760     outs() << " (";
1761     Perc = NumInvalidTraces * 100.0f / NumTraces;
1762     if (outs().has_colors()) {
1763       if (Perc > 10.0f)
1764         outs().changeColor(raw_ostream::RED);
1765       else if (Perc > 5.0f)
1766         outs().changeColor(raw_ostream::YELLOW);
1767       else
1768         outs().changeColor(raw_ostream::GREEN);
1769     }
1770     outs() << format("%.1f%%", Perc);
1771     if (outs().has_colors())
1772       outs().resetColor();
1773     outs() << ")";
1774   }
1775   outs() << "\n";
1776   if (Perc > 10.0f)
1777     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1778               "binary is probably not the same binary used during profiling "
1779               "collection. The generated data may be ineffective for improving "
1780               "performance.\n\n";
1781 
1782   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1783          << NumLongRangeTraces;
1784   if (NumTraces > 0)
1785     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1786   outs() << "\n";
1787 }
1788 
1789 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1790   size_t LineEnd = ParsingBuf.find_first_of("\n");
1791   if (LineEnd == StringRef::npos) {
1792     reportError("expected rest of line");
1793     Diag << "Found: " << ParsingBuf << "\n";
1794     return NoneType();
1795   }
1796   StringRef Line = ParsingBuf.substr(0, LineEnd);
1797 
1798   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1799   if (Pos == StringRef::npos)
1800     return NoneType();
1801   Line = Line.drop_front(Pos);
1802 
1803   // Line:
1804   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1805   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1806   int32_t PID;
1807   if (PIDStr.getAsInteger(10, PID)) {
1808     reportError("expected PID");
1809     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1810     return NoneType();
1811   }
1812 
1813   return PID;
1814 }
1815 
1816 namespace {
1817 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1818   const StringRef SecTimeStr = TimeStr.split('.').first;
1819   const StringRef USecTimeStr = TimeStr.split('.').second;
1820   uint64_t SecTime;
1821   uint64_t USecTime;
1822   if (SecTimeStr.getAsInteger(10, SecTime) ||
1823       USecTimeStr.getAsInteger(10, USecTime))
1824     return NoneType();
1825   return SecTime * 1000000ULL + USecTime;
1826 }
1827 }
1828 
1829 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1830   while (checkAndConsumeFS()) {
1831   }
1832 
1833   size_t LineEnd = ParsingBuf.find_first_of("\n");
1834   if (LineEnd == StringRef::npos) {
1835     reportError("expected rest of line");
1836     Diag << "Found: " << ParsingBuf << "\n";
1837     return NoneType();
1838   }
1839   StringRef Line = ParsingBuf.substr(0, LineEnd);
1840 
1841   size_t Pos = Line.find("PERF_RECORD_FORK");
1842   if (Pos == StringRef::npos) {
1843     consumeRestOfLine();
1844     return NoneType();
1845   }
1846 
1847   ForkInfo FI;
1848 
1849   const StringRef TimeStr =
1850       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1851   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1852     FI.Time = *TimeRes;
1853   }
1854 
1855   Line = Line.drop_front(Pos);
1856 
1857   // Line:
1858   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1859   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1860   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1861     reportError("expected PID");
1862     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1863     return NoneType();
1864   }
1865 
1866   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1867   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1868     reportError("expected PID");
1869     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1870     return NoneType();
1871   }
1872 
1873   consumeRestOfLine();
1874 
1875   return FI;
1876 }
1877 
1878 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1879 DataAggregator::parseMMapEvent() {
1880   while (checkAndConsumeFS()) {
1881   }
1882 
1883   MMapInfo ParsedInfo;
1884 
1885   size_t LineEnd = ParsingBuf.find_first_of("\n");
1886   if (LineEnd == StringRef::npos) {
1887     reportError("expected rest of line");
1888     Diag << "Found: " << ParsingBuf << "\n";
1889     return make_error_code(llvm::errc::io_error);
1890   }
1891   StringRef Line = ParsingBuf.substr(0, LineEnd);
1892 
1893   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1894   if (Pos == StringRef::npos) {
1895     consumeRestOfLine();
1896     return std::make_pair(StringRef(), ParsedInfo);
1897   }
1898 
1899   // Line:
1900   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1901 
1902   const StringRef TimeStr =
1903       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1904   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1905     ParsedInfo.Time = *TimeRes;
1906 
1907   Line = Line.drop_front(Pos);
1908 
1909   // Line:
1910   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1911 
1912   StringRef FileName = Line.rsplit(FieldSeparator).second;
1913   if (FileName.startswith("//") || FileName.startswith("[")) {
1914     consumeRestOfLine();
1915     return std::make_pair(StringRef(), ParsedInfo);
1916   }
1917   FileName = sys::path::filename(FileName);
1918 
1919   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1920   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1921     reportError("expected PID");
1922     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1923     return make_error_code(llvm::errc::io_error);
1924   }
1925 
1926   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1927   if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) {
1928     reportError("expected base address");
1929     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1930     return make_error_code(llvm::errc::io_error);
1931   }
1932 
1933   const StringRef SizeStr = Line.split('(').second.split(')').first;
1934   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1935     reportError("expected mmaped size");
1936     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1937     return make_error_code(llvm::errc::io_error);
1938   }
1939 
1940   const StringRef OffsetStr =
1941       Line.split('@').second.ltrim().split(FieldSeparator).first;
1942   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1943     reportError("expected mmaped page-aligned offset");
1944     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1945     return make_error_code(llvm::errc::io_error);
1946   }
1947 
1948   consumeRestOfLine();
1949 
1950   return std::make_pair(FileName, ParsedInfo);
1951 }
1952 
1953 std::error_code DataAggregator::parseMMapEvents() {
1954   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1955   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1956                      TimerGroupDesc, opts::TimeAggregator);
1957 
1958   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1959   while (hasData()) {
1960     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1961     if (std::error_code EC = FileMMapInfoRes.getError())
1962       return EC;
1963 
1964     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1965     if (FileMMapInfo.second.PID == -1)
1966       continue;
1967 
1968     // Consider only the first mapping of the file for any given PID
1969     bool PIDExists = false;
1970     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1971     for (auto MI = Range.first; MI != Range.second; ++MI) {
1972       if (MI->second.PID == FileMMapInfo.second.PID) {
1973         PIDExists = true;
1974         break;
1975       }
1976     }
1977     if (PIDExists)
1978       continue;
1979 
1980     GlobalMMapInfo.insert(FileMMapInfo);
1981   }
1982 
1983   LLVM_DEBUG({
1984     dbgs() << "FileName -> mmap info:\n";
1985     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
1986       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
1987              << Twine::utohexstr(Pair.second.BaseAddress) << ", "
1988              << Twine::utohexstr(Pair.second.Size) << " @ "
1989              << Twine::utohexstr(Pair.second.Offset) << "]\n";
1990   });
1991 
1992   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
1993   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1994     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1995            << "\" for profile matching\n";
1996     NameToUse = BuildIDBinaryName;
1997   }
1998 
1999   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2000   for (auto I = Range.first; I != Range.second; ++I) {
2001     const MMapInfo &MMapInfo = I->second;
2002     if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) {
2003       // Check that the binary mapping matches one of the segments.
2004       bool MatchFound = false;
2005       for (auto &KV : BC->SegmentMapInfo) {
2006         SegmentInfo &SegInfo = KV.second;
2007         // The mapping is page-aligned and hence the BaseAddress could be
2008         // different from the segment start address. We cannot know the page
2009         // size of the mapping, but we know it should not exceed the segment
2010         // alignment value. Hence we are performing an approximate check.
2011         if (SegInfo.Address >= MMapInfo.BaseAddress &&
2012             SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) {
2013           MatchFound = true;
2014           break;
2015         }
2016       }
2017       if (!MatchFound) {
2018         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2019                << " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n';
2020         continue;
2021       }
2022     }
2023 
2024     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2025   }
2026 
2027   if (BinaryMMapInfo.empty()) {
2028     if (errs().has_colors())
2029       errs().changeColor(raw_ostream::RED);
2030     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2031            << BC->getFilename() << "\".";
2032     if (!GlobalMMapInfo.empty()) {
2033       errs() << " Profile for the following binary name(s) is available:\n";
2034       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2035            I = GlobalMMapInfo.upper_bound(I->first))
2036         errs() << "  " << I->first << '\n';
2037       errs() << "Please rename the input binary.\n";
2038     } else {
2039       errs() << " Failed to extract any binary name from a profile.\n";
2040     }
2041     if (errs().has_colors())
2042       errs().resetColor();
2043 
2044     exit(1);
2045   }
2046 
2047   return std::error_code();
2048 }
2049 
2050 std::error_code DataAggregator::parseTaskEvents() {
2051   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2052   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2053                      TimerGroupDesc, opts::TimeAggregator);
2054 
2055   while (hasData()) {
2056     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2057       // Remove forked child that ran execve
2058       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2059       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2060         BinaryMMapInfo.erase(MMapInfoIter);
2061       consumeRestOfLine();
2062       continue;
2063     }
2064 
2065     Optional<ForkInfo> ForkInfo = parseForkEvent();
2066     if (!ForkInfo)
2067       continue;
2068 
2069     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2070       continue;
2071 
2072     if (ForkInfo->Time == 0) {
2073       // Process was forked and mmaped before perf ran. In this case the child
2074       // should have its own mmap entry unless it was execve'd.
2075       continue;
2076     }
2077 
2078     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2079     if (MMapInfoIter == BinaryMMapInfo.end())
2080       continue;
2081 
2082     MMapInfo MMapInfo = MMapInfoIter->second;
2083     MMapInfo.PID = ForkInfo->ChildPID;
2084     MMapInfo.Forked = true;
2085     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2086   }
2087 
2088   outs() << "PERF2BOLT: input binary is associated with "
2089          << BinaryMMapInfo.size() << " PID(s)\n";
2090 
2091   LLVM_DEBUG({
2092     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2093       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2094              << ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x"
2095              << Twine::utohexstr(MMI.second.Size) << ")\n";
2096   });
2097 
2098   return std::error_code();
2099 }
2100 
2101 Optional<std::pair<StringRef, StringRef>>
2102 DataAggregator::parseNameBuildIDPair() {
2103   while (checkAndConsumeFS()) {
2104   }
2105 
2106   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2107   if (std::error_code EC = BuildIDStr.getError())
2108     return NoneType();
2109 
2110   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2111   if (std::error_code EC = NameStr.getError())
2112     return NoneType();
2113 
2114   consumeRestOfLine();
2115   return std::make_pair(NameStr.get(), BuildIDStr.get());
2116 }
2117 
2118 Optional<StringRef>
2119 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2120   while (hasData()) {
2121     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2122     if (!IDPair)
2123       return NoneType();
2124 
2125     if (IDPair->second.startswith(FileBuildID))
2126       return sys::path::filename(IDPair->first);
2127   }
2128   return NoneType();
2129 }
2130 
2131 std::error_code
2132 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2133   std::error_code EC;
2134   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2135   if (EC)
2136     return EC;
2137 
2138   bool WriteMemLocs = false;
2139 
2140   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2141     if (WriteMemLocs)
2142       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2143     else
2144       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2145     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2146             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2147   };
2148 
2149   uint64_t BranchValues = 0;
2150   uint64_t MemValues = 0;
2151 
2152   if (BAT)
2153     OutFile << "boltedcollection\n";
2154   if (opts::BasicAggregation) {
2155     OutFile << "no_lbr";
2156     for (const StringMapEntry<NoneType> &Entry : EventNames)
2157       OutFile << " " << Entry.getKey();
2158     OutFile << "\n";
2159 
2160     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2161       for (const SampleInfo &SI : Func.getValue().Data) {
2162         writeLocation(SI.Loc);
2163         OutFile << SI.Hits << "\n";
2164         ++BranchValues;
2165       }
2166     }
2167   } else {
2168     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2169       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2170         writeLocation(BI.From);
2171         writeLocation(BI.To);
2172         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2173         ++BranchValues;
2174       }
2175       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2176         // Do not output if source is a known symbol, since this was already
2177         // accounted for in the source function
2178         if (BI.From.IsSymbol)
2179           continue;
2180         writeLocation(BI.From);
2181         writeLocation(BI.To);
2182         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2183         ++BranchValues;
2184       }
2185     }
2186 
2187     WriteMemLocs = true;
2188     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2189       for (const MemInfo &MemEvent : Func.getValue().Data) {
2190         writeLocation(MemEvent.Offset);
2191         writeLocation(MemEvent.Addr);
2192         OutFile << MemEvent.Count << "\n";
2193         ++MemValues;
2194       }
2195     }
2196   }
2197 
2198   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2199          << " memory objects to " << OutputFilename << "\n";
2200 
2201   return std::error_code();
2202 }
2203 
2204 void DataAggregator::dump() const { DataReader::dump(); }
2205 
2206 void DataAggregator::dump(const LBREntry &LBR) const {
2207   Diag << "From: " << Twine::utohexstr(LBR.From)
2208        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2209        << "\n";
2210 }
2211 
2212 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2213   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2214   for (const LBREntry &LBR : Sample.LBR)
2215     dump(LBR);
2216 }
2217 
2218 void DataAggregator::dump(const PerfMemSample &Sample) const {
2219   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2220 }
2221