1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/Process.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/Regex.h"
28 #include "llvm/Support/Timer.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <map>
31 #include <unordered_map>
32 
33 #define DEBUG_TYPE "aggregator"
34 
35 using namespace llvm;
36 using namespace bolt;
37 
38 namespace opts {
39 
40 static cl::opt<bool>
41 BasicAggregation("nl",
42   cl::desc("aggregate basic samples (without LBR info)"),
43   cl::init(false),
44   cl::ZeroOrMore,
45   cl::cat(AggregatorCategory));
46 
47 static cl::opt<bool>
48 FilterMemProfile("filter-mem-profile",
49   cl::desc("if processing a memory profile, filter out stack or heap accesses "
50            "that won't be useful for BOLT to reduce profile file size"),
51   cl::init(true),
52   cl::cat(AggregatorCategory));
53 
54 static cl::opt<unsigned long long>
55 FilterPID("pid",
56   cl::desc("only use samples from process with specified PID"),
57   cl::init(0),
58   cl::Optional,
59   cl::cat(AggregatorCategory));
60 
61 static cl::opt<bool>
62 IgnoreBuildID("ignore-build-id",
63   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
64   cl::init(false),
65   cl::cat(AggregatorCategory));
66 
67 static cl::opt<bool>
68 IgnoreInterruptLBR("ignore-interrupt-lbr",
69   cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
70   cl::init(true),
71   cl::ZeroOrMore,
72   cl::cat(AggregatorCategory));
73 
74 static cl::opt<unsigned long long>
75 MaxSamples("max-samples",
76   cl::init(-1ULL),
77   cl::desc("maximum number of samples to read from LBR profile"),
78   cl::Optional,
79   cl::Hidden,
80   cl::cat(AggregatorCategory));
81 
82 static cl::opt<bool>
83 ReadPreAggregated("pa",
84   cl::desc("skip perf and read data from a pre-aggregated file format"),
85   cl::init(false),
86   cl::ZeroOrMore,
87   cl::cat(AggregatorCategory));
88 
89 static cl::opt<bool>
90 TimeAggregator("time-aggr",
91   cl::desc("time BOLT aggregator"),
92   cl::init(false),
93   cl::ZeroOrMore,
94   cl::cat(AggregatorCategory));
95 
96 static cl::opt<bool>
97 UseEventPC("use-event-pc",
98   cl::desc("use event PC in combination with LBR sampling"),
99   cl::init(false),
100   cl::ZeroOrMore,
101   cl::cat(AggregatorCategory));
102 
103 static cl::opt<bool>
104 WriteAutoFDOData("autofdo",
105   cl::desc("generate autofdo textual data instead of bolt data"),
106   cl::init(false),
107   cl::ZeroOrMore,
108   cl::cat(AggregatorCategory));
109 
110 } // namespace opts
111 
namespace {

// Timer group name and description handed to NamedRegionTimer by the
// aggregator's timed regions (e.g. writeAutoFDOData()).
const char TimerGroupName[] = "aggregator";
const char TimerGroupDesc[] = "Aggregator";

} // namespace
118 
// Out-of-class definition of the static constexpr data member; it carries no
// initializer here, so the value comes from the in-class declaration.
constexpr uint64_t DataAggregator::KernelBaseAddr;
120 
121 DataAggregator::~DataAggregator() { deleteTempFiles(); }
122 
123 namespace {
124 void deleteTempFile(const std::string &FileName) {
125   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
126     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
127            << " with error " << Errc.message() << "\n";
128 }
129 }
130 
131 void DataAggregator::deleteTempFiles() {
132   for (std::string &FileName : TempFiles)
133     deleteTempFile(FileName);
134   TempFiles.clear();
135 }
136 
137 void DataAggregator::findPerfExecutable() {
138   Optional<std::string> PerfExecutable =
139       sys::Process::FindInEnvPath("PATH", "perf");
140   if (!PerfExecutable) {
141     outs() << "PERF2BOLT: No perf executable found!\n";
142     exit(1);
143   }
144   PerfPath = *PerfExecutable;
145 }
146 
147 void DataAggregator::start() {
148   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
149 
150   // Don't launch perf for pre-aggregated files
151   if (opts::ReadPreAggregated)
152     return;
153 
154   findPerfExecutable();
155 
156   if (opts::BasicAggregation)
157     launchPerfProcess("events without LBR",
158                       MainEventsPPI,
159                       "script -F pid,event,ip",
160                       /*Wait = */false);
161   else
162     launchPerfProcess("branch events",
163                       MainEventsPPI,
164                       "script -F pid,ip,brstack",
165                       /*Wait = */false);
166 
167   // Note: we launch script for mem events regardless of the option, as the
168   //       command fails fairly fast if mem events were not collected.
169   launchPerfProcess("mem events",
170                     MemEventsPPI,
171                     "script -F pid,event,addr,ip",
172                     /*Wait = */false);
173 
174   launchPerfProcess("process events",
175                     MMapEventsPPI,
176                     "script --show-mmap-events",
177                     /*Wait = */false);
178 
179   launchPerfProcess("task events",
180                     TaskEventsPPI,
181                     "script --show-task-events",
182                     /*Wait = */false);
183 }
184 
185 void DataAggregator::abort() {
186   if (opts::ReadPreAggregated)
187     return;
188 
189   std::string Error;
190 
191   // Kill subprocesses in case they are not finished
192   sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
193   sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
194   sys::Wait(MainEventsPPI.PI, 1, false, &Error);
195   sys::Wait(MemEventsPPI.PI, 1, false, &Error);
196 
197   deleteTempFiles();
198 
199   exit(1);
200 }
201 
202 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
203                                        const char *ArgsString, bool Wait) {
204   SmallVector<StringRef, 4> Argv;
205 
206   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
207   Argv.push_back(PerfPath.data());
208 
209   char *WritableArgsString = strdup(ArgsString);
210   char *Str = WritableArgsString;
211   do {
212     Argv.push_back(Str);
213     while (*Str && *Str != ' ')
214       ++Str;
215     if (!*Str)
216       break;
217     *Str++ = 0;
218   } while (true);
219 
220   Argv.push_back("-f");
221   Argv.push_back("-i");
222   Argv.push_back(Filename.c_str());
223 
224   if (std::error_code Errc =
225           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
226     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
227            << " with error " << Errc.message() << "\n";
228     exit(1);
229   }
230   TempFiles.push_back(PPI.StdoutPath.data());
231 
232   if (std::error_code Errc =
233           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
234     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
235            << " with error " << Errc.message() << "\n";
236     exit(1);
237   }
238   TempFiles.push_back(PPI.StderrPath.data());
239 
240   Optional<StringRef> Redirects[] = {
241       llvm::None,                        // Stdin
242       StringRef(PPI.StdoutPath.data()),  // Stdout
243       StringRef(PPI.StderrPath.data())}; // Stderr
244 
245   LLVM_DEBUG({
246     dbgs() << "Launching perf: ";
247     for (StringRef Arg : Argv)
248       dbgs() << Arg << " ";
249     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
250            << "\n";
251   });
252 
253   if (Wait)
254     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
255                                             /*envp*/ llvm::None, Redirects);
256   else
257     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
258                                 Redirects);
259 
260   free(WritableArgsString);
261 }
262 
263 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
264   PerfProcessInfo BuildIDProcessInfo;
265   launchPerfProcess("buildid list",
266                     BuildIDProcessInfo,
267                     "buildid-list",
268                     /*Wait = */true);
269 
270   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
271     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
272         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
273     StringRef ErrBuf = (*MB)->getBuffer();
274 
275     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
276            << '\n';
277     errs() << ErrBuf;
278     return;
279   }
280 
281   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
282       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
283   if (std::error_code EC = MB.getError()) {
284     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
285            << EC.message() << "\n";
286     return;
287   }
288 
289   FileBuf.reset(MB->release());
290   ParsingBuf = FileBuf->getBuffer();
291   if (ParsingBuf.empty()) {
292     errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
293               "data was recorded without it\n";
294     return;
295   }
296 
297   Col = 0;
298   Line = 1;
299   Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
300   if (!FileName) {
301     errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
302               "This indicates the input binary supplied for data aggregation "
303               "is not the same recorded by perf when collecting profiling "
304               "data, or there were no samples recorded for the binary. "
305               "Use -ignore-build-id option to override.\n";
306     if (!opts::IgnoreBuildID)
307       abort();
308   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
309     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
310     BuildIDBinaryName = std::string(*FileName);
311   } else {
312     outs() << "PERF2BOLT: matched build-id and file name\n";
313   }
314 
315   return;
316 }
317 
318 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
319   if (opts::ReadPreAggregated)
320     return true;
321 
322   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
323   if (!FD)
324     return false;
325 
326   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
327 
328   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
329   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
330       *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
331   if (!BytesRead || *BytesRead != 7)
332     return false;
333 
334   if (strncmp(Buf, "PERFILE", 7) == 0)
335     return true;
336   return false;
337 }
338 
339 void DataAggregator::parsePreAggregated() {
340   std::string Error;
341 
342   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
343       MemoryBuffer::getFileOrSTDIN(Filename);
344   if (std::error_code EC = MB.getError()) {
345     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
346            << EC.message() << "\n";
347     exit(1);
348   }
349 
350   FileBuf.reset(MB->release());
351   ParsingBuf = FileBuf->getBuffer();
352   Col = 0;
353   Line = 1;
354   if (parsePreAggregatedLBRSamples()) {
355     errs() << "PERF2BOLT: failed to parse samples\n";
356     exit(1);
357   }
358 }
359 
360 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
361   outs() << "PERF2BOLT: writing data for autofdo tools...\n";
362   NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
363                      TimerGroupDesc, opts::TimeAggregator);
364 
365   std::error_code EC;
366   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
367   if (EC)
368     return EC;
369 
370   // Format:
371   // number of unique traces
372   // from_1-to_1:count_1
373   // from_2-to_2:count_2
374   // ......
375   // from_n-to_n:count_n
376   // number of unique sample addresses
377   // addr_1:count_1
378   // addr_2:count_2
379   // ......
380   // addr_n:count_n
381   // number of unique LBR entries
382   // src_1->dst_1:count_1
383   // src_2->dst_2:count_2
384   // ......
385   // src_n->dst_n:count_n
386 
387   const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
388 
389   // AutoFDO addresses are relative to the first allocated loadable program
390   // segment
391   auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
392     if (Address < FirstAllocAddress)
393       return 0;
394     return Address - FirstAllocAddress;
395   };
396 
397   OutFile << FallthroughLBRs.size() << "\n";
398   for (const auto &AggrLBR : FallthroughLBRs) {
399     const Trace &Trace = AggrLBR.first;
400     const FTInfo &Info = AggrLBR.second;
401     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
402             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
403             << (Info.InternCount + Info.ExternCount) << "\n";
404   }
405 
406   OutFile << BasicSamples.size() << "\n";
407   for (const auto &Sample : BasicSamples) {
408     uint64_t PC = Sample.first;
409     uint64_t HitCount = Sample.second;
410     OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
411   }
412 
413   OutFile << BranchLBRs.size() << "\n";
414   for (const auto &AggrLBR : BranchLBRs) {
415     const Trace &Trace = AggrLBR.first;
416     const BranchInfo &Info = AggrLBR.second;
417     OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
418             << Twine::utohexstr(filterAddress(Trace.To)) << ":"
419             << Info.TakenCount << "\n";
420   }
421 
422   outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
423          << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
424          << " unique branches to " << OutputFilename << "\n";
425 
426   return std::error_code();
427 }
428 
429 void DataAggregator::filterBinaryMMapInfo() {
430   if (opts::FilterPID) {
431     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
432     if (MMapInfoIter != BinaryMMapInfo.end()) {
433       MMapInfo MMap = MMapInfoIter->second;
434       BinaryMMapInfo.clear();
435       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
436     } else {
437       if (errs().has_colors())
438         errs().changeColor(raw_ostream::RED);
439       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
440              << opts::FilterPID << "\""
441              << " for binary \"" << BC->getFilename() << "\".";
442       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
443       errs() << " Profile for the following process is available:\n";
444       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
445         outs() << "  " << MMI.second.PID
446                << (MMI.second.Forked ? " (forked)\n" : "\n");
447 
448       if (errs().has_colors())
449         errs().resetColor();
450 
451       exit(1);
452     }
453   }
454 }
455 
456 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
457   this->BC = &BC;
458 
459   if (opts::ReadPreAggregated) {
460     parsePreAggregated();
461     return Error::success();
462   }
463 
464   if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
465     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
466     processFileBuildID(*FileBuildID);
467   } else {
468     errs() << "BOLT-WARNING: build-id will not be checked because we could "
469               "not read one from input binary\n";
470   }
471 
472   auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
473     std::string Error;
474     outs() << "PERF2BOLT: waiting for perf " << Name
475            << " collection to finish...\n";
476     sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
477 
478     if (!Error.empty()) {
479       errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
480       deleteTempFiles();
481       exit(1);
482     }
483 
484     if (PI.ReturnCode != 0) {
485       ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
486           MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
487       StringRef ErrBuf = (*ErrorMB)->getBuffer();
488 
489       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
490       errs() << ErrBuf;
491       deleteTempFiles();
492       exit(1);
493     }
494 
495     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
496         MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
497     if (std::error_code EC = MB.getError()) {
498       errs() << "Cannot open " << Process.StdoutPath.data() << ": "
499              << EC.message() << "\n";
500       deleteTempFiles();
501       exit(1);
502     }
503 
504     FileBuf.reset(MB->release());
505     ParsingBuf = FileBuf->getBuffer();
506     Col = 0;
507     Line = 1;
508   };
509 
510   if (opts::LinuxKernelMode) {
511     // Current MMap parsing logic does not work with linux kernel.
512     // MMap entries for linux kernel uses PERF_RECORD_MMAP
513     // format instead of typical PERF_RECORD_MMAP2 format.
514     // Since linux kernel address mapping is absolute (same as
515     // in the ELF file), we avoid parsing MMap in linux kernel mode.
516     // While generating optimized linux kernel binary, we may need
517     // to parse MMap entries.
518 
519     // In linux kernel mode, we analyze and optimize
520     // all linux kernel binary instructions, irrespective
521     // of whether they are due to system calls or due to
522     // interrupts. Therefore, we cannot ignore interrupt
523     // in Linux kernel mode.
524     opts::IgnoreInterruptLBR = false;
525   } else {
526     prepareToParse("mmap events", MMapEventsPPI);
527     if (parseMMapEvents())
528       errs() << "PERF2BOLT: failed to parse mmap events\n";
529   }
530 
531   prepareToParse("task events", TaskEventsPPI);
532   if (parseTaskEvents())
533     errs() << "PERF2BOLT: failed to parse task events\n";
534 
535   filterBinaryMMapInfo();
536   prepareToParse("events", MainEventsPPI);
537 
538   if (opts::HeatmapMode) {
539     if (std::error_code EC = printLBRHeatMap()) {
540       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
541       exit(1);
542     }
543     exit(0);
544   }
545 
546   if ((!opts::BasicAggregation && parseBranchEvents()) ||
547       (opts::BasicAggregation && parseBasicEvents()))
548     errs() << "PERF2BOLT: failed to parse samples\n";
549 
550   // We can finish early if the goal is just to generate data for autofdo
551   if (opts::WriteAutoFDOData) {
552     if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
553       errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
554 
555     deleteTempFiles();
556     exit(0);
557   }
558 
559   // Special handling for memory events
560   std::string Error;
561   sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
562   if (PI.ReturnCode != 0) {
563     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
564         MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
565     StringRef ErrBuf = (*MB)->getBuffer();
566 
567     deleteTempFiles();
568 
569     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
570                  "Cannot print 'addr' field.");
571     if (!NoData.match(ErrBuf)) {
572       errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
573       errs() << ErrBuf;
574       exit(1);
575     }
576     return Error::success();
577   }
578 
579   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
580       MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
581   if (std::error_code EC = MB.getError()) {
582     errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
583            << EC.message() << "\n";
584     deleteTempFiles();
585     exit(1);
586   }
587 
588   FileBuf.reset(MB->release());
589   ParsingBuf = FileBuf->getBuffer();
590   Col = 0;
591   Line = 1;
592   if (const std::error_code EC = parseMemEvents())
593     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
594            << '\n';
595 
596   deleteTempFiles();
597 
598   return Error::success();
599 }
600 
601 Error DataAggregator::readProfile(BinaryContext &BC) {
602   processProfile(BC);
603 
604   for (auto &BFI : BC.getBinaryFunctions()) {
605     BinaryFunction &Function = BFI.second;
606     convertBranchData(Function);
607   }
608 
609   if (opts::AggregateOnly) {
610     if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
611       report_error("cannot create output data file", EC);
612   }
613 
614   return Error::success();
615 }
616 
617 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
618   return Function.hasProfileAvailable();
619 }
620 
621 void DataAggregator::processProfile(BinaryContext &BC) {
622   if (opts::ReadPreAggregated)
623     processPreAggregated();
624   else if (opts::BasicAggregation)
625     processBasicEvents();
626   else
627     processBranchEvents();
628 
629   processMemEvents();
630 
631   // Mark all functions with registered events as having a valid profile.
632   for (auto &BFI : BC.getBinaryFunctions()) {
633     BinaryFunction &BF = BFI.second;
634     if (getBranchData(BF)) {
635       const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
636                                                 : BinaryFunction::PF_LBR;
637       BF.markProfiled(Flags);
638     }
639   }
640 
641   // Release intermediate storage.
642   clear(BranchLBRs);
643   clear(FallthroughLBRs);
644   clear(AggregatedLBRs);
645   clear(BasicSamples);
646   clear(MemSamples);
647 }
648 
649 BinaryFunction *
650 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
651   if (!BC->containsAddress(Address))
652     return nullptr;
653 
654   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
655                                                 /*UseMaxSize=*/true);
656 }
657 
658 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
659                                           uint64_t Count) {
660   if (!BAT)
661     return Func.getOneName();
662 
663   const BinaryFunction *OrigFunc = &Func;
664   if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
665     NumColdSamples += Count;
666     BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
667     if (HotFunc)
668       OrigFunc = HotFunc;
669   }
670   // If it is a local function, prefer the name containing the file name where
671   // the local function was declared
672   for (StringRef AlternativeName : OrigFunc->getNames()) {
673     size_t FileNameIdx = AlternativeName.find('/');
674     // Confirm the alternative name has the pattern Symbol/FileName/1 before
675     // using it
676     if (FileNameIdx == StringRef::npos ||
677         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
678       continue;
679     return AlternativeName;
680   }
681   return OrigFunc->getOneName();
682 }
683 
684 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
685                               uint64_t Count) {
686   auto I = NamesToSamples.find(Func.getOneName());
687   if (I == NamesToSamples.end()) {
688     bool Success;
689     StringRef LocName = getLocationName(Func, Count);
690     std::tie(I, Success) = NamesToSamples.insert(
691         std::make_pair(Func.getOneName(),
692                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
693   }
694 
695   Address -= Func.getAddress();
696   if (BAT)
697     Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
698 
699   I->second.bumpCount(Address, Count);
700   return true;
701 }
702 
703 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
704                                    uint64_t To, uint64_t Count,
705                                    uint64_t Mispreds) {
706   FuncBranchData *AggrData = getBranchData(Func);
707   if (!AggrData) {
708     AggrData = &NamesToBranches[Func.getOneName()];
709     AggrData->Name = getLocationName(Func, Count);
710     setBranchData(Func, AggrData);
711   }
712 
713   From -= Func.getAddress();
714   To -= Func.getAddress();
715   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
716                     << " @ " << Twine::utohexstr(From) << " -> "
717                     << Func.getPrintName() << " @ " << Twine::utohexstr(To)
718                     << '\n');
719   if (BAT) {
720     From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
721     To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
722     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
723                       << Func.getPrintName() << " @ " << Twine::utohexstr(From)
724                       << " -> " << Func.getPrintName() << " @ "
725                       << Twine::utohexstr(To) << '\n');
726   }
727 
728   AggrData->bumpBranchCount(From, To, Count, Mispreds);
729   return true;
730 }
731 
732 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
733                                    BinaryFunction *ToFunc, uint64_t From,
734                                    uint64_t To, uint64_t Count,
735                                    uint64_t Mispreds) {
736   FuncBranchData *FromAggrData = nullptr;
737   FuncBranchData *ToAggrData = nullptr;
738   StringRef SrcFunc;
739   StringRef DstFunc;
740   if (FromFunc) {
741     SrcFunc = getLocationName(*FromFunc, Count);
742     FromAggrData = getBranchData(*FromFunc);
743     if (!FromAggrData) {
744       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
745       FromAggrData->Name = SrcFunc;
746       setBranchData(*FromFunc, FromAggrData);
747     }
748     From -= FromFunc->getAddress();
749     if (BAT)
750       From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
751 
752     recordExit(*FromFunc, From, Mispreds, Count);
753   }
754   if (ToFunc) {
755     DstFunc = getLocationName(*ToFunc, 0);
756     ToAggrData = getBranchData(*ToFunc);
757     if (!ToAggrData) {
758       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
759       ToAggrData->Name = DstFunc;
760       setBranchData(*ToFunc, ToAggrData);
761     }
762     To -= ToFunc->getAddress();
763     if (BAT)
764       To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
765 
766     recordEntry(*ToFunc, To, Mispreds, Count);
767   }
768 
769   if (FromAggrData)
770     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
771                                 Count, Mispreds);
772   if (ToAggrData)
773     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
774                                Count, Mispreds);
775   return true;
776 }
777 
778 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
779                               uint64_t Mispreds) {
780   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
781   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
782   if (!FromFunc && !ToFunc)
783     return false;
784 
785   if (FromFunc == ToFunc) {
786     recordBranch(*FromFunc, From - FromFunc->getAddress(),
787                  To - FromFunc->getAddress(), Count, Mispreds);
788     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
789   }
790 
791   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
792 }
793 
794 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
795                              uint64_t Count) {
796   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
797   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
798   if (!FromFunc || !ToFunc) {
799     LLVM_DEBUG(
800         dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
801                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
802                << " and ending in " << ToFunc->getPrintName() << " @ "
803                << ToFunc->getPrintName() << " @ "
804                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
805     NumLongRangeTraces += Count;
806     return false;
807   }
808   if (FromFunc != ToFunc) {
809     NumInvalidTraces += Count;
810     LLVM_DEBUG(
811         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
812                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
813                << " and ending in " << ToFunc->getPrintName() << " @ "
814                << ToFunc->getPrintName() << " @ "
815                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
816     return false;
817   }
818 
819   Optional<BoltAddressTranslation::FallthroughListTy> FTs =
820       BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
821           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
822   if (!FTs) {
823     LLVM_DEBUG(
824         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
825                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
826                << " and ending in " << ToFunc->getPrintName() << " @ "
827                << ToFunc->getPrintName() << " @ "
828                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
829     NumInvalidTraces += Count;
830     return false;
831   }
832 
833   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
834                     << FromFunc->getPrintName() << ":"
835                     << Twine::utohexstr(First.To) << " to "
836                     << Twine::utohexstr(Second.From) << ".\n");
837   for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
838     doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
839                   Pair.second + FromFunc->getAddress(), Count, false);
840 
841   return true;
842 }
843 
/// Walk the basic blocks of \p BF between the end of \p FirstLBR (its To
/// address) and the start of \p SecondLBR (its From address), adding \p Count
/// to the branch count of every fall-through edge on the way.
///
/// If \p Branches is non-null, one (source offset, destination offset) pair
/// is appended to it for each edge visited.
///
/// \returns false if the function is not simple, the trace runs backwards,
/// either end is not inside a basic block, or two consecutive blocks on the
/// path are not connected by a successor edge; true otherwise. Counts already
/// added before such a failure are not rolled back.
bool DataAggregator::recordTrace(
    BinaryFunction &BF,
    const LBREntry &FirstLBR,
    const LBREntry &SecondLBR,
    uint64_t Count,
    SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
  BinaryContext &BC = BF.getBinaryContext();

  if (!BF.isSimple())
    return false;

  assert(BF.hasCFG() && "can only record traces in CFG state");

  // Offsets of the trace within this function.
  const uint64_t From = FirstLBR.To - BF.getAddress();
  const uint64_t To = SecondLBR.From - BF.getAddress();

  // A trace cannot end before it starts.
  if (From > To)
    return false;

  BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
  BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB)
    return false;

  // Adjust FromBB if the first LBR is a return from the last instruction in
  // the previous block (that instruction should be a call).
  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
    BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
    if (PrevBB->getSuccessor(FromBB->getLabel())) {
      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
      if (Instr && BC.MIB->isCall(*Instr))
        FromBB = PrevBB;
      else
        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
                          << '\n');
    } else {
      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
    }
  }

  // Fill out information for fall-through edges. The From and To could be
  // within the same basic block, e.g. when two call instructions are in the
  // same block. In this case we skip the processing.
  if (FromBB == ToBB)
    return true;

  // Process blocks in the original layout order.
  BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
  assert(BB == FromBB && "index mismatch");
  while (BB != ToBB) {
    BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");

    // Check for bad LBRs.
    if (!BB->getSuccessor(NextBB->getLabel())) {
      LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
                        << "  " << FirstLBR << '\n'
                        << "  " << SecondLBR << '\n');
      return false;
    }

    // Record fall-through jumps
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
    BI.Count += Count;

    if (Branches) {
      // The source of the recorded pair is the "Offset" annotation of the
      // block's last non-pseudo instruction, or the block offset when the
      // block has no such instruction.
      const MCInst *Instr = BB->getLastNonPseudoInstr();
      uint64_t Offset = 0;
      if (Instr)
        Offset = BC.MIB->getAnnotationWithDefault<uint32_t>(*Instr, "Offset");
      else
        Offset = BB->getOffset();

      Branches->emplace_back(Offset, NextBB->getOffset());
    }

    BB = NextBB;
  }

  return true;
}
928 
929 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
930 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
931                                        const LBREntry &FirstLBR,
932                                        const LBREntry &SecondLBR,
933                                        uint64_t Count) const {
934   SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
935 
936   if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
937     return NoneType();
938 
939   return Res;
940 }
941 
942 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
943                                  uint64_t Count) const {
944   if (To > BF.getSize())
945     return false;
946 
947   if (!BF.hasProfile())
948     BF.ExecutionCount = 0;
949 
950   BinaryBasicBlock *EntryBB = nullptr;
951   if (To == 0) {
952     BF.ExecutionCount += Count;
953     if (!BF.empty())
954       EntryBB = &BF.front();
955   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
956     if (BB->isEntryPoint())
957       EntryBB = BB;
958   }
959 
960   if (EntryBB)
961     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
962 
963   return true;
964 }
965 
966 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
967                                 uint64_t Count) const {
968   if (!BF.isSimple() || From > BF.getSize())
969     return false;
970 
971   if (!BF.hasProfile())
972     BF.ExecutionCount = 0;
973 
974   return true;
975 }
976 
977 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
978   LBREntry Res;
979   ErrorOr<StringRef> FromStrRes = parseString('/');
980   if (std::error_code EC = FromStrRes.getError())
981     return EC;
982   StringRef OffsetStr = FromStrRes.get();
983   if (OffsetStr.getAsInteger(0, Res.From)) {
984     reportError("expected hexadecimal number with From address");
985     Diag << "Found: " << OffsetStr << "\n";
986     return make_error_code(llvm::errc::io_error);
987   }
988 
989   ErrorOr<StringRef> ToStrRes = parseString('/');
990   if (std::error_code EC = ToStrRes.getError())
991     return EC;
992   OffsetStr = ToStrRes.get();
993   if (OffsetStr.getAsInteger(0, Res.To)) {
994     reportError("expected hexadecimal number with To address");
995     Diag << "Found: " << OffsetStr << "\n";
996     return make_error_code(llvm::errc::io_error);
997   }
998 
999   ErrorOr<StringRef> MispredStrRes = parseString('/');
1000   if (std::error_code EC = MispredStrRes.getError())
1001     return EC;
1002   StringRef MispredStr = MispredStrRes.get();
1003   if (MispredStr.size() != 1 ||
1004       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1005     reportError("expected single char for mispred bit");
1006     Diag << "Found: " << MispredStr << "\n";
1007     return make_error_code(llvm::errc::io_error);
1008   }
1009   Res.Mispred = MispredStr[0] == 'M';
1010 
1011   static bool MispredWarning = true;
1012   if (MispredStr[0] == '-' && MispredWarning) {
1013     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1014     MispredWarning = false;
1015   }
1016 
1017   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1018   if (std::error_code EC = Rest.getError())
1019     return EC;
1020   if (Rest.get().size() < 5) {
1021     reportError("expected rest of LBR entry");
1022     Diag << "Found: " << Rest.get() << "\n";
1023     return make_error_code(llvm::errc::io_error);
1024   }
1025   return Res;
1026 }
1027 
1028 bool DataAggregator::checkAndConsumeFS() {
1029   if (ParsingBuf[0] != FieldSeparator)
1030     return false;
1031 
1032   ParsingBuf = ParsingBuf.drop_front(1);
1033   Col += 1;
1034   return true;
1035 }
1036 
1037 void DataAggregator::consumeRestOfLine() {
1038   size_t LineEnd = ParsingBuf.find_first_of('\n');
1039   if (LineEnd == StringRef::npos) {
1040     ParsingBuf = StringRef();
1041     Col = 0;
1042     Line += 1;
1043     return;
1044   }
1045   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1046   Col = 0;
1047   Line += 1;
1048 }
1049 
1050 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1051   PerfBranchSample Res;
1052 
1053   while (checkAndConsumeFS()) {
1054   }
1055 
1056   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1057   if (std::error_code EC = PIDRes.getError())
1058     return EC;
1059   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1060   if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1061     consumeRestOfLine();
1062     return make_error_code(errc::no_such_process);
1063   }
1064 
1065   while (checkAndConsumeFS()) {
1066   }
1067 
1068   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1069   if (std::error_code EC = PCRes.getError())
1070     return EC;
1071   Res.PC = PCRes.get();
1072 
1073   if (checkAndConsumeNewLine())
1074     return Res;
1075 
1076   while (!checkAndConsumeNewLine()) {
1077     checkAndConsumeFS();
1078 
1079     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1080     if (std::error_code EC = LBRRes.getError())
1081       return EC;
1082     LBREntry LBR = LBRRes.get();
1083     if (ignoreKernelInterrupt(LBR))
1084       continue;
1085     if (!BC->HasFixedLoadAddress)
1086       adjustLBR(LBR, MMapInfoIter->second);
1087     Res.LBR.push_back(LBR);
1088   }
1089 
1090   return Res;
1091 }
1092 
1093 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1094   while (checkAndConsumeFS()) {
1095   }
1096 
1097   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1098   if (std::error_code EC = PIDRes.getError())
1099     return EC;
1100 
1101   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1102   if (MMapInfoIter == BinaryMMapInfo.end()) {
1103     consumeRestOfLine();
1104     return PerfBasicSample{StringRef(), 0};
1105   }
1106 
1107   while (checkAndConsumeFS()) {
1108   }
1109 
1110   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1111   if (std::error_code EC = Event.getError())
1112     return EC;
1113 
1114   while (checkAndConsumeFS()) {
1115   }
1116 
1117   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1118   if (std::error_code EC = AddrRes.getError())
1119     return EC;
1120 
1121   if (!checkAndConsumeNewLine()) {
1122     reportError("expected end of line");
1123     return make_error_code(llvm::errc::io_error);
1124   }
1125 
1126   uint64_t Address = *AddrRes;
1127   if (!BC->HasFixedLoadAddress)
1128     adjustAddress(Address, MMapInfoIter->second);
1129 
1130   return PerfBasicSample{Event.get(), Address};
1131 }
1132 
1133 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1134   PerfMemSample Res{0, 0};
1135 
1136   while (checkAndConsumeFS()) {
1137   }
1138 
1139   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1140   if (std::error_code EC = PIDRes.getError())
1141     return EC;
1142 
1143   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1144   if (MMapInfoIter == BinaryMMapInfo.end()) {
1145     consumeRestOfLine();
1146     return Res;
1147   }
1148 
1149   while (checkAndConsumeFS()) {
1150   }
1151 
1152   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1153   if (std::error_code EC = Event.getError())
1154     return EC;
1155   if (Event.get().find("mem-loads") == StringRef::npos) {
1156     consumeRestOfLine();
1157     return Res;
1158   }
1159 
1160   while (checkAndConsumeFS()) {
1161   }
1162 
1163   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1164   if (std::error_code EC = AddrRes.getError())
1165     return EC;
1166 
1167   while (checkAndConsumeFS()) {
1168   }
1169 
1170   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1171   if (std::error_code EC = PCRes.getError()) {
1172     consumeRestOfLine();
1173     return EC;
1174   }
1175 
1176   if (!checkAndConsumeNewLine()) {
1177     reportError("expected end of line");
1178     return make_error_code(llvm::errc::io_error);
1179   }
1180 
1181   uint64_t Address = *AddrRes;
1182   if (!BC->HasFixedLoadAddress)
1183     adjustAddress(Address, MMapInfoIter->second);
1184 
1185   return PerfMemSample{PCRes.get(), Address};
1186 }
1187 
1188 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1189   auto parseOffset = [this]() -> ErrorOr<Location> {
1190     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1191     if (std::error_code EC = Res.getError())
1192       return EC;
1193     return Location(Res.get());
1194   };
1195 
1196   size_t Sep = ParsingBuf.find_first_of(" \n");
1197   if (Sep == StringRef::npos)
1198     return parseOffset();
1199   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1200   if (LookAhead.find_first_of(":") == StringRef::npos)
1201     return parseOffset();
1202 
1203   ErrorOr<StringRef> BuildID = parseString(':');
1204   if (std::error_code EC = BuildID.getError())
1205     return EC;
1206   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1207   if (std::error_code EC = Offset.getError())
1208     return EC;
1209   return Location(true, BuildID.get(), Offset.get());
1210 }
1211 
1212 ErrorOr<DataAggregator::AggregatedLBREntry>
1213 DataAggregator::parseAggregatedLBREntry() {
1214   while (checkAndConsumeFS()) {
1215   }
1216 
1217   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1218   if (std::error_code EC = TypeOrErr.getError())
1219     return EC;
1220   auto Type = AggregatedLBREntry::BRANCH;
1221   if (TypeOrErr.get() == "B") {
1222     Type = AggregatedLBREntry::BRANCH;
1223   } else if (TypeOrErr.get() == "F") {
1224     Type = AggregatedLBREntry::FT;
1225   } else if (TypeOrErr.get() == "f") {
1226     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1227   } else {
1228     reportError("expected B, F or f");
1229     return make_error_code(llvm::errc::io_error);
1230   }
1231 
1232   while (checkAndConsumeFS()) {
1233   }
1234   ErrorOr<Location> From = parseLocationOrOffset();
1235   if (std::error_code EC = From.getError())
1236     return EC;
1237 
1238   while (checkAndConsumeFS()) {
1239   }
1240   ErrorOr<Location> To = parseLocationOrOffset();
1241   if (std::error_code EC = To.getError())
1242     return EC;
1243 
1244   while (checkAndConsumeFS()) {
1245   }
1246   ErrorOr<int64_t> Frequency =
1247       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1248   if (std::error_code EC = Frequency.getError())
1249     return EC;
1250 
1251   uint64_t Mispreds = 0;
1252   if (Type == AggregatedLBREntry::BRANCH) {
1253     while (checkAndConsumeFS()) {
1254     }
1255     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1256     if (std::error_code EC = MispredsOrErr.getError())
1257       return EC;
1258     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1259   }
1260 
1261   if (!checkAndConsumeNewLine()) {
1262     reportError("expected end of line");
1263     return make_error_code(llvm::errc::io_error);
1264   }
1265 
1266   return AggregatedLBREntry{From.get(), To.get(),
1267                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1268                             Type};
1269 }
1270 
1271 bool DataAggregator::hasData() {
1272   if (ParsingBuf.size() == 0)
1273     return false;
1274 
1275   return true;
1276 }
1277 
1278 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1279   return opts::IgnoreInterruptLBR &&
1280          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1281 }
1282 
1283 std::error_code DataAggregator::printLBRHeatMap() {
1284   outs() << "PERF2BOLT: parse branch events...\n";
1285   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1286                      TimerGroupDesc, opts::TimeAggregator);
1287 
1288   if (opts::LinuxKernelMode) {
1289     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1290     opts::HeatmapMinAddress = KernelBaseAddr;
1291   }
1292   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1293              opts::HeatmapMaxAddress);
1294   uint64_t NumTotalSamples = 0;
1295 
1296   while (hasData()) {
1297     ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1298     if (std::error_code EC = SampleRes.getError()) {
1299       if (EC == errc::no_such_process)
1300         continue;
1301       return EC;
1302     }
1303 
1304     PerfBranchSample &Sample = SampleRes.get();
1305 
1306     // LBRs are stored in reverse execution order. NextLBR refers to the next
1307     // executed branch record.
1308     const LBREntry *NextLBR = nullptr;
1309     for (const LBREntry &LBR : Sample.LBR) {
1310       if (NextLBR) {
1311         // Record fall-through trace.
1312         const uint64_t TraceFrom = LBR.To;
1313         const uint64_t TraceTo = NextLBR->From;
1314         ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1315       }
1316       NextLBR = &LBR;
1317     }
1318     if (!Sample.LBR.empty()) {
1319       HM.registerAddress(Sample.LBR.front().To);
1320       HM.registerAddress(Sample.LBR.back().From);
1321     }
1322     NumTotalSamples += Sample.LBR.size();
1323   }
1324 
1325   if (!NumTotalSamples) {
1326     errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1327               "Cannot build heatmap.\n";
1328     exit(1);
1329   }
1330 
1331   outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1332   outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1333 
1334   outs() << "HEATMAP: building heat map...\n";
1335 
1336   for (const auto &LBR : FallthroughLBRs) {
1337     const Trace &Trace = LBR.first;
1338     const FTInfo &Info = LBR.second;
1339     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1340   }
1341 
1342   if (HM.getNumInvalidRanges())
1343     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1344 
1345   if (!HM.size()) {
1346     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1347     exit(1);
1348   }
1349 
1350   HM.print(opts::HeatmapFile);
1351   if (opts::HeatmapFile == "-")
1352     HM.printCDF(opts::HeatmapFile);
1353   else
1354     HM.printCDF(opts::HeatmapFile + ".csv");
1355 
1356   return std::error_code();
1357 }
1358 
/// Parse all branch (LBR) samples from the input and aggregate them.
///
/// Parsing stops when the input is exhausted or once opts::MaxSamples samples
/// have been attempted. For every accepted sample the fall-through traces
/// between neighbouring LBR entries are accumulated in FallthroughLBRs and the
/// taken branches in BranchLBRs. Afterwards, every function containing an end
/// of a recorded branch is marked as having a profile available, and summary
/// statistics are printed.
///
/// \returns the first parse error other than errc::no_such_process, or
/// success.
std::error_code DataAggregator::parseBranchEvents() {
  outs() << "PERF2BOLT: parse branch events...\n";
  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // NumTotalSamples counts every parse attempt, including samples skipped for
  // belonging to another process; NumSamples counts only the accepted ones.
  uint64_t NumTotalSamples = 0;
  uint64_t NumEntries = 0;
  uint64_t NumSamples = 0;
  uint64_t NumSamplesNoLBR = 0;
  uint64_t NumTraces = 0;
  bool NeedsSkylakeFix = false;

  while (hasData() && NumTotalSamples < opts::MaxSamples) {
    ++NumTotalSamples;

    ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
    if (std::error_code EC = SampleRes.getError()) {
      if (EC == errc::no_such_process)
        continue;
      return EC;
    }
    ++NumSamples;

    PerfBranchSample &Sample = SampleRes.get();
    if (opts::WriteAutoFDOData)
      ++BasicSamples[Sample.PC];

    if (Sample.LBR.empty()) {
      ++NumSamplesNoLBR;
      continue;
    }

    NumEntries += Sample.LBR.size();
    // The workaround is enabled by the first 32-entry sample seen in BAT mode
    // and stays enabled for every sample parsed after it.
    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
      errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
      NeedsSkylakeFix = true;
    }

    // LBRs are stored in reverse execution order. NextPC refers to the next
    // recorded executed PC.
    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
    uint32_t NumEntry = 0;
    for (const LBREntry &LBR : Sample.LBR) {
      ++NumEntry;
      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
      // sometimes record entry 32 as an exact copy of entry 31. This will cause
      // us to likely record an invalid trace and generate a stale function for
      // BAT mode (non BAT disassembles the function and is able to ignore this
      // trace at aggregation time). Drop first 2 entries (last two, in
      // chronological order)
      if (NeedsSkylakeFix && NumEntry <= 2)
        continue;
      if (NextPC) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextPC;
        const BinaryFunction *TraceBF =
            getBinaryFunctionContainingAddress(TraceFrom);
        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
          // Both ends are in the same function. The trace counts as internal
          // when the branch that led here also originated in that function,
          // and as external otherwise.
          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
          if (TraceBF->containsAddress(LBR.From))
            ++Info.InternCount;
          else
            ++Info.ExternCount;
        } else {
          // The ends are in two different known functions (invalid trace), or
          // at least one end is outside any known function (out of range).
          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
            LLVM_DEBUG(dbgs()
                       << "Invalid trace starting in "
                       << TraceBF->getPrintName() << " @ "
                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
                       << " and ending @ " << Twine::utohexstr(TraceTo)
                       << '\n');
            ++NumInvalidTraces;
          } else {
            LLVM_DEBUG(dbgs()
                       << "Out of range trace starting in "
                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
                       << Twine::utohexstr(
                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
                       << " and ending in "
                       << (getBinaryFunctionContainingAddress(TraceTo)
                               ? getBinaryFunctionContainingAddress(TraceTo)
                                     ->getPrintName()
                               : "None")
                       << " @ "
                       << Twine::utohexstr(
                              TraceTo -
                              (getBinaryFunctionContainingAddress(TraceTo)
                                   ? getBinaryFunctionContainingAddress(TraceTo)
                                         ->getAddress()
                                   : 0))
                       << '\n');
            ++NumLongRangeTraces;
          }
        }
        ++NumTraces;
      }
      NextPC = LBR.From;

      // Record the taken branch. An end that lies outside any known function
      // is replaced by 0; a branch with both ends unknown is dropped.
      uint64_t From = LBR.From;
      if (!getBinaryFunctionContainingAddress(From))
        From = 0;
      uint64_t To = LBR.To;
      if (!getBinaryFunctionContainingAddress(To))
        To = 0;
      if (!From && !To)
        continue;
      BranchInfo &Info = BranchLBRs[Trace(From, To)];
      ++Info.TakenCount;
      Info.MispredCount += LBR.Mispred;
    }
  }

  // Mark every function touched by a recorded branch as having a profile.
  for (const auto &LBR : BranchLBRs) {
    const Trace &Trace = LBR.first;
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
      BF->setHasProfileAvailable();
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
      BF->setHasProfileAvailable();
  }

  // Prints " (<Percent>%)". On a stream with colors the number is red above
  // T2, yellow above T1 and green otherwise.
  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
    OS << " (";
    if (OS.has_colors()) {
      if (Percent > T2)
        OS.changeColor(raw_ostream::RED);
      else if (Percent > T1)
        OS.changeColor(raw_ostream::YELLOW);
      else
        OS.changeColor(raw_ostream::GREEN);
    }
    OS << format("%.1f%%", Percent);
    if (OS.has_colors())
      OS.resetColor();
    OS << ")";
  };

  outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
         << " LBR entries\n";
  if (NumTotalSamples) {
    if (NumSamples && NumSamplesNoLBR == NumSamples) {
      // Note: we don't know if perf2bolt is being used to parse memory samples
      // at this point. In this case, it is OK to parse zero LBRs.
      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
                "LBR. Record profile with perf record -j any or run perf2bolt "
                "in no-LBR mode with -nl (the performance improvement in -nl "
                "mode may be limited)\n";
    } else {
      // Ignored samples are the parse attempts that were skipped because the
      // sample belonged to another process.
      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
      printColored(outs(), PercentIgnored, 20, 50);
      outs() << " were ignored\n";
      if (PercentIgnored > 50.0f)
        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
                  "were attributed to the input binary\n";
    }
  }
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;
  float Perc = 0.0f;
  if (NumTraces > 0) {
    Perc = NumInvalidTraces * 100.0f / NumTraces;
    printColored(outs(), Perc, 5, 10);
  }
  outs() << "\n";
  if (Perc > 10.0f)
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance.\n\n";

  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;
  if (NumTraces > 0)
    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
  outs() << "\n";

  // NumColdSamples is not modified in this function; it is only reported here.
  if (NumColdSamples > 0) {
    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << "PERF2BOLT: " << NumColdSamples
           << format(" (%.1f%%)", ColdSamples)
           << " samples recorded in cold regions of split functions.\n";
    if (ColdSamples > 5.0f)
      outs()
          << "WARNING: The BOLT-processed binary where samples were collected "
             "likely used bad data or your service observed a large shift in "
             "profile. You may want to audit this.\n";
  }

  return std::error_code();
}
1551 
1552 void DataAggregator::processBranchEvents() {
1553   outs() << "PERF2BOLT: processing branch events...\n";
1554   NamedRegionTimer T("processBranch", "Processing branch events",
1555                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1556 
1557   for (const auto &AggrLBR : FallthroughLBRs) {
1558     const Trace &Loc = AggrLBR.first;
1559     const FTInfo &Info = AggrLBR.second;
1560     LBREntry First{Loc.From, Loc.From, false};
1561     LBREntry Second{Loc.To, Loc.To, false};
1562     if (Info.InternCount)
1563       doTrace(First, Second, Info.InternCount);
1564     if (Info.ExternCount) {
1565       First.From = 0;
1566       doTrace(First, Second, Info.ExternCount);
1567     }
1568   }
1569 
1570   for (const auto &AggrLBR : BranchLBRs) {
1571     const Trace &Loc = AggrLBR.first;
1572     const BranchInfo &Info = AggrLBR.second;
1573     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1574   }
1575 }
1576 
1577 std::error_code DataAggregator::parseBasicEvents() {
1578   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1579   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1580                      TimerGroupDesc, opts::TimeAggregator);
1581   while (hasData()) {
1582     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1583     if (std::error_code EC = Sample.getError())
1584       return EC;
1585 
1586     if (!Sample->PC)
1587       continue;
1588 
1589     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1590       BF->setHasProfileAvailable();
1591 
1592     ++BasicSamples[Sample->PC];
1593     EventNames.insert(Sample->EventName);
1594   }
1595 
1596   return std::error_code();
1597 }
1598 
1599 void DataAggregator::processBasicEvents() {
1600   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1601   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1602                      TimerGroupDesc, opts::TimeAggregator);
1603   uint64_t OutOfRangeSamples = 0;
1604   uint64_t NumSamples = 0;
1605   for (auto &Sample : BasicSamples) {
1606     const uint64_t PC = Sample.first;
1607     const uint64_t HitCount = Sample.second;
1608     NumSamples += HitCount;
1609     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1610     if (!Func) {
1611       OutOfRangeSamples += HitCount;
1612       continue;
1613     }
1614 
1615     doSample(*Func, PC, HitCount);
1616   }
1617   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1618 
1619   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1620          << OutOfRangeSamples;
1621   float Perc = 0.0f;
1622   if (NumSamples > 0) {
1623     outs() << " (";
1624     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1625     if (outs().has_colors()) {
1626       if (Perc > 60.0f)
1627         outs().changeColor(raw_ostream::RED);
1628       else if (Perc > 40.0f)
1629         outs().changeColor(raw_ostream::YELLOW);
1630       else
1631         outs().changeColor(raw_ostream::GREEN);
1632     }
1633     outs() << format("%.1f%%", Perc);
1634     if (outs().has_colors())
1635       outs().resetColor();
1636     outs() << ")";
1637   }
1638   outs() << "\n";
1639   if (Perc > 80.0f)
1640     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1641               "binary is probably not the same binary used during profiling "
1642               "collection. The generated data may be ineffective for improving "
1643               "performance.\n\n";
1644 }
1645 
1646 std::error_code DataAggregator::parseMemEvents() {
1647   outs() << "PERF2BOLT: parsing memory events...\n";
1648   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1649                      TimerGroupDesc, opts::TimeAggregator);
1650   while (hasData()) {
1651     ErrorOr<PerfMemSample> Sample = parseMemSample();
1652     if (std::error_code EC = Sample.getError())
1653       return EC;
1654 
1655     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1656       BF->setHasProfileAvailable();
1657 
1658     MemSamples.emplace_back(std::move(Sample.get()));
1659   }
1660 
1661   return std::error_code();
1662 }
1663 
1664 void DataAggregator::processMemEvents() {
1665   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1666                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1667   for (const PerfMemSample &Sample : MemSamples) {
1668     uint64_t PC = Sample.PC;
1669     uint64_t Addr = Sample.Addr;
1670     StringRef FuncName;
1671     StringRef MemName;
1672 
1673     // Try to resolve symbol for PC
1674     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1675     if (!Func) {
1676       LLVM_DEBUG(if (PC != 0) {
1677         dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1678                << Twine::utohexstr(Addr) << "\n";
1679       });
1680       continue;
1681     }
1682 
1683     FuncName = Func->getOneName();
1684     PC -= Func->getAddress();
1685 
1686     // Try to resolve symbol for memory load
1687     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1688       MemName = BD->getName();
1689       Addr -= BD->getAddress();
1690     } else if (opts::FilterMemProfile) {
1691       // Filter out heap/stack accesses
1692       continue;
1693     }
1694 
1695     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1696     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1697 
1698     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1699     setMemData(*Func, MemData);
1700     MemData->update(FuncLoc, AddrLoc);
1701     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1702   }
1703 }
1704 
1705 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1706   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1707   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1708                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1709   while (hasData()) {
1710     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1711     if (std::error_code EC = AggrEntry.getError())
1712       return EC;
1713 
1714     if (BinaryFunction *BF =
1715             getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1716       BF->setHasProfileAvailable();
1717     if (BinaryFunction *BF =
1718             getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1719       BF->setHasProfileAvailable();
1720 
1721     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1722   }
1723 
1724   return std::error_code();
1725 }
1726 
1727 void DataAggregator::processPreAggregated() {
1728   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1729   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1730                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1731 
1732   uint64_t NumTraces = 0;
1733   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1734     switch (AggrEntry.EntryType) {
1735     case AggregatedLBREntry::BRANCH:
1736       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1737                AggrEntry.Mispreds);
1738       break;
1739     case AggregatedLBREntry::FT:
1740     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1741       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1742                          ? AggrEntry.From.Offset
1743                          : 0,
1744                      AggrEntry.From.Offset, false};
1745       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1746       doTrace(First, Second, AggrEntry.Count);
1747       NumTraces += AggrEntry.Count;
1748       break;
1749     }
1750     }
1751   }
1752 
1753   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1754          << " aggregated LBR entries\n";
1755   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1756          << NumInvalidTraces;
1757   float Perc = 0.0f;
1758   if (NumTraces > 0) {
1759     outs() << " (";
1760     Perc = NumInvalidTraces * 100.0f / NumTraces;
1761     if (outs().has_colors()) {
1762       if (Perc > 10.0f)
1763         outs().changeColor(raw_ostream::RED);
1764       else if (Perc > 5.0f)
1765         outs().changeColor(raw_ostream::YELLOW);
1766       else
1767         outs().changeColor(raw_ostream::GREEN);
1768     }
1769     outs() << format("%.1f%%", Perc);
1770     if (outs().has_colors())
1771       outs().resetColor();
1772     outs() << ")";
1773   }
1774   outs() << "\n";
1775   if (Perc > 10.0f)
1776     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1777               "binary is probably not the same binary used during profiling "
1778               "collection. The generated data may be ineffective for improving "
1779               "performance.\n\n";
1780 
1781   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1782          << NumLongRangeTraces;
1783   if (NumTraces > 0)
1784     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1785   outs() << "\n";
1786 }
1787 
1788 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1789   size_t LineEnd = ParsingBuf.find_first_of("\n");
1790   if (LineEnd == StringRef::npos) {
1791     reportError("expected rest of line");
1792     Diag << "Found: " << ParsingBuf << "\n";
1793     return NoneType();
1794   }
1795   StringRef Line = ParsingBuf.substr(0, LineEnd);
1796 
1797   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1798   if (Pos == StringRef::npos)
1799     return NoneType();
1800   Line = Line.drop_front(Pos);
1801 
1802   // Line:
1803   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1804   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1805   int32_t PID;
1806   if (PIDStr.getAsInteger(10, PID)) {
1807     reportError("expected PID");
1808     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1809     return NoneType();
1810   }
1811 
1812   return PID;
1813 }
1814 
1815 namespace {
1816 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1817   const StringRef SecTimeStr = TimeStr.split('.').first;
1818   const StringRef USecTimeStr = TimeStr.split('.').second;
1819   uint64_t SecTime;
1820   uint64_t USecTime;
1821   if (SecTimeStr.getAsInteger(10, SecTime) ||
1822       USecTimeStr.getAsInteger(10, USecTime))
1823     return NoneType();
1824   return SecTime * 1000000ULL + USecTime;
1825 }
1826 }
1827 
1828 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1829   while (checkAndConsumeFS()) {
1830   }
1831 
1832   size_t LineEnd = ParsingBuf.find_first_of("\n");
1833   if (LineEnd == StringRef::npos) {
1834     reportError("expected rest of line");
1835     Diag << "Found: " << ParsingBuf << "\n";
1836     return NoneType();
1837   }
1838   StringRef Line = ParsingBuf.substr(0, LineEnd);
1839 
1840   size_t Pos = Line.find("PERF_RECORD_FORK");
1841   if (Pos == StringRef::npos) {
1842     consumeRestOfLine();
1843     return NoneType();
1844   }
1845 
1846   ForkInfo FI;
1847 
1848   const StringRef TimeStr =
1849       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1850   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1851     FI.Time = *TimeRes;
1852   }
1853 
1854   Line = Line.drop_front(Pos);
1855 
1856   // Line:
1857   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1858   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1859   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1860     reportError("expected PID");
1861     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1862     return NoneType();
1863   }
1864 
1865   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1866   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1867     reportError("expected PID");
1868     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1869     return NoneType();
1870   }
1871 
1872   consumeRestOfLine();
1873 
1874   return FI;
1875 }
1876 
1877 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1878 DataAggregator::parseMMapEvent() {
1879   while (checkAndConsumeFS()) {
1880   }
1881 
1882   MMapInfo ParsedInfo;
1883 
1884   size_t LineEnd = ParsingBuf.find_first_of("\n");
1885   if (LineEnd == StringRef::npos) {
1886     reportError("expected rest of line");
1887     Diag << "Found: " << ParsingBuf << "\n";
1888     return make_error_code(llvm::errc::io_error);
1889   }
1890   StringRef Line = ParsingBuf.substr(0, LineEnd);
1891 
1892   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1893   if (Pos == StringRef::npos) {
1894     consumeRestOfLine();
1895     return std::make_pair(StringRef(), ParsedInfo);
1896   }
1897 
1898   // Line:
1899   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1900 
1901   const StringRef TimeStr =
1902       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1903   if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1904     ParsedInfo.Time = *TimeRes;
1905 
1906   Line = Line.drop_front(Pos);
1907 
1908   // Line:
1909   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1910 
1911   StringRef FileName = Line.rsplit(FieldSeparator).second;
1912   if (FileName.startswith("//") || FileName.startswith("[")) {
1913     consumeRestOfLine();
1914     return std::make_pair(StringRef(), ParsedInfo);
1915   }
1916   FileName = sys::path::filename(FileName);
1917 
1918   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1919   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1920     reportError("expected PID");
1921     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1922     return make_error_code(llvm::errc::io_error);
1923   }
1924 
1925   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1926   if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) {
1927     reportError("expected base address");
1928     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1929     return make_error_code(llvm::errc::io_error);
1930   }
1931 
1932   const StringRef SizeStr = Line.split('(').second.split(')').first;
1933   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1934     reportError("expected mmaped size");
1935     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1936     return make_error_code(llvm::errc::io_error);
1937   }
1938 
1939   const StringRef OffsetStr =
1940       Line.split('@').second.ltrim().split(FieldSeparator).first;
1941   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1942     reportError("expected mmaped page-aligned offset");
1943     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1944     return make_error_code(llvm::errc::io_error);
1945   }
1946 
1947   consumeRestOfLine();
1948 
1949   return std::make_pair(FileName, ParsedInfo);
1950 }
1951 
1952 std::error_code DataAggregator::parseMMapEvents() {
1953   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1954   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1955                      TimerGroupDesc, opts::TimeAggregator);
1956 
1957   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1958   while (hasData()) {
1959     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1960     if (std::error_code EC = FileMMapInfoRes.getError())
1961       return EC;
1962 
1963     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1964     if (FileMMapInfo.second.PID == -1)
1965       continue;
1966 
1967     // Consider only the first mapping of the file for any given PID
1968     bool PIDExists = false;
1969     auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1970     for (auto MI = Range.first; MI != Range.second; ++MI) {
1971       if (MI->second.PID == FileMMapInfo.second.PID) {
1972         PIDExists = true;
1973         break;
1974       }
1975     }
1976     if (PIDExists)
1977       continue;
1978 
1979     GlobalMMapInfo.insert(FileMMapInfo);
1980   }
1981 
1982   LLVM_DEBUG({
1983     dbgs() << "FileName -> mmap info:\n";
1984     for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
1985       dbgs() << "  " << Pair.first << " : " << Pair.second.PID << " [0x"
1986              << Twine::utohexstr(Pair.second.BaseAddress) << ", "
1987              << Twine::utohexstr(Pair.second.Size) << " @ "
1988              << Twine::utohexstr(Pair.second.Offset) << "]\n";
1989   });
1990 
1991   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
1992   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1993     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1994            << "\" for profile matching\n";
1995     NameToUse = BuildIDBinaryName;
1996   }
1997 
1998   auto Range = GlobalMMapInfo.equal_range(NameToUse);
1999   for (auto I = Range.first; I != Range.second; ++I) {
2000     const MMapInfo &MMapInfo = I->second;
2001     if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) {
2002       // Check that the binary mapping matches one of the segments.
2003       bool MatchFound = false;
2004       for (auto &KV : BC->SegmentMapInfo) {
2005         SegmentInfo &SegInfo = KV.second;
2006         // The mapping is page-aligned and hence the BaseAddress could be
2007         // different from the segment start address. We cannot know the page
2008         // size of the mapping, but we know it should not exceed the segment
2009         // alignment value. Hence we are performing an approximate check.
2010         if (SegInfo.Address >= MMapInfo.BaseAddress &&
2011             SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) {
2012           MatchFound = true;
2013           break;
2014         }
2015       }
2016       if (!MatchFound) {
2017         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2018                << " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n';
2019         continue;
2020       }
2021     }
2022 
2023     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2024   }
2025 
2026   if (BinaryMMapInfo.empty()) {
2027     if (errs().has_colors())
2028       errs().changeColor(raw_ostream::RED);
2029     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2030            << BC->getFilename() << "\".";
2031     if (!GlobalMMapInfo.empty()) {
2032       errs() << " Profile for the following binary name(s) is available:\n";
2033       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2034            I = GlobalMMapInfo.upper_bound(I->first))
2035         errs() << "  " << I->first << '\n';
2036       errs() << "Please rename the input binary.\n";
2037     } else {
2038       errs() << " Failed to extract any binary name from a profile.\n";
2039     }
2040     if (errs().has_colors())
2041       errs().resetColor();
2042 
2043     exit(1);
2044   }
2045 
2046   return std::error_code();
2047 }
2048 
2049 std::error_code DataAggregator::parseTaskEvents() {
2050   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2051   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2052                      TimerGroupDesc, opts::TimeAggregator);
2053 
2054   while (hasData()) {
2055     if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2056       // Remove forked child that ran execve
2057       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2058       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2059         BinaryMMapInfo.erase(MMapInfoIter);
2060       consumeRestOfLine();
2061       continue;
2062     }
2063 
2064     Optional<ForkInfo> ForkInfo = parseForkEvent();
2065     if (!ForkInfo)
2066       continue;
2067 
2068     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2069       continue;
2070 
2071     if (ForkInfo->Time == 0) {
2072       // Process was forked and mmaped before perf ran. In this case the child
2073       // should have its own mmap entry unless it was execve'd.
2074       continue;
2075     }
2076 
2077     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2078     if (MMapInfoIter == BinaryMMapInfo.end())
2079       continue;
2080 
2081     MMapInfo MMapInfo = MMapInfoIter->second;
2082     MMapInfo.PID = ForkInfo->ChildPID;
2083     MMapInfo.Forked = true;
2084     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2085   }
2086 
2087   outs() << "PERF2BOLT: input binary is associated with "
2088          << BinaryMMapInfo.size() << " PID(s)\n";
2089 
2090   LLVM_DEBUG({
2091     for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2092       outs() << "  " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2093              << ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x"
2094              << Twine::utohexstr(MMI.second.Size) << ")\n";
2095   });
2096 
2097   return std::error_code();
2098 }
2099 
2100 Optional<std::pair<StringRef, StringRef>>
2101 DataAggregator::parseNameBuildIDPair() {
2102   while (checkAndConsumeFS()) {
2103   }
2104 
2105   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2106   if (std::error_code EC = BuildIDStr.getError())
2107     return NoneType();
2108 
2109   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2110   if (std::error_code EC = NameStr.getError())
2111     return NoneType();
2112 
2113   consumeRestOfLine();
2114   return std::make_pair(NameStr.get(), BuildIDStr.get());
2115 }
2116 
2117 Optional<StringRef>
2118 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2119   while (hasData()) {
2120     Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2121     if (!IDPair)
2122       return NoneType();
2123 
2124     if (IDPair->second.startswith(FileBuildID))
2125       return sys::path::filename(IDPair->first);
2126   }
2127   return NoneType();
2128 }
2129 
2130 std::error_code
2131 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2132   std::error_code EC;
2133   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2134   if (EC)
2135     return EC;
2136 
2137   bool WriteMemLocs = false;
2138 
2139   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2140     if (WriteMemLocs)
2141       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2142     else
2143       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2144     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2145             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2146   };
2147 
2148   uint64_t BranchValues = 0;
2149   uint64_t MemValues = 0;
2150 
2151   if (BAT)
2152     OutFile << "boltedcollection\n";
2153   if (opts::BasicAggregation) {
2154     OutFile << "no_lbr";
2155     for (const StringMapEntry<NoneType> &Entry : EventNames)
2156       OutFile << " " << Entry.getKey();
2157     OutFile << "\n";
2158 
2159     for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2160       for (const SampleInfo &SI : Func.getValue().Data) {
2161         writeLocation(SI.Loc);
2162         OutFile << SI.Hits << "\n";
2163         ++BranchValues;
2164       }
2165     }
2166   } else {
2167     for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2168       for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2169         writeLocation(BI.From);
2170         writeLocation(BI.To);
2171         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2172         ++BranchValues;
2173       }
2174       for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2175         // Do not output if source is a known symbol, since this was already
2176         // accounted for in the source function
2177         if (BI.From.IsSymbol)
2178           continue;
2179         writeLocation(BI.From);
2180         writeLocation(BI.To);
2181         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2182         ++BranchValues;
2183       }
2184     }
2185 
2186     WriteMemLocs = true;
2187     for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2188       for (const MemInfo &MemEvent : Func.getValue().Data) {
2189         writeLocation(MemEvent.Offset);
2190         writeLocation(MemEvent.Addr);
2191         OutFile << MemEvent.Count << "\n";
2192         ++MemValues;
2193       }
2194     }
2195   }
2196 
2197   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2198          << " memory objects to " << OutputFilename << "\n";
2199 
2200   return std::error_code();
2201 }
2202 
2203 void DataAggregator::dump() const { DataReader::dump(); }
2204 
2205 void DataAggregator::dump(const LBREntry &LBR) const {
2206   Diag << "From: " << Twine::utohexstr(LBR.From)
2207        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2208        << "\n";
2209 }
2210 
2211 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2212   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2213   for (const LBREntry &LBR : Sample.LBR)
2214     dump(LBR);
2215 }
2216 
2217 void DataAggregator::dump(const PerfMemSample &Sample) const {
2218   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2219 }
2220