1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Profile/BoltAddressTranslation.h"
18 #include "bolt/Profile/Heatmap.h"
19 #include "bolt/Utils/CommandLineOpts.h"
20 #include "bolt/Utils/Utils.h"
21 #include "llvm/ADT/ScopeExit.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Process.h"
27 #include "llvm/Support/Program.h"
28 #include "llvm/Support/Regex.h"
29 #include "llvm/Support/Timer.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <map>
32 #include <unordered_map>
33 #include <utility>
34
35 #define DEBUG_TYPE "aggregator"
36
37 using namespace llvm;
38 using namespace bolt;
39
40 namespace opts {
41
42 static cl::opt<bool>
43 BasicAggregation("nl",
44 cl::desc("aggregate basic samples (without LBR info)"),
45 cl::cat(AggregatorCategory));
46
47 static cl::opt<bool>
48 FilterMemProfile("filter-mem-profile",
49 cl::desc("if processing a memory profile, filter out stack or heap accesses "
50 "that won't be useful for BOLT to reduce profile file size"),
51 cl::init(true),
52 cl::cat(AggregatorCategory));
53
54 static cl::opt<unsigned long long>
55 FilterPID("pid",
56 cl::desc("only use samples from process with specified PID"),
57 cl::init(0),
58 cl::Optional,
59 cl::cat(AggregatorCategory));
60
61 static cl::opt<bool>
62 IgnoreBuildID("ignore-build-id",
63 cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
64 cl::init(false),
65 cl::cat(AggregatorCategory));
66
67 static cl::opt<bool> IgnoreInterruptLBR(
68 "ignore-interrupt-lbr",
69 cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
70 cl::init(true), cl::cat(AggregatorCategory));
71
72 static cl::opt<unsigned long long>
73 MaxSamples("max-samples",
74 cl::init(-1ULL),
75 cl::desc("maximum number of samples to read from LBR profile"),
76 cl::Optional,
77 cl::Hidden,
78 cl::cat(AggregatorCategory));
79
80 cl::opt<bool> ReadPreAggregated(
81 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
82 cl::cat(AggregatorCategory));
83
84 static cl::opt<bool>
85 TimeAggregator("time-aggr",
86 cl::desc("time BOLT aggregator"),
87 cl::init(false),
88 cl::ZeroOrMore,
89 cl::cat(AggregatorCategory));
90
91 static cl::opt<bool>
92 UseEventPC("use-event-pc",
93 cl::desc("use event PC in combination with LBR sampling"),
94 cl::cat(AggregatorCategory));
95
96 static cl::opt<bool> WriteAutoFDOData(
97 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
98 cl::cat(AggregatorCategory));
99
100 } // namespace opts
101
102 namespace {
103
104 const char TimerGroupName[] = "aggregator";
105 const char TimerGroupDesc[] = "Aggregator";
106
getTextSections(const BinaryContext * BC)107 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
108 std::vector<SectionNameAndRange> sections;
109 for (BinarySection &Section : BC->sections()) {
110 if (!Section.isText())
111 continue;
112 if (Section.getSize() == 0)
113 continue;
114 sections.push_back(
115 {Section.getName(), Section.getAddress(), Section.getEndAddress()});
116 }
117 llvm::sort(sections,
118 [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
119 return A.BeginAddress < B.BeginAddress;
120 });
121 return sections;
122 }
123 }
124
125 constexpr uint64_t DataAggregator::KernelBaseAddr;
126
~DataAggregator()127 DataAggregator::~DataAggregator() { deleteTempFiles(); }
128
129 namespace {
deleteTempFile(const std::string & FileName)130 void deleteTempFile(const std::string &FileName) {
131 if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
132 errs() << "PERF2BOLT: failed to delete temporary file " << FileName
133 << " with error " << Errc.message() << "\n";
134 }
135 }
136
deleteTempFiles()137 void DataAggregator::deleteTempFiles() {
138 for (std::string &FileName : TempFiles)
139 deleteTempFile(FileName);
140 TempFiles.clear();
141 }
142
findPerfExecutable()143 void DataAggregator::findPerfExecutable() {
144 Optional<std::string> PerfExecutable =
145 sys::Process::FindInEnvPath("PATH", "perf");
146 if (!PerfExecutable) {
147 outs() << "PERF2BOLT: No perf executable found!\n";
148 exit(1);
149 }
150 PerfPath = *PerfExecutable;
151 }
152
start()153 void DataAggregator::start() {
154 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
155
156 // Don't launch perf for pre-aggregated files
157 if (opts::ReadPreAggregated)
158 return;
159
160 findPerfExecutable();
161
162 if (opts::BasicAggregation)
163 launchPerfProcess("events without LBR",
164 MainEventsPPI,
165 "script -F pid,event,ip",
166 /*Wait = */false);
167 else
168 launchPerfProcess("branch events",
169 MainEventsPPI,
170 "script -F pid,ip,brstack",
171 /*Wait = */false);
172
173 // Note: we launch script for mem events regardless of the option, as the
174 // command fails fairly fast if mem events were not collected.
175 launchPerfProcess("mem events",
176 MemEventsPPI,
177 "script -F pid,event,addr,ip",
178 /*Wait = */false);
179
180 launchPerfProcess("process events",
181 MMapEventsPPI,
182 "script --show-mmap-events",
183 /*Wait = */false);
184
185 launchPerfProcess("task events",
186 TaskEventsPPI,
187 "script --show-task-events",
188 /*Wait = */false);
189 }
190
abort()191 void DataAggregator::abort() {
192 if (opts::ReadPreAggregated)
193 return;
194
195 std::string Error;
196
197 // Kill subprocesses in case they are not finished
198 sys::Wait(TaskEventsPPI.PI, 1, false, &Error);
199 sys::Wait(MMapEventsPPI.PI, 1, false, &Error);
200 sys::Wait(MainEventsPPI.PI, 1, false, &Error);
201 sys::Wait(MemEventsPPI.PI, 1, false, &Error);
202
203 deleteTempFiles();
204
205 exit(1);
206 }
207
launchPerfProcess(StringRef Name,PerfProcessInfo & PPI,const char * ArgsString,bool Wait)208 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
209 const char *ArgsString, bool Wait) {
210 SmallVector<StringRef, 4> Argv;
211
212 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
213 Argv.push_back(PerfPath.data());
214
215 char *WritableArgsString = strdup(ArgsString);
216 char *Str = WritableArgsString;
217 do {
218 Argv.push_back(Str);
219 while (*Str && *Str != ' ')
220 ++Str;
221 if (!*Str)
222 break;
223 *Str++ = 0;
224 } while (true);
225
226 Argv.push_back("-f");
227 Argv.push_back("-i");
228 Argv.push_back(Filename.c_str());
229
230 if (std::error_code Errc =
231 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
232 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
233 << " with error " << Errc.message() << "\n";
234 exit(1);
235 }
236 TempFiles.push_back(PPI.StdoutPath.data());
237
238 if (std::error_code Errc =
239 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
240 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
241 << " with error " << Errc.message() << "\n";
242 exit(1);
243 }
244 TempFiles.push_back(PPI.StderrPath.data());
245
246 Optional<StringRef> Redirects[] = {
247 llvm::None, // Stdin
248 StringRef(PPI.StdoutPath.data()), // Stdout
249 StringRef(PPI.StderrPath.data())}; // Stderr
250
251 LLVM_DEBUG({
252 dbgs() << "Launching perf: ";
253 for (StringRef Arg : Argv)
254 dbgs() << Arg << " ";
255 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
256 << "\n";
257 });
258
259 if (Wait)
260 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
261 /*envp*/ llvm::None, Redirects);
262 else
263 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None,
264 Redirects);
265
266 free(WritableArgsString);
267 }
268
processFileBuildID(StringRef FileBuildID)269 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
270 PerfProcessInfo BuildIDProcessInfo;
271 launchPerfProcess("buildid list",
272 BuildIDProcessInfo,
273 "buildid-list",
274 /*Wait = */true);
275
276 if (BuildIDProcessInfo.PI.ReturnCode != 0) {
277 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
278 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
279 StringRef ErrBuf = (*MB)->getBuffer();
280
281 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
282 << '\n';
283 errs() << ErrBuf;
284 return;
285 }
286
287 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
288 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
289 if (std::error_code EC = MB.getError()) {
290 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
291 << EC.message() << "\n";
292 return;
293 }
294
295 FileBuf = std::move(*MB);
296 ParsingBuf = FileBuf->getBuffer();
297
298 Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
299 if (!FileName) {
300 if (hasAllBuildIDs()) {
301 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
302 "This indicates the input binary supplied for data aggregation "
303 "is not the same recorded by perf when collecting profiling "
304 "data, or there were no samples recorded for the binary. "
305 "Use -ignore-build-id option to override.\n";
306 if (!opts::IgnoreBuildID)
307 abort();
308 } else {
309 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
310 "data was recorded without it\n";
311 return;
312 }
313 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
314 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
315 BuildIDBinaryName = std::string(*FileName);
316 } else {
317 outs() << "PERF2BOLT: matched build-id and file name\n";
318 }
319
320 return;
321 }
322
checkPerfDataMagic(StringRef FileName)323 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
324 if (opts::ReadPreAggregated)
325 return true;
326
327 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
328 if (!FD)
329 return false;
330
331 char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
332
333 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
334 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
335 *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0);
336 if (!BytesRead || *BytesRead != 7)
337 return false;
338
339 if (strncmp(Buf, "PERFILE", 7) == 0)
340 return true;
341 return false;
342 }
343
parsePreAggregated()344 void DataAggregator::parsePreAggregated() {
345 std::string Error;
346
347 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
348 MemoryBuffer::getFileOrSTDIN(Filename);
349 if (std::error_code EC = MB.getError()) {
350 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
351 << EC.message() << "\n";
352 exit(1);
353 }
354
355 FileBuf = std::move(*MB);
356 ParsingBuf = FileBuf->getBuffer();
357 Col = 0;
358 Line = 1;
359 if (parsePreAggregatedLBRSamples()) {
360 errs() << "PERF2BOLT: failed to parse samples\n";
361 exit(1);
362 }
363 }
364
writeAutoFDOData(StringRef OutputFilename)365 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
366 outs() << "PERF2BOLT: writing data for autofdo tools...\n";
367 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
368 TimerGroupDesc, opts::TimeAggregator);
369
370 std::error_code EC;
371 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
372 if (EC)
373 return EC;
374
375 // Format:
376 // number of unique traces
377 // from_1-to_1:count_1
378 // from_2-to_2:count_2
379 // ......
380 // from_n-to_n:count_n
381 // number of unique sample addresses
382 // addr_1:count_1
383 // addr_2:count_2
384 // ......
385 // addr_n:count_n
386 // number of unique LBR entries
387 // src_1->dst_1:count_1
388 // src_2->dst_2:count_2
389 // ......
390 // src_n->dst_n:count_n
391
392 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
393
394 // AutoFDO addresses are relative to the first allocated loadable program
395 // segment
396 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
397 if (Address < FirstAllocAddress)
398 return 0;
399 return Address - FirstAllocAddress;
400 };
401
402 OutFile << FallthroughLBRs.size() << "\n";
403 for (const auto &AggrLBR : FallthroughLBRs) {
404 const Trace &Trace = AggrLBR.first;
405 const FTInfo &Info = AggrLBR.second;
406 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-"
407 << Twine::utohexstr(filterAddress(Trace.To)) << ":"
408 << (Info.InternCount + Info.ExternCount) << "\n";
409 }
410
411 OutFile << BasicSamples.size() << "\n";
412 for (const auto &Sample : BasicSamples) {
413 uint64_t PC = Sample.first;
414 uint64_t HitCount = Sample.second;
415 OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n";
416 }
417
418 OutFile << BranchLBRs.size() << "\n";
419 for (const auto &AggrLBR : BranchLBRs) {
420 const Trace &Trace = AggrLBR.first;
421 const BranchInfo &Info = AggrLBR.second;
422 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->"
423 << Twine::utohexstr(filterAddress(Trace.To)) << ":"
424 << Info.TakenCount << "\n";
425 }
426
427 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
428 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
429 << " unique branches to " << OutputFilename << "\n";
430
431 return std::error_code();
432 }
433
filterBinaryMMapInfo()434 void DataAggregator::filterBinaryMMapInfo() {
435 if (opts::FilterPID) {
436 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
437 if (MMapInfoIter != BinaryMMapInfo.end()) {
438 MMapInfo MMap = MMapInfoIter->second;
439 BinaryMMapInfo.clear();
440 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
441 } else {
442 if (errs().has_colors())
443 errs().changeColor(raw_ostream::RED);
444 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
445 << opts::FilterPID << "\""
446 << " for binary \"" << BC->getFilename() << "\".";
447 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
448 errs() << " Profile for the following process is available:\n";
449 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
450 outs() << " " << MMI.second.PID
451 << (MMI.second.Forked ? " (forked)\n" : "\n");
452
453 if (errs().has_colors())
454 errs().resetColor();
455
456 exit(1);
457 }
458 }
459 }
460
preprocessProfile(BinaryContext & BC)461 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
462 this->BC = &BC;
463
464 if (opts::ReadPreAggregated) {
465 parsePreAggregated();
466 return Error::success();
467 }
468
469 if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) {
470 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
471 processFileBuildID(*FileBuildID);
472 } else {
473 errs() << "BOLT-WARNING: build-id will not be checked because we could "
474 "not read one from input binary\n";
475 }
476
477 auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) {
478 std::string Error;
479 outs() << "PERF2BOLT: waiting for perf " << Name
480 << " collection to finish...\n";
481 sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error);
482
483 if (!Error.empty()) {
484 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
485 deleteTempFiles();
486 exit(1);
487 }
488
489 if (PI.ReturnCode != 0) {
490 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
491 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
492 StringRef ErrBuf = (*ErrorMB)->getBuffer();
493
494 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
495 errs() << ErrBuf;
496 deleteTempFiles();
497 exit(1);
498 }
499
500 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
501 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
502 if (std::error_code EC = MB.getError()) {
503 errs() << "Cannot open " << Process.StdoutPath.data() << ": "
504 << EC.message() << "\n";
505 deleteTempFiles();
506 exit(1);
507 }
508
509 FileBuf = std::move(*MB);
510 ParsingBuf = FileBuf->getBuffer();
511 Col = 0;
512 Line = 1;
513 };
514
515 if (opts::LinuxKernelMode) {
516 // Current MMap parsing logic does not work with linux kernel.
517 // MMap entries for linux kernel uses PERF_RECORD_MMAP
518 // format instead of typical PERF_RECORD_MMAP2 format.
519 // Since linux kernel address mapping is absolute (same as
520 // in the ELF file), we avoid parsing MMap in linux kernel mode.
521 // While generating optimized linux kernel binary, we may need
522 // to parse MMap entries.
523
524 // In linux kernel mode, we analyze and optimize
525 // all linux kernel binary instructions, irrespective
526 // of whether they are due to system calls or due to
527 // interrupts. Therefore, we cannot ignore interrupt
528 // in Linux kernel mode.
529 opts::IgnoreInterruptLBR = false;
530 } else {
531 prepareToParse("mmap events", MMapEventsPPI);
532 if (parseMMapEvents())
533 errs() << "PERF2BOLT: failed to parse mmap events\n";
534 }
535
536 prepareToParse("task events", TaskEventsPPI);
537 if (parseTaskEvents())
538 errs() << "PERF2BOLT: failed to parse task events\n";
539
540 filterBinaryMMapInfo();
541 prepareToParse("events", MainEventsPPI);
542
543 if (opts::HeatmapMode) {
544 if (std::error_code EC = printLBRHeatMap()) {
545 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
546 exit(1);
547 }
548 exit(0);
549 }
550
551 if ((!opts::BasicAggregation && parseBranchEvents()) ||
552 (opts::BasicAggregation && parseBasicEvents()))
553 errs() << "PERF2BOLT: failed to parse samples\n";
554
555 // We can finish early if the goal is just to generate data for autofdo
556 if (opts::WriteAutoFDOData) {
557 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
558 errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
559
560 deleteTempFiles();
561 exit(0);
562 }
563
564 // Special handling for memory events
565 std::string Error;
566 sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error);
567 if (PI.ReturnCode != 0) {
568 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
569 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data());
570 StringRef ErrBuf = (*MB)->getBuffer();
571
572 deleteTempFiles();
573
574 Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
575 "Cannot print 'addr' field.");
576 if (!NoData.match(ErrBuf)) {
577 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n";
578 errs() << ErrBuf;
579 exit(1);
580 }
581 return Error::success();
582 }
583
584 ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
585 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data());
586 if (std::error_code EC = MB.getError()) {
587 errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": "
588 << EC.message() << "\n";
589 deleteTempFiles();
590 exit(1);
591 }
592
593 FileBuf = std::move(*MB);
594 ParsingBuf = FileBuf->getBuffer();
595 Col = 0;
596 Line = 1;
597 if (const std::error_code EC = parseMemEvents())
598 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
599 << '\n';
600
601 deleteTempFiles();
602
603 return Error::success();
604 }
605
readProfile(BinaryContext & BC)606 Error DataAggregator::readProfile(BinaryContext &BC) {
607 processProfile(BC);
608
609 for (auto &BFI : BC.getBinaryFunctions()) {
610 BinaryFunction &Function = BFI.second;
611 convertBranchData(Function);
612 }
613
614 if (opts::AggregateOnly) {
615 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
616 report_error("cannot create output data file", EC);
617 }
618
619 return Error::success();
620 }
621
mayHaveProfileData(const BinaryFunction & Function)622 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
623 return Function.hasProfileAvailable();
624 }
625
processProfile(BinaryContext & BC)626 void DataAggregator::processProfile(BinaryContext &BC) {
627 if (opts::ReadPreAggregated)
628 processPreAggregated();
629 else if (opts::BasicAggregation)
630 processBasicEvents();
631 else
632 processBranchEvents();
633
634 processMemEvents();
635
636 // Mark all functions with registered events as having a valid profile.
637 for (auto &BFI : BC.getBinaryFunctions()) {
638 BinaryFunction &BF = BFI.second;
639 if (getBranchData(BF)) {
640 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
641 : BinaryFunction::PF_LBR;
642 BF.markProfiled(Flags);
643 }
644 }
645
646 // Release intermediate storage.
647 clear(BranchLBRs);
648 clear(FallthroughLBRs);
649 clear(AggregatedLBRs);
650 clear(BasicSamples);
651 clear(MemSamples);
652 }
653
654 BinaryFunction *
getBinaryFunctionContainingAddress(uint64_t Address) const655 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
656 if (!BC->containsAddress(Address))
657 return nullptr;
658
659 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
660 /*UseMaxSize=*/true);
661 }
662
getLocationName(BinaryFunction & Func,uint64_t Count)663 StringRef DataAggregator::getLocationName(BinaryFunction &Func,
664 uint64_t Count) {
665 if (!BAT)
666 return Func.getOneName();
667
668 const BinaryFunction *OrigFunc = &Func;
669 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
670 NumColdSamples += Count;
671 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
672 if (HotFunc)
673 OrigFunc = HotFunc;
674 }
675 // If it is a local function, prefer the name containing the file name where
676 // the local function was declared
677 for (StringRef AlternativeName : OrigFunc->getNames()) {
678 size_t FileNameIdx = AlternativeName.find('/');
679 // Confirm the alternative name has the pattern Symbol/FileName/1 before
680 // using it
681 if (FileNameIdx == StringRef::npos ||
682 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
683 continue;
684 return AlternativeName;
685 }
686 return OrigFunc->getOneName();
687 }
688
doSample(BinaryFunction & Func,uint64_t Address,uint64_t Count)689 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
690 uint64_t Count) {
691 auto I = NamesToSamples.find(Func.getOneName());
692 if (I == NamesToSamples.end()) {
693 bool Success;
694 StringRef LocName = getLocationName(Func, Count);
695 std::tie(I, Success) = NamesToSamples.insert(
696 std::make_pair(Func.getOneName(),
697 FuncSampleData(LocName, FuncSampleData::ContainerTy())));
698 }
699
700 Address -= Func.getAddress();
701 if (BAT)
702 Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);
703
704 I->second.bumpCount(Address, Count);
705 return true;
706 }
707
doIntraBranch(BinaryFunction & Func,uint64_t From,uint64_t To,uint64_t Count,uint64_t Mispreds)708 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
709 uint64_t To, uint64_t Count,
710 uint64_t Mispreds) {
711 FuncBranchData *AggrData = getBranchData(Func);
712 if (!AggrData) {
713 AggrData = &NamesToBranches[Func.getOneName()];
714 AggrData->Name = getLocationName(Func, Count);
715 setBranchData(Func, AggrData);
716 }
717
718 From -= Func.getAddress();
719 To -= Func.getAddress();
720 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
721 << " @ " << Twine::utohexstr(From) << " -> "
722 << Func.getPrintName() << " @ " << Twine::utohexstr(To)
723 << '\n');
724 if (BAT) {
725 From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
726 To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
727 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
728 << Func.getPrintName() << " @ " << Twine::utohexstr(From)
729 << " -> " << Func.getPrintName() << " @ "
730 << Twine::utohexstr(To) << '\n');
731 }
732
733 AggrData->bumpBranchCount(From, To, Count, Mispreds);
734 return true;
735 }
736
doInterBranch(BinaryFunction * FromFunc,BinaryFunction * ToFunc,uint64_t From,uint64_t To,uint64_t Count,uint64_t Mispreds)737 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
738 BinaryFunction *ToFunc, uint64_t From,
739 uint64_t To, uint64_t Count,
740 uint64_t Mispreds) {
741 FuncBranchData *FromAggrData = nullptr;
742 FuncBranchData *ToAggrData = nullptr;
743 StringRef SrcFunc;
744 StringRef DstFunc;
745 if (FromFunc) {
746 SrcFunc = getLocationName(*FromFunc, Count);
747 FromAggrData = getBranchData(*FromFunc);
748 if (!FromAggrData) {
749 FromAggrData = &NamesToBranches[FromFunc->getOneName()];
750 FromAggrData->Name = SrcFunc;
751 setBranchData(*FromFunc, FromAggrData);
752 }
753 From -= FromFunc->getAddress();
754 if (BAT)
755 From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);
756
757 recordExit(*FromFunc, From, Mispreds, Count);
758 }
759 if (ToFunc) {
760 DstFunc = getLocationName(*ToFunc, 0);
761 ToAggrData = getBranchData(*ToFunc);
762 if (!ToAggrData) {
763 ToAggrData = &NamesToBranches[ToFunc->getOneName()];
764 ToAggrData->Name = DstFunc;
765 setBranchData(*ToFunc, ToAggrData);
766 }
767 To -= ToFunc->getAddress();
768 if (BAT)
769 To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false);
770
771 recordEntry(*ToFunc, To, Mispreds, Count);
772 }
773
774 if (FromAggrData)
775 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
776 Count, Mispreds);
777 if (ToAggrData)
778 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
779 Count, Mispreds);
780 return true;
781 }
782
doBranch(uint64_t From,uint64_t To,uint64_t Count,uint64_t Mispreds)783 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
784 uint64_t Mispreds) {
785 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
786 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
787 if (!FromFunc && !ToFunc)
788 return false;
789
790 if (FromFunc == ToFunc) {
791 recordBranch(*FromFunc, From - FromFunc->getAddress(),
792 To - FromFunc->getAddress(), Count, Mispreds);
793 return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
794 }
795
796 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
797 }
798
doTrace(const LBREntry & First,const LBREntry & Second,uint64_t Count)799 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
800 uint64_t Count) {
801 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
802 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
803 if (!FromFunc || !ToFunc) {
804 LLVM_DEBUG(
805 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
806 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
807 << " and ending in " << ToFunc->getPrintName() << " @ "
808 << ToFunc->getPrintName() << " @ "
809 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
810 NumLongRangeTraces += Count;
811 return false;
812 }
813 if (FromFunc != ToFunc) {
814 NumInvalidTraces += Count;
815 LLVM_DEBUG(
816 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
817 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
818 << " and ending in " << ToFunc->getPrintName() << " @ "
819 << ToFunc->getPrintName() << " @ "
820 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
821 return false;
822 }
823
824 Optional<BoltAddressTranslation::FallthroughListTy> FTs =
825 BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From)
826 : getFallthroughsInTrace(*FromFunc, First, Second, Count);
827 if (!FTs) {
828 LLVM_DEBUG(
829 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
830 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
831 << " and ending in " << ToFunc->getPrintName() << " @ "
832 << ToFunc->getPrintName() << " @ "
833 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
834 NumInvalidTraces += Count;
835 return false;
836 }
837
838 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
839 << FromFunc->getPrintName() << ":"
840 << Twine::utohexstr(First.To) << " to "
841 << Twine::utohexstr(Second.From) << ".\n");
842 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
843 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
844 Pair.second + FromFunc->getAddress(), Count, false);
845
846 return true;
847 }
848
recordTrace(BinaryFunction & BF,const LBREntry & FirstLBR,const LBREntry & SecondLBR,uint64_t Count,SmallVector<std::pair<uint64_t,uint64_t>,16> * Branches) const849 bool DataAggregator::recordTrace(
850 BinaryFunction &BF,
851 const LBREntry &FirstLBR,
852 const LBREntry &SecondLBR,
853 uint64_t Count,
854 SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const {
855 BinaryContext &BC = BF.getBinaryContext();
856
857 if (!BF.isSimple())
858 return false;
859
860 assert(BF.hasCFG() && "can only record traces in CFG state");
861
862 // Offsets of the trace within this function.
863 const uint64_t From = FirstLBR.To - BF.getAddress();
864 const uint64_t To = SecondLBR.From - BF.getAddress();
865
866 if (From > To)
867 return false;
868
869 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
870 BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
871
872 if (!FromBB || !ToBB)
873 return false;
874
875 // Adjust FromBB if the first LBR is a return from the last instruction in
876 // the previous block (that instruction should be a call).
877 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
878 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
879 BinaryBasicBlock *PrevBB = BF.getLayout().getBlock(FromBB->getIndex() - 1);
880 if (PrevBB->getSuccessor(FromBB->getLabel())) {
881 const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
882 if (Instr && BC.MIB->isCall(*Instr))
883 FromBB = PrevBB;
884 else
885 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
886 << '\n');
887 } else {
888 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
889 }
890 }
891
892 // Fill out information for fall-through edges. The From and To could be
893 // within the same basic block, e.g. when two call instructions are in the
894 // same block. In this case we skip the processing.
895 if (FromBB == ToBB)
896 return true;
897
898 // Process blocks in the original layout order.
899 BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
900 assert(BB == FromBB && "index mismatch");
901 while (BB != ToBB) {
902 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
903 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
904
905 // Check for bad LBRs.
906 if (!BB->getSuccessor(NextBB->getLabel())) {
907 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
908 << " " << FirstLBR << '\n'
909 << " " << SecondLBR << '\n');
910 return false;
911 }
912
913 // Record fall-through jumps
914 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
915 BI.Count += Count;
916
917 if (Branches) {
918 const MCInst *Instr = BB->getLastNonPseudoInstr();
919 uint64_t Offset = 0;
920 if (Instr)
921 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
922 else
923 Offset = BB->getOffset();
924
925 Branches->emplace_back(Offset, NextBB->getOffset());
926 }
927
928 BB = NextBB;
929 }
930
931 return true;
932 }
933
934 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
getFallthroughsInTrace(BinaryFunction & BF,const LBREntry & FirstLBR,const LBREntry & SecondLBR,uint64_t Count) const935 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
936 const LBREntry &FirstLBR,
937 const LBREntry &SecondLBR,
938 uint64_t Count) const {
939 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
940
941 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
942 return NoneType();
943
944 return Res;
945 }
946
recordEntry(BinaryFunction & BF,uint64_t To,bool Mispred,uint64_t Count) const947 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
948 uint64_t Count) const {
949 if (To > BF.getSize())
950 return false;
951
952 if (!BF.hasProfile())
953 BF.ExecutionCount = 0;
954
955 BinaryBasicBlock *EntryBB = nullptr;
956 if (To == 0) {
957 BF.ExecutionCount += Count;
958 if (!BF.empty())
959 EntryBB = &BF.front();
960 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
961 if (BB->isEntryPoint())
962 EntryBB = BB;
963 }
964
965 if (EntryBB)
966 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
967
968 return true;
969 }
970
recordExit(BinaryFunction & BF,uint64_t From,bool Mispred,uint64_t Count) const971 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
972 uint64_t Count) const {
973 if (!BF.isSimple() || From > BF.getSize())
974 return false;
975
976 if (!BF.hasProfile())
977 BF.ExecutionCount = 0;
978
979 return true;
980 }
981
parseLBREntry()982 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
983 LBREntry Res;
984 ErrorOr<StringRef> FromStrRes = parseString('/');
985 if (std::error_code EC = FromStrRes.getError())
986 return EC;
987 StringRef OffsetStr = FromStrRes.get();
988 if (OffsetStr.getAsInteger(0, Res.From)) {
989 reportError("expected hexadecimal number with From address");
990 Diag << "Found: " << OffsetStr << "\n";
991 return make_error_code(llvm::errc::io_error);
992 }
993
994 ErrorOr<StringRef> ToStrRes = parseString('/');
995 if (std::error_code EC = ToStrRes.getError())
996 return EC;
997 OffsetStr = ToStrRes.get();
998 if (OffsetStr.getAsInteger(0, Res.To)) {
999 reportError("expected hexadecimal number with To address");
1000 Diag << "Found: " << OffsetStr << "\n";
1001 return make_error_code(llvm::errc::io_error);
1002 }
1003
1004 ErrorOr<StringRef> MispredStrRes = parseString('/');
1005 if (std::error_code EC = MispredStrRes.getError())
1006 return EC;
1007 StringRef MispredStr = MispredStrRes.get();
1008 if (MispredStr.size() != 1 ||
1009 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1010 reportError("expected single char for mispred bit");
1011 Diag << "Found: " << MispredStr << "\n";
1012 return make_error_code(llvm::errc::io_error);
1013 }
1014 Res.Mispred = MispredStr[0] == 'M';
1015
1016 static bool MispredWarning = true;
1017 if (MispredStr[0] == '-' && MispredWarning) {
1018 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1019 MispredWarning = false;
1020 }
1021
1022 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1023 if (std::error_code EC = Rest.getError())
1024 return EC;
1025 if (Rest.get().size() < 5) {
1026 reportError("expected rest of LBR entry");
1027 Diag << "Found: " << Rest.get() << "\n";
1028 return make_error_code(llvm::errc::io_error);
1029 }
1030 return Res;
1031 }
1032
checkAndConsumeFS()1033 bool DataAggregator::checkAndConsumeFS() {
1034 if (ParsingBuf[0] != FieldSeparator)
1035 return false;
1036
1037 ParsingBuf = ParsingBuf.drop_front(1);
1038 Col += 1;
1039 return true;
1040 }
1041
consumeRestOfLine()1042 void DataAggregator::consumeRestOfLine() {
1043 size_t LineEnd = ParsingBuf.find_first_of('\n');
1044 if (LineEnd == StringRef::npos) {
1045 ParsingBuf = StringRef();
1046 Col = 0;
1047 Line += 1;
1048 return;
1049 }
1050 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1051 Col = 0;
1052 Line += 1;
1053 }
1054
parseBranchSample()1055 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1056 PerfBranchSample Res;
1057
1058 while (checkAndConsumeFS()) {
1059 }
1060
1061 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1062 if (std::error_code EC = PIDRes.getError())
1063 return EC;
1064 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1065 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) {
1066 consumeRestOfLine();
1067 return make_error_code(errc::no_such_process);
1068 }
1069
1070 while (checkAndConsumeFS()) {
1071 }
1072
1073 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1074 if (std::error_code EC = PCRes.getError())
1075 return EC;
1076 Res.PC = PCRes.get();
1077
1078 if (checkAndConsumeNewLine())
1079 return Res;
1080
1081 while (!checkAndConsumeNewLine()) {
1082 checkAndConsumeFS();
1083
1084 ErrorOr<LBREntry> LBRRes = parseLBREntry();
1085 if (std::error_code EC = LBRRes.getError())
1086 return EC;
1087 LBREntry LBR = LBRRes.get();
1088 if (ignoreKernelInterrupt(LBR))
1089 continue;
1090 if (!BC->HasFixedLoadAddress)
1091 adjustLBR(LBR, MMapInfoIter->second);
1092 Res.LBR.push_back(LBR);
1093 }
1094
1095 return Res;
1096 }
1097
parseBasicSample()1098 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1099 while (checkAndConsumeFS()) {
1100 }
1101
1102 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1103 if (std::error_code EC = PIDRes.getError())
1104 return EC;
1105
1106 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1107 if (MMapInfoIter == BinaryMMapInfo.end()) {
1108 consumeRestOfLine();
1109 return PerfBasicSample{StringRef(), 0};
1110 }
1111
1112 while (checkAndConsumeFS()) {
1113 }
1114
1115 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1116 if (std::error_code EC = Event.getError())
1117 return EC;
1118
1119 while (checkAndConsumeFS()) {
1120 }
1121
1122 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1123 if (std::error_code EC = AddrRes.getError())
1124 return EC;
1125
1126 if (!checkAndConsumeNewLine()) {
1127 reportError("expected end of line");
1128 return make_error_code(llvm::errc::io_error);
1129 }
1130
1131 uint64_t Address = *AddrRes;
1132 if (!BC->HasFixedLoadAddress)
1133 adjustAddress(Address, MMapInfoIter->second);
1134
1135 return PerfBasicSample{Event.get(), Address};
1136 }
1137
parseMemSample()1138 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1139 PerfMemSample Res{0, 0};
1140
1141 while (checkAndConsumeFS()) {
1142 }
1143
1144 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1145 if (std::error_code EC = PIDRes.getError())
1146 return EC;
1147
1148 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1149 if (MMapInfoIter == BinaryMMapInfo.end()) {
1150 consumeRestOfLine();
1151 return Res;
1152 }
1153
1154 while (checkAndConsumeFS()) {
1155 }
1156
1157 ErrorOr<StringRef> Event = parseString(FieldSeparator);
1158 if (std::error_code EC = Event.getError())
1159 return EC;
1160 if (Event.get().find("mem-loads") == StringRef::npos) {
1161 consumeRestOfLine();
1162 return Res;
1163 }
1164
1165 while (checkAndConsumeFS()) {
1166 }
1167
1168 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1169 if (std::error_code EC = AddrRes.getError())
1170 return EC;
1171
1172 while (checkAndConsumeFS()) {
1173 }
1174
1175 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1176 if (std::error_code EC = PCRes.getError()) {
1177 consumeRestOfLine();
1178 return EC;
1179 }
1180
1181 if (!checkAndConsumeNewLine()) {
1182 reportError("expected end of line");
1183 return make_error_code(llvm::errc::io_error);
1184 }
1185
1186 uint64_t Address = *AddrRes;
1187 if (!BC->HasFixedLoadAddress)
1188 adjustAddress(Address, MMapInfoIter->second);
1189
1190 return PerfMemSample{PCRes.get(), Address};
1191 }
1192
parseLocationOrOffset()1193 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1194 auto parseOffset = [this]() -> ErrorOr<Location> {
1195 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1196 if (std::error_code EC = Res.getError())
1197 return EC;
1198 return Location(Res.get());
1199 };
1200
1201 size_t Sep = ParsingBuf.find_first_of(" \n");
1202 if (Sep == StringRef::npos)
1203 return parseOffset();
1204 StringRef LookAhead = ParsingBuf.substr(0, Sep);
1205 if (LookAhead.find_first_of(":") == StringRef::npos)
1206 return parseOffset();
1207
1208 ErrorOr<StringRef> BuildID = parseString(':');
1209 if (std::error_code EC = BuildID.getError())
1210 return EC;
1211 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1212 if (std::error_code EC = Offset.getError())
1213 return EC;
1214 return Location(true, BuildID.get(), Offset.get());
1215 }
1216
1217 ErrorOr<DataAggregator::AggregatedLBREntry>
parseAggregatedLBREntry()1218 DataAggregator::parseAggregatedLBREntry() {
1219 while (checkAndConsumeFS()) {
1220 }
1221
1222 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1223 if (std::error_code EC = TypeOrErr.getError())
1224 return EC;
1225 auto Type = AggregatedLBREntry::BRANCH;
1226 if (TypeOrErr.get() == "B") {
1227 Type = AggregatedLBREntry::BRANCH;
1228 } else if (TypeOrErr.get() == "F") {
1229 Type = AggregatedLBREntry::FT;
1230 } else if (TypeOrErr.get() == "f") {
1231 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1232 } else {
1233 reportError("expected B, F or f");
1234 return make_error_code(llvm::errc::io_error);
1235 }
1236
1237 while (checkAndConsumeFS()) {
1238 }
1239 ErrorOr<Location> From = parseLocationOrOffset();
1240 if (std::error_code EC = From.getError())
1241 return EC;
1242
1243 while (checkAndConsumeFS()) {
1244 }
1245 ErrorOr<Location> To = parseLocationOrOffset();
1246 if (std::error_code EC = To.getError())
1247 return EC;
1248
1249 while (checkAndConsumeFS()) {
1250 }
1251 ErrorOr<int64_t> Frequency =
1252 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1253 if (std::error_code EC = Frequency.getError())
1254 return EC;
1255
1256 uint64_t Mispreds = 0;
1257 if (Type == AggregatedLBREntry::BRANCH) {
1258 while (checkAndConsumeFS()) {
1259 }
1260 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1261 if (std::error_code EC = MispredsOrErr.getError())
1262 return EC;
1263 Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1264 }
1265
1266 if (!checkAndConsumeNewLine()) {
1267 reportError("expected end of line");
1268 return make_error_code(llvm::errc::io_error);
1269 }
1270
1271 return AggregatedLBREntry{From.get(), To.get(),
1272 static_cast<uint64_t>(Frequency.get()), Mispreds,
1273 Type};
1274 }
1275
ignoreKernelInterrupt(LBREntry & LBR) const1276 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1277 return opts::IgnoreInterruptLBR &&
1278 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1279 }
1280
printLBRHeatMap()1281 std::error_code DataAggregator::printLBRHeatMap() {
1282 outs() << "PERF2BOLT: parse branch events...\n";
1283 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1284 TimerGroupDesc, opts::TimeAggregator);
1285
1286 if (opts::LinuxKernelMode) {
1287 opts::HeatmapMaxAddress = 0xffffffffffffffff;
1288 opts::HeatmapMinAddress = KernelBaseAddr;
1289 }
1290 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1291 opts::HeatmapMaxAddress, getTextSections(BC));
1292 uint64_t NumTotalSamples = 0;
1293
1294 if (opts::BasicAggregation) {
1295 while (hasData()) {
1296 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1297 if (std::error_code EC = SampleRes.getError()) {
1298 if (EC == errc::no_such_process)
1299 continue;
1300 return EC;
1301 }
1302 PerfBasicSample &Sample = SampleRes.get();
1303 HM.registerAddress(Sample.PC);
1304 NumTotalSamples++;
1305 }
1306 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1307 } else {
1308 while (hasData()) {
1309 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1310 if (std::error_code EC = SampleRes.getError()) {
1311 if (EC == errc::no_such_process)
1312 continue;
1313 return EC;
1314 }
1315
1316 PerfBranchSample &Sample = SampleRes.get();
1317
1318 // LBRs are stored in reverse execution order. NextLBR refers to the next
1319 // executed branch record.
1320 const LBREntry *NextLBR = nullptr;
1321 for (const LBREntry &LBR : Sample.LBR) {
1322 if (NextLBR) {
1323 // Record fall-through trace.
1324 const uint64_t TraceFrom = LBR.To;
1325 const uint64_t TraceTo = NextLBR->From;
1326 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1327 }
1328 NextLBR = &LBR;
1329 }
1330 if (!Sample.LBR.empty()) {
1331 HM.registerAddress(Sample.LBR.front().To);
1332 HM.registerAddress(Sample.LBR.back().From);
1333 }
1334 NumTotalSamples += Sample.LBR.size();
1335 }
1336 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1337 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1338 }
1339
1340 if (!NumTotalSamples) {
1341 if (opts::BasicAggregation) {
1342 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1343 "Cannot build heatmap.";
1344 } else {
1345 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1346 "Cannot build heatmap. Use -nl for building heatmap from "
1347 "basic events.\n";
1348 }
1349 exit(1);
1350 }
1351
1352 outs() << "HEATMAP: building heat map...\n";
1353
1354 for (const auto &LBR : FallthroughLBRs) {
1355 const Trace &Trace = LBR.first;
1356 const FTInfo &Info = LBR.second;
1357 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1358 }
1359
1360 if (HM.getNumInvalidRanges())
1361 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1362
1363 if (!HM.size()) {
1364 errs() << "HEATMAP-ERROR: no valid traces registered\n";
1365 exit(1);
1366 }
1367
1368 HM.print(opts::OutputFilename);
1369 if (opts::OutputFilename == "-")
1370 HM.printCDF(opts::OutputFilename);
1371 else
1372 HM.printCDF(opts::OutputFilename + ".csv");
1373 if (opts::OutputFilename == "-")
1374 HM.printSectionHotness(opts::OutputFilename);
1375 else
1376 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1377
1378 return std::error_code();
1379 }
1380
/// Parse all branch (LBR) samples and aggregate them.
///
/// Samples are read until the input is exhausted or opts::MaxSamples samples
/// have been seen. For every sample, fall-through traces between consecutive
/// LBR entries are accumulated in FallthroughLBRs and taken branches in
/// BranchLBRs. Functions touched by any recorded branch are then marked as
/// having profile available, and summary statistics are printed.
///
/// \returns an error code if a sample fails to parse (other than
/// errc::no_such_process, which only causes the sample to be skipped),
/// success otherwise.
std::error_code DataAggregator::parseBranchEvents() {
  outs() << "PERF2BOLT: parse branch events...\n";
  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // NumTotalSamples counts every sample attempted, NumSamples only those that
  // parsed successfully; the difference is reported as "ignored" below.
  uint64_t NumTotalSamples = 0;
  uint64_t NumEntries = 0;
  uint64_t NumSamples = 0;
  uint64_t NumSamplesNoLBR = 0;
  uint64_t NumTraces = 0;
  bool NeedsSkylakeFix = false;

  while (hasData() && NumTotalSamples < opts::MaxSamples) {
    ++NumTotalSamples;

    ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
    if (std::error_code EC = SampleRes.getError()) {
      if (EC == errc::no_such_process)
        continue;
      return EC;
    }
    ++NumSamples;

    PerfBranchSample &Sample = SampleRes.get();
    if (opts::WriteAutoFDOData)
      ++BasicSamples[Sample.PC];

    if (Sample.LBR.empty()) {
      ++NumSamplesNoLBR;
      continue;
    }

    NumEntries += Sample.LBR.size();
    // Once a 32-entry sample is seen in BAT mode, the workaround stays on for
    // this and all following samples.
    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
      errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
      NeedsSkylakeFix = true;
    }

    // LBRs are stored in reverse execution order. NextPC refers to the next
    // recorded executed PC.
    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
    uint32_t NumEntry = 0;
    for (const LBREntry &LBR : Sample.LBR) {
      ++NumEntry;
      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
      // sometimes record entry 32 as an exact copy of entry 31. This will cause
      // us to likely record an invalid trace and generate a stale function for
      // BAT mode (non BAT disassembles the function and is able to ignore this
      // trace at aggregation time). Drop first 2 entries (last two, in
      // chronological order)
      if (NeedsSkylakeFix && NumEntry <= 2)
        continue;
      if (NextPC) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextPC;
        const BinaryFunction *TraceBF =
            getBinaryFunctionContainingAddress(TraceFrom);
        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
          // The trace stays within one function. It is "internal" when the
          // branch that started it also originated in that function.
          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
          if (TraceBF->containsAddress(LBR.From))
            ++Info.InternCount;
          else
            ++Info.ExternCount;
        } else {
          // Both ends in known functions but not the same one: invalid trace.
          // Otherwise at least one end is outside any known function.
          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
            LLVM_DEBUG(dbgs()
                       << "Invalid trace starting in "
                       << TraceBF->getPrintName() << " @ "
                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
                       << " and ending @ " << Twine::utohexstr(TraceTo)
                       << '\n');
            ++NumInvalidTraces;
          } else {
            LLVM_DEBUG(dbgs()
                       << "Out of range trace starting in "
                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
                       << Twine::utohexstr(
                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
                       << " and ending in "
                       << (getBinaryFunctionContainingAddress(TraceTo)
                               ? getBinaryFunctionContainingAddress(TraceTo)
                                     ->getPrintName()
                               : "None")
                       << " @ "
                       << Twine::utohexstr(
                              TraceTo -
                              (getBinaryFunctionContainingAddress(TraceTo)
                                   ? getBinaryFunctionContainingAddress(TraceTo)
                                         ->getAddress()
                                   : 0))
                       << '\n');
            ++NumLongRangeTraces;
          }
        }
        ++NumTraces;
      }
      NextPC = LBR.From;

      // Branch endpoints outside any known function are recorded as 0;
      // branches with both ends unknown are dropped.
      uint64_t From = LBR.From;
      if (!getBinaryFunctionContainingAddress(From))
        From = 0;
      uint64_t To = LBR.To;
      if (!getBinaryFunctionContainingAddress(To))
        To = 0;
      if (!From && !To)
        continue;
      BranchInfo &Info = BranchLBRs[Trace(From, To)];
      ++Info.TakenCount;
      Info.MispredCount += LBR.Mispred;
    }
  }

  // Mark every function that is the source or target of a recorded branch.
  for (const auto &LBR : BranchLBRs) {
    const Trace &Trace = LBR.first;
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
      BF->setHasProfileAvailable();
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
      BF->setHasProfileAvailable();
  }

  // Print " (<Percent>%)", colored red above T2, yellow above T1 and green
  // otherwise when the stream supports colors.
  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
    OS << " (";
    if (OS.has_colors()) {
      if (Percent > T2)
        OS.changeColor(raw_ostream::RED);
      else if (Percent > T1)
        OS.changeColor(raw_ostream::YELLOW);
      else
        OS.changeColor(raw_ostream::GREEN);
    }
    OS << format("%.1f%%", Percent);
    if (OS.has_colors())
      OS.resetColor();
    OS << ")";
  };

  outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
         << " LBR entries\n";
  if (NumTotalSamples) {
    if (NumSamples && NumSamplesNoLBR == NumSamples) {
      // Note: we don't know if perf2bolt is being used to parse memory samples
      // at this point. In this case, it is OK to parse zero LBRs.
      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
                "LBR. Record profile with perf record -j any or run perf2bolt "
                "in no-LBR mode with -nl (the performance improvement in -nl "
                "mode may be limited)\n";
    } else {
      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
      printColored(outs(), PercentIgnored, 20, 50);
      outs() << " were ignored\n";
      if (PercentIgnored > 50.0f)
        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
                  "were attributed to the input binary\n";
    }
  }
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;
  float Perc = 0.0f;
  if (NumTraces > 0) {
    Perc = NumInvalidTraces * 100.0f / NumTraces;
    printColored(outs(), Perc, 5, 10);
  }
  outs() << "\n";
  if (Perc > 10.0f)
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance.\n\n";

  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;
  if (NumTraces > 0)
    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
  outs() << "\n";

  // NOTE(review): NumColdSamples is not updated in this function; it is
  // presumably maintained elsewhere in the class -- confirm.
  if (NumColdSamples > 0) {
    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << "PERF2BOLT: " << NumColdSamples
           << format(" (%.1f%%)", ColdSamples)
           << " samples recorded in cold regions of split functions.\n";
    if (ColdSamples > 5.0f)
      outs()
          << "WARNING: The BOLT-processed binary where samples were collected "
             "likely used bad data or your service observed a large shift in "
             "profile. You may want to audit this.\n";
  }

  return std::error_code();
}
1573
processBranchEvents()1574 void DataAggregator::processBranchEvents() {
1575 outs() << "PERF2BOLT: processing branch events...\n";
1576 NamedRegionTimer T("processBranch", "Processing branch events",
1577 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1578
1579 for (const auto &AggrLBR : FallthroughLBRs) {
1580 const Trace &Loc = AggrLBR.first;
1581 const FTInfo &Info = AggrLBR.second;
1582 LBREntry First{Loc.From, Loc.From, false};
1583 LBREntry Second{Loc.To, Loc.To, false};
1584 if (Info.InternCount)
1585 doTrace(First, Second, Info.InternCount);
1586 if (Info.ExternCount) {
1587 First.From = 0;
1588 doTrace(First, Second, Info.ExternCount);
1589 }
1590 }
1591
1592 for (const auto &AggrLBR : BranchLBRs) {
1593 const Trace &Loc = AggrLBR.first;
1594 const BranchInfo &Info = AggrLBR.second;
1595 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
1596 }
1597 }
1598
parseBasicEvents()1599 std::error_code DataAggregator::parseBasicEvents() {
1600 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1601 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1602 TimerGroupDesc, opts::TimeAggregator);
1603 while (hasData()) {
1604 ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1605 if (std::error_code EC = Sample.getError())
1606 return EC;
1607
1608 if (!Sample->PC)
1609 continue;
1610
1611 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1612 BF->setHasProfileAvailable();
1613
1614 ++BasicSamples[Sample->PC];
1615 EventNames.insert(Sample->EventName);
1616 }
1617
1618 return std::error_code();
1619 }
1620
processBasicEvents()1621 void DataAggregator::processBasicEvents() {
1622 outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1623 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1624 TimerGroupDesc, opts::TimeAggregator);
1625 uint64_t OutOfRangeSamples = 0;
1626 uint64_t NumSamples = 0;
1627 for (auto &Sample : BasicSamples) {
1628 const uint64_t PC = Sample.first;
1629 const uint64_t HitCount = Sample.second;
1630 NumSamples += HitCount;
1631 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1632 if (!Func) {
1633 OutOfRangeSamples += HitCount;
1634 continue;
1635 }
1636
1637 doSample(*Func, PC, HitCount);
1638 }
1639 outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1640
1641 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1642 << OutOfRangeSamples;
1643 float Perc = 0.0f;
1644 if (NumSamples > 0) {
1645 outs() << " (";
1646 Perc = OutOfRangeSamples * 100.0f / NumSamples;
1647 if (outs().has_colors()) {
1648 if (Perc > 60.0f)
1649 outs().changeColor(raw_ostream::RED);
1650 else if (Perc > 40.0f)
1651 outs().changeColor(raw_ostream::YELLOW);
1652 else
1653 outs().changeColor(raw_ostream::GREEN);
1654 }
1655 outs() << format("%.1f%%", Perc);
1656 if (outs().has_colors())
1657 outs().resetColor();
1658 outs() << ")";
1659 }
1660 outs() << "\n";
1661 if (Perc > 80.0f)
1662 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1663 "binary is probably not the same binary used during profiling "
1664 "collection. The generated data may be ineffective for improving "
1665 "performance.\n\n";
1666 }
1667
parseMemEvents()1668 std::error_code DataAggregator::parseMemEvents() {
1669 outs() << "PERF2BOLT: parsing memory events...\n";
1670 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1671 TimerGroupDesc, opts::TimeAggregator);
1672 while (hasData()) {
1673 ErrorOr<PerfMemSample> Sample = parseMemSample();
1674 if (std::error_code EC = Sample.getError())
1675 return EC;
1676
1677 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1678 BF->setHasProfileAvailable();
1679
1680 MemSamples.emplace_back(std::move(Sample.get()));
1681 }
1682
1683 return std::error_code();
1684 }
1685
processMemEvents()1686 void DataAggregator::processMemEvents() {
1687 NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1688 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1689 for (const PerfMemSample &Sample : MemSamples) {
1690 uint64_t PC = Sample.PC;
1691 uint64_t Addr = Sample.Addr;
1692 StringRef FuncName;
1693 StringRef MemName;
1694
1695 // Try to resolve symbol for PC
1696 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1697 if (!Func) {
1698 LLVM_DEBUG(if (PC != 0) {
1699 dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x"
1700 << Twine::utohexstr(Addr) << "\n";
1701 });
1702 continue;
1703 }
1704
1705 FuncName = Func->getOneName();
1706 PC -= Func->getAddress();
1707
1708 // Try to resolve symbol for memory load
1709 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1710 MemName = BD->getName();
1711 Addr -= BD->getAddress();
1712 } else if (opts::FilterMemProfile) {
1713 // Filter out heap/stack accesses
1714 continue;
1715 }
1716
1717 const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1718 const Location AddrLoc(!MemName.empty(), MemName, Addr);
1719
1720 FuncMemData *MemData = &NamesToMemEvents[FuncName];
1721 setMemData(*Func, MemData);
1722 MemData->update(FuncLoc, AddrLoc);
1723 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1724 }
1725 }
1726
parsePreAggregatedLBRSamples()1727 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1728 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1729 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1730 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1731 while (hasData()) {
1732 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1733 if (std::error_code EC = AggrEntry.getError())
1734 return EC;
1735
1736 if (BinaryFunction *BF =
1737 getBinaryFunctionContainingAddress(AggrEntry->From.Offset))
1738 BF->setHasProfileAvailable();
1739 if (BinaryFunction *BF =
1740 getBinaryFunctionContainingAddress(AggrEntry->To.Offset))
1741 BF->setHasProfileAvailable();
1742
1743 AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1744 }
1745
1746 return std::error_code();
1747 }
1748
processPreAggregated()1749 void DataAggregator::processPreAggregated() {
1750 outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1751 NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1752 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1753
1754 uint64_t NumTraces = 0;
1755 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1756 switch (AggrEntry.EntryType) {
1757 case AggregatedLBREntry::BRANCH:
1758 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1759 AggrEntry.Mispreds);
1760 break;
1761 case AggregatedLBREntry::FT:
1762 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1763 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1764 ? AggrEntry.From.Offset
1765 : 0,
1766 AggrEntry.From.Offset, false};
1767 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1768 doTrace(First, Second, AggrEntry.Count);
1769 NumTraces += AggrEntry.Count;
1770 break;
1771 }
1772 }
1773 }
1774
1775 outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1776 << " aggregated LBR entries\n";
1777 outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1778 << NumInvalidTraces;
1779 float Perc = 0.0f;
1780 if (NumTraces > 0) {
1781 outs() << " (";
1782 Perc = NumInvalidTraces * 100.0f / NumTraces;
1783 if (outs().has_colors()) {
1784 if (Perc > 10.0f)
1785 outs().changeColor(raw_ostream::RED);
1786 else if (Perc > 5.0f)
1787 outs().changeColor(raw_ostream::YELLOW);
1788 else
1789 outs().changeColor(raw_ostream::GREEN);
1790 }
1791 outs() << format("%.1f%%", Perc);
1792 if (outs().has_colors())
1793 outs().resetColor();
1794 outs() << ")";
1795 }
1796 outs() << "\n";
1797 if (Perc > 10.0f)
1798 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1799 "binary is probably not the same binary used during profiling "
1800 "collection. The generated data may be ineffective for improving "
1801 "performance.\n\n";
1802
1803 outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1804 << NumLongRangeTraces;
1805 if (NumTraces > 0)
1806 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1807 outs() << "\n";
1808 }
1809
parseCommExecEvent()1810 Optional<int32_t> DataAggregator::parseCommExecEvent() {
1811 size_t LineEnd = ParsingBuf.find_first_of("\n");
1812 if (LineEnd == StringRef::npos) {
1813 reportError("expected rest of line");
1814 Diag << "Found: " << ParsingBuf << "\n";
1815 return NoneType();
1816 }
1817 StringRef Line = ParsingBuf.substr(0, LineEnd);
1818
1819 size_t Pos = Line.find("PERF_RECORD_COMM exec");
1820 if (Pos == StringRef::npos)
1821 return NoneType();
1822 Line = Line.drop_front(Pos);
1823
1824 // Line:
1825 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1826 StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1827 int32_t PID;
1828 if (PIDStr.getAsInteger(10, PID)) {
1829 reportError("expected PID");
1830 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1831 return NoneType();
1832 }
1833
1834 return PID;
1835 }
1836
1837 namespace {
parsePerfTime(const StringRef TimeStr)1838 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1839 const StringRef SecTimeStr = TimeStr.split('.').first;
1840 const StringRef USecTimeStr = TimeStr.split('.').second;
1841 uint64_t SecTime;
1842 uint64_t USecTime;
1843 if (SecTimeStr.getAsInteger(10, SecTime) ||
1844 USecTimeStr.getAsInteger(10, USecTime))
1845 return NoneType();
1846 return SecTime * 1000000ULL + USecTime;
1847 }
1848 }
1849
parseForkEvent()1850 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1851 while (checkAndConsumeFS()) {
1852 }
1853
1854 size_t LineEnd = ParsingBuf.find_first_of("\n");
1855 if (LineEnd == StringRef::npos) {
1856 reportError("expected rest of line");
1857 Diag << "Found: " << ParsingBuf << "\n";
1858 return NoneType();
1859 }
1860 StringRef Line = ParsingBuf.substr(0, LineEnd);
1861
1862 size_t Pos = Line.find("PERF_RECORD_FORK");
1863 if (Pos == StringRef::npos) {
1864 consumeRestOfLine();
1865 return NoneType();
1866 }
1867
1868 ForkInfo FI;
1869
1870 const StringRef TimeStr =
1871 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1872 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1873 FI.Time = *TimeRes;
1874 }
1875
1876 Line = Line.drop_front(Pos);
1877
1878 // Line:
1879 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1880 const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1881 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1882 reportError("expected PID");
1883 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1884 return NoneType();
1885 }
1886
1887 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1888 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1889 reportError("expected PID");
1890 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1891 return NoneType();
1892 }
1893
1894 consumeRestOfLine();
1895
1896 return FI;
1897 }
1898
1899 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
parseMMapEvent()1900 DataAggregator::parseMMapEvent() {
1901 while (checkAndConsumeFS()) {
1902 }
1903
1904 MMapInfo ParsedInfo;
1905
1906 size_t LineEnd = ParsingBuf.find_first_of("\n");
1907 if (LineEnd == StringRef::npos) {
1908 reportError("expected rest of line");
1909 Diag << "Found: " << ParsingBuf << "\n";
1910 return make_error_code(llvm::errc::io_error);
1911 }
1912 StringRef Line = ParsingBuf.substr(0, LineEnd);
1913
1914 size_t Pos = Line.find("PERF_RECORD_MMAP2");
1915 if (Pos == StringRef::npos) {
1916 consumeRestOfLine();
1917 return std::make_pair(StringRef(), ParsedInfo);
1918 }
1919
1920 // Line:
1921 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1922
1923 const StringRef TimeStr =
1924 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1925 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1926 ParsedInfo.Time = *TimeRes;
1927
1928 Line = Line.drop_front(Pos);
1929
1930 // Line:
1931 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1932
1933 StringRef FileName = Line.rsplit(FieldSeparator).second;
1934 if (FileName.startswith("//") || FileName.startswith("[")) {
1935 consumeRestOfLine();
1936 return std::make_pair(StringRef(), ParsedInfo);
1937 }
1938 FileName = sys::path::filename(FileName);
1939
1940 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1941 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1942 reportError("expected PID");
1943 Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1944 return make_error_code(llvm::errc::io_error);
1945 }
1946
1947 const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1948 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1949 reportError("expected base address");
1950 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1951 return make_error_code(llvm::errc::io_error);
1952 }
1953
1954 const StringRef SizeStr = Line.split('(').second.split(')').first;
1955 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1956 reportError("expected mmaped size");
1957 Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1958 return make_error_code(llvm::errc::io_error);
1959 }
1960
1961 const StringRef OffsetStr =
1962 Line.split('@').second.ltrim().split(FieldSeparator).first;
1963 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1964 reportError("expected mmaped page-aligned offset");
1965 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1966 return make_error_code(llvm::errc::io_error);
1967 }
1968
1969 consumeRestOfLine();
1970
1971 return std::make_pair(FileName, ParsedInfo);
1972 }
1973
parseMMapEvents()1974 std::error_code DataAggregator::parseMMapEvents() {
1975 outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1976 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1977 TimerGroupDesc, opts::TimeAggregator);
1978
1979 std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1980 while (hasData()) {
1981 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1982 if (std::error_code EC = FileMMapInfoRes.getError())
1983 return EC;
1984
1985 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1986 if (FileMMapInfo.second.PID == -1)
1987 continue;
1988
1989 // Consider only the first mapping of the file for any given PID
1990 bool PIDExists = false;
1991 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
1992 for (auto MI = Range.first; MI != Range.second; ++MI) {
1993 if (MI->second.PID == FileMMapInfo.second.PID) {
1994 PIDExists = true;
1995 break;
1996 }
1997 }
1998 if (PIDExists)
1999 continue;
2000
2001 GlobalMMapInfo.insert(FileMMapInfo);
2002 }
2003
2004 LLVM_DEBUG({
2005 dbgs() << "FileName -> mmap info:\n";
2006 for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
2007 dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x"
2008 << Twine::utohexstr(Pair.second.MMapAddress) << ", "
2009 << Twine::utohexstr(Pair.second.Size) << " @ "
2010 << Twine::utohexstr(Pair.second.Offset) << "]\n";
2011 });
2012
2013 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
2014 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
2015 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
2016 << "\" for profile matching\n";
2017 NameToUse = BuildIDBinaryName;
2018 }
2019
2020 auto Range = GlobalMMapInfo.equal_range(NameToUse);
2021 for (auto I = Range.first; I != Range.second; ++I) {
2022 MMapInfo &MMapInfo = I->second;
2023 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2024 // Check that the binary mapping matches one of the segments.
2025 bool MatchFound = false;
2026 for (auto &KV : BC->SegmentMapInfo) {
2027 SegmentInfo &SegInfo = KV.second;
2028 // The mapping is page-aligned and hence the MMapAddress could be
2029 // different from the segment start address. We cannot know the page
2030 // size of the mapping, but we know it should not exceed the segment
2031 // alignment value. Hence we are performing an approximate check.
2032 if (SegInfo.Address >= MMapInfo.MMapAddress &&
2033 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) {
2034 MatchFound = true;
2035 break;
2036 }
2037 }
2038 if (!MatchFound) {
2039 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2040 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2041 continue;
2042 }
2043 }
2044
2045 // Set base address for shared objects.
2046 if (!BC->HasFixedLoadAddress) {
2047 Optional<uint64_t> BaseAddress =
2048 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2049 if (!BaseAddress) {
2050 errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2051 "binary when memory mapped at 0x"
2052 << Twine::utohexstr(MMapInfo.MMapAddress)
2053 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2054 << ". Ignoring profile data for this mapping\n";
2055 continue;
2056 } else {
2057 MMapInfo.BaseAddress = *BaseAddress;
2058 }
2059 }
2060
2061 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2062 }
2063
2064 if (BinaryMMapInfo.empty()) {
2065 if (errs().has_colors())
2066 errs().changeColor(raw_ostream::RED);
2067 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2068 << BC->getFilename() << "\".";
2069 if (!GlobalMMapInfo.empty()) {
2070 errs() << " Profile for the following binary name(s) is available:\n";
2071 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2072 I = GlobalMMapInfo.upper_bound(I->first))
2073 errs() << " " << I->first << '\n';
2074 errs() << "Please rename the input binary.\n";
2075 } else {
2076 errs() << " Failed to extract any binary name from a profile.\n";
2077 }
2078 if (errs().has_colors())
2079 errs().resetColor();
2080
2081 exit(1);
2082 }
2083
2084 return std::error_code();
2085 }
2086
parseTaskEvents()2087 std::error_code DataAggregator::parseTaskEvents() {
2088 outs() << "PERF2BOLT: parsing perf-script task events output\n";
2089 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2090 TimerGroupDesc, opts::TimeAggregator);
2091
2092 while (hasData()) {
2093 if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
2094 // Remove forked child that ran execve
2095 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2096 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2097 BinaryMMapInfo.erase(MMapInfoIter);
2098 consumeRestOfLine();
2099 continue;
2100 }
2101
2102 Optional<ForkInfo> ForkInfo = parseForkEvent();
2103 if (!ForkInfo)
2104 continue;
2105
2106 if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2107 continue;
2108
2109 if (ForkInfo->Time == 0) {
2110 // Process was forked and mmaped before perf ran. In this case the child
2111 // should have its own mmap entry unless it was execve'd.
2112 continue;
2113 }
2114
2115 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2116 if (MMapInfoIter == BinaryMMapInfo.end())
2117 continue;
2118
2119 MMapInfo MMapInfo = MMapInfoIter->second;
2120 MMapInfo.PID = ForkInfo->ChildPID;
2121 MMapInfo.Forked = true;
2122 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2123 }
2124
2125 outs() << "PERF2BOLT: input binary is associated with "
2126 << BinaryMMapInfo.size() << " PID(s)\n";
2127
2128 LLVM_DEBUG({
2129 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
2130 outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "")
2131 << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x"
2132 << Twine::utohexstr(MMI.second.Size) << ")\n";
2133 });
2134
2135 return std::error_code();
2136 }
2137
2138 Optional<std::pair<StringRef, StringRef>>
parseNameBuildIDPair()2139 DataAggregator::parseNameBuildIDPair() {
2140 while (checkAndConsumeFS()) {
2141 }
2142
2143 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2144 if (std::error_code EC = BuildIDStr.getError())
2145 return NoneType();
2146
2147 // If one of the strings is missing, don't issue a parsing error, but still
2148 // do not return a value.
2149 if (ParsingBuf[0] == '\n')
2150 return NoneType();
2151
2152 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2153 if (std::error_code EC = NameStr.getError())
2154 return NoneType();
2155
2156 consumeRestOfLine();
2157 return std::make_pair(NameStr.get(), BuildIDStr.get());
2158 }
2159
hasAllBuildIDs()2160 bool DataAggregator::hasAllBuildIDs() {
2161 const StringRef SavedParsingBuf = ParsingBuf;
2162
2163 if (!hasData())
2164 return false;
2165
2166 bool HasInvalidEntries = false;
2167 while (hasData()) {
2168 if (!parseNameBuildIDPair()) {
2169 HasInvalidEntries = true;
2170 break;
2171 }
2172 }
2173
2174 ParsingBuf = SavedParsingBuf;
2175
2176 return !HasInvalidEntries;
2177 }
2178
2179 Optional<StringRef>
getFileNameForBuildID(StringRef FileBuildID)2180 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2181 const StringRef SavedParsingBuf = ParsingBuf;
2182
2183 StringRef FileName;
2184 while (hasData()) {
2185 Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair();
2186 if (!IDPair) {
2187 consumeRestOfLine();
2188 continue;
2189 }
2190
2191 if (IDPair->second.startswith(FileBuildID)) {
2192 FileName = sys::path::filename(IDPair->first);
2193 break;
2194 }
2195 }
2196
2197 ParsingBuf = SavedParsingBuf;
2198
2199 if (!FileName.empty())
2200 return FileName;
2201
2202 return NoneType();
2203 }
2204
2205 std::error_code
writeAggregatedFile(StringRef OutputFilename) const2206 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2207 std::error_code EC;
2208 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2209 if (EC)
2210 return EC;
2211
2212 bool WriteMemLocs = false;
2213
2214 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2215 if (WriteMemLocs)
2216 OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2217 else
2218 OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2219 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2220 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2221 };
2222
2223 uint64_t BranchValues = 0;
2224 uint64_t MemValues = 0;
2225
2226 if (BAT)
2227 OutFile << "boltedcollection\n";
2228 if (opts::BasicAggregation) {
2229 OutFile << "no_lbr";
2230 for (const StringMapEntry<NoneType> &Entry : EventNames)
2231 OutFile << " " << Entry.getKey();
2232 OutFile << "\n";
2233
2234 for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
2235 for (const SampleInfo &SI : Func.getValue().Data) {
2236 writeLocation(SI.Loc);
2237 OutFile << SI.Hits << "\n";
2238 ++BranchValues;
2239 }
2240 }
2241 } else {
2242 for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
2243 for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) {
2244 writeLocation(BI.From);
2245 writeLocation(BI.To);
2246 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2247 ++BranchValues;
2248 }
2249 for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) {
2250 // Do not output if source is a known symbol, since this was already
2251 // accounted for in the source function
2252 if (BI.From.IsSymbol)
2253 continue;
2254 writeLocation(BI.From);
2255 writeLocation(BI.To);
2256 OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2257 ++BranchValues;
2258 }
2259 }
2260
2261 WriteMemLocs = true;
2262 for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
2263 for (const MemInfo &MemEvent : Func.getValue().Data) {
2264 writeLocation(MemEvent.Offset);
2265 writeLocation(MemEvent.Addr);
2266 OutFile << MemEvent.Count << "\n";
2267 ++MemValues;
2268 }
2269 }
2270 }
2271
2272 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2273 << " memory objects to " << OutputFilename << "\n";
2274
2275 return std::error_code();
2276 }
2277
dump() const2278 void DataAggregator::dump() const { DataReader::dump(); }
2279
dump(const LBREntry & LBR) const2280 void DataAggregator::dump(const LBREntry &LBR) const {
2281 Diag << "From: " << Twine::utohexstr(LBR.From)
2282 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2283 << "\n";
2284 }
2285
dump(const PerfBranchSample & Sample) const2286 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2287 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2288 for (const LBREntry &LBR : Sample.LBR)
2289 dump(LBR);
2290 }
2291
dump(const PerfMemSample & Sample) const2292 void DataAggregator::dump(const PerfMemSample &Sample) const {
2293 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2294 }
2295