1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions reads profile data written by perf record, 10 // aggregate it and then write it back to an output file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Profile/DataAggregator.h" 15 #include "bolt/Core/BinaryContext.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "bolt/Profile/BoltAddressTranslation.h" 18 #include "bolt/Profile/Heatmap.h" 19 #include "bolt/Utils/CommandLineOpts.h" 20 #include "bolt/Utils/Utils.h" 21 #include "llvm/ADT/ScopeExit.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Process.h" 27 #include "llvm/Support/Program.h" 28 #include "llvm/Support/Regex.h" 29 #include "llvm/Support/Timer.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include <map> 32 #include <unordered_map> 33 #include <utility> 34 35 #define DEBUG_TYPE "aggregator" 36 37 using namespace llvm; 38 using namespace bolt; 39 40 namespace opts { 41 42 static cl::opt<bool> 43 BasicAggregation("nl", 44 cl::desc("aggregate basic samples (without LBR info)"), 45 cl::init(false), 46 cl::ZeroOrMore, 47 cl::cat(AggregatorCategory)); 48 49 static cl::opt<bool> 50 FilterMemProfile("filter-mem-profile", 51 cl::desc("if processing a memory profile, filter out stack or heap accesses " 52 "that won't be useful for BOLT to reduce profile file size"), 53 cl::init(true), 54 cl::cat(AggregatorCategory)); 55 56 static cl::opt<unsigned long long> 57 FilterPID("pid", 58 cl::desc("only use samples from process with specified PID"), 59 cl::init(0), 60 cl::Optional, 61 cl::cat(AggregatorCategory)); 62 63 static cl::opt<bool> 64 IgnoreBuildID("ignore-build-id", 65 cl::desc("continue even if build-ids in input binary and perf.data mismatch"), 66 cl::init(false), 67 cl::cat(AggregatorCategory)); 68 69 static cl::opt<bool> 70 IgnoreInterruptLBR("ignore-interrupt-lbr", 71 cl::desc("ignore kernel interrupt LBR that happens asynchronously"), 72 cl::init(true), 73 cl::ZeroOrMore, 74 cl::cat(AggregatorCategory)); 75 76 static cl::opt<unsigned long long> 77 MaxSamples("max-samples", 78 cl::init(-1ULL), 79 cl::desc("maximum number of samples to read from LBR profile"), 80 cl::Optional, 81 cl::Hidden, 82 cl::cat(AggregatorCategory)); 83 84 static cl::opt<bool> 85 ReadPreAggregated("pa", 86 cl::desc("skip perf and read data from a pre-aggregated file format"), 87 cl::init(false), 88 cl::ZeroOrMore, 89 cl::cat(AggregatorCategory)); 90 91 static cl::opt<bool> 92 TimeAggregator("time-aggr", 93 cl::desc("time BOLT aggregator"), 94 cl::init(false), 95 cl::ZeroOrMore, 96 cl::cat(AggregatorCategory)); 97 98 static cl::opt<bool> 99 UseEventPC("use-event-pc", 100 cl::desc("use event PC in combination with LBR sampling"), 101 cl::init(false), 102 cl::ZeroOrMore, 103 cl::cat(AggregatorCategory)); 104 105 static cl::opt<bool> 106 WriteAutoFDOData("autofdo", 107 cl::desc("generate autofdo textual data instead of bolt data"), 108 cl::init(false), 109 cl::ZeroOrMore, 110 cl::cat(AggregatorCategory)); 111 112 } // namespace opts 113 114 namespace { 115 116 const char TimerGroupName[] = "aggregator"; 117 const char TimerGroupDesc[] = "Aggregator"; 118 119 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) { 120 std::vector<SectionNameAndRange> sections; 121 for (BinarySection &Section : BC->sections()) { 122 if (!Section.isText()) 123 continue; 124 if (Section.getSize() == 0) 125 continue; 126 sections.push_back( 127 {Section.getName(), Section.getAddress(), Section.getEndAddress()}); 128 } 129 std::sort(sections.begin(), sections.end(), 130 [](const SectionNameAndRange &A, const SectionNameAndRange &B) { 131 return A.BeginAddress < B.BeginAddress; 132 }); 133 return sections; 134 } 135 } 136 137 constexpr uint64_t DataAggregator::KernelBaseAddr; 138 139 DataAggregator::~DataAggregator() { deleteTempFiles(); } 140 141 namespace { 142 void deleteTempFile(const std::string &FileName) { 143 if (std::error_code Errc = sys::fs::remove(FileName.c_str())) 144 errs() << "PERF2BOLT: failed to delete temporary file " << FileName 145 << " with error " << Errc.message() << "\n"; 146 } 147 } 148 149 void DataAggregator::deleteTempFiles() { 150 for (std::string &FileName : TempFiles) 151 deleteTempFile(FileName); 152 TempFiles.clear(); 153 } 154 155 void DataAggregator::findPerfExecutable() { 156 Optional<std::string> PerfExecutable = 157 sys::Process::FindInEnvPath("PATH", "perf"); 158 if (!PerfExecutable) { 159 outs() << "PERF2BOLT: No perf executable found!\n"; 160 exit(1); 161 } 162 PerfPath = *PerfExecutable; 163 } 164 165 void DataAggregator::start() { 166 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; 167 168 // Don't launch perf for pre-aggregated files 169 if (opts::ReadPreAggregated) 170 return; 171 172 findPerfExecutable(); 173 174 if (opts::BasicAggregation) 175 launchPerfProcess("events without LBR", 176 MainEventsPPI, 177 "script -F pid,event,ip", 178 /*Wait = */false); 179 else 180 launchPerfProcess("branch events", 181 MainEventsPPI, 182 "script -F pid,ip,brstack", 183 /*Wait = */false); 184 185 // Note: we launch script for mem events regardless of the option, as the 186 // command fails fairly fast if mem events were not collected. 187 launchPerfProcess("mem events", 188 MemEventsPPI, 189 "script -F pid,event,addr,ip", 190 /*Wait = */false); 191 192 launchPerfProcess("process events", 193 MMapEventsPPI, 194 "script --show-mmap-events", 195 /*Wait = */false); 196 197 launchPerfProcess("task events", 198 TaskEventsPPI, 199 "script --show-task-events", 200 /*Wait = */false); 201 } 202 203 void DataAggregator::abort() { 204 if (opts::ReadPreAggregated) 205 return; 206 207 std::string Error; 208 209 // Kill subprocesses in case they are not finished 210 sys::Wait(TaskEventsPPI.PI, 1, false, &Error); 211 sys::Wait(MMapEventsPPI.PI, 1, false, &Error); 212 sys::Wait(MainEventsPPI.PI, 1, false, &Error); 213 sys::Wait(MemEventsPPI.PI, 1, false, &Error); 214 215 deleteTempFiles(); 216 217 exit(1); 218 } 219 220 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, 221 const char *ArgsString, bool Wait) { 222 SmallVector<StringRef, 4> Argv; 223 224 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; 225 Argv.push_back(PerfPath.data()); 226 227 char *WritableArgsString = strdup(ArgsString); 228 char *Str = WritableArgsString; 229 do { 230 Argv.push_back(Str); 231 while (*Str && *Str != ' ') 232 ++Str; 233 if (!*Str) 234 break; 235 *Str++ = 0; 236 } while (true); 237 238 Argv.push_back("-f"); 239 Argv.push_back("-i"); 240 Argv.push_back(Filename.c_str()); 241 242 if (std::error_code Errc = 243 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) { 244 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath 245 << " with error " << Errc.message() << "\n"; 246 exit(1); 247 } 248 TempFiles.push_back(PPI.StdoutPath.data()); 249 250 if (std::error_code Errc = 251 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) { 252 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath 253 << " with error " << Errc.message() << "\n"; 254 exit(1); 255 } 256 TempFiles.push_back(PPI.StderrPath.data()); 257 258 Optional<StringRef> Redirects[] = { 259 llvm::None, // Stdin 260 StringRef(PPI.StdoutPath.data()), // Stdout 261 StringRef(PPI.StderrPath.data())}; // Stderr 262 263 LLVM_DEBUG({ 264 dbgs() << "Launching perf: "; 265 for (StringRef Arg : Argv) 266 dbgs() << Arg << " "; 267 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() 268 << "\n"; 269 }); 270 271 if (Wait) 272 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, 273 /*envp*/ llvm::None, Redirects); 274 else 275 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None, 276 Redirects); 277 278 free(WritableArgsString); 279 } 280 281 void DataAggregator::processFileBuildID(StringRef FileBuildID) { 282 PerfProcessInfo BuildIDProcessInfo; 283 launchPerfProcess("buildid list", 284 BuildIDProcessInfo, 285 "buildid-list", 286 /*Wait = */true); 287 288 if (BuildIDProcessInfo.PI.ReturnCode != 0) { 289 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 290 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); 291 StringRef ErrBuf = (*MB)->getBuffer(); 292 293 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode 294 << '\n'; 295 errs() << ErrBuf; 296 return; 297 } 298 299 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 300 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); 301 if (std::error_code EC = MB.getError()) { 302 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " 303 << EC.message() << "\n"; 304 return; 305 } 306 307 FileBuf = std::move(*MB); 308 ParsingBuf = FileBuf->getBuffer(); 309 if (ParsingBuf.empty()) { 310 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " 311 "data was recorded without it\n"; 312 return; 313 } 314 315 Col = 0; 316 Line = 1; 317 Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); 318 if (!FileName) { 319 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " 320 "This indicates the input binary supplied for data aggregation " 321 "is not the same recorded by perf when collecting profiling " 322 "data, or there were no samples recorded for the binary. " 323 "Use -ignore-build-id option to override.\n"; 324 if (!opts::IgnoreBuildID) 325 abort(); 326 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { 327 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; 328 BuildIDBinaryName = std::string(*FileName); 329 } else { 330 outs() << "PERF2BOLT: matched build-id and file name\n"; 331 } 332 333 return; 334 } 335 336 bool DataAggregator::checkPerfDataMagic(StringRef FileName) { 337 if (opts::ReadPreAggregated) 338 return true; 339 340 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName); 341 if (!FD) 342 return false; 343 344 char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; 345 346 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); 347 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( 348 *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0); 349 if (!BytesRead || *BytesRead != 7) 350 return false; 351 352 if (strncmp(Buf, "PERFILE", 7) == 0) 353 return true; 354 return false; 355 } 356 357 void DataAggregator::parsePreAggregated() { 358 std::string Error; 359 360 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 361 MemoryBuffer::getFileOrSTDIN(Filename); 362 if (std::error_code EC = MB.getError()) { 363 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " 364 << EC.message() << "\n"; 365 exit(1); 366 } 367 368 FileBuf = std::move(*MB); 369 ParsingBuf = FileBuf->getBuffer(); 370 Col = 0; 371 Line = 1; 372 if (parsePreAggregatedLBRSamples()) { 373 errs() << "PERF2BOLT: failed to parse samples\n"; 374 exit(1); 375 } 376 } 377 378 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { 379 outs() << "PERF2BOLT: writing data for autofdo tools...\n"; 380 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, 381 TimerGroupDesc, opts::TimeAggregator); 382 383 std::error_code EC; 384 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 385 if (EC) 386 return EC; 387 388 // Format: 389 // number of unique traces 390 // from_1-to_1:count_1 391 // from_2-to_2:count_2 392 // ...... 393 // from_n-to_n:count_n 394 // number of unique sample addresses 395 // addr_1:count_1 396 // addr_2:count_2 397 // ...... 398 // addr_n:count_n 399 // number of unique LBR entries 400 // src_1->dst_1:count_1 401 // src_2->dst_2:count_2 402 // ...... 403 // src_n->dst_n:count_n 404 405 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress; 406 407 // AutoFDO addresses are relative to the first allocated loadable program 408 // segment 409 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t { 410 if (Address < FirstAllocAddress) 411 return 0; 412 return Address - FirstAllocAddress; 413 }; 414 415 OutFile << FallthroughLBRs.size() << "\n"; 416 for (const auto &AggrLBR : FallthroughLBRs) { 417 const Trace &Trace = AggrLBR.first; 418 const FTInfo &Info = AggrLBR.second; 419 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-" 420 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 421 << (Info.InternCount + Info.ExternCount) << "\n"; 422 } 423 424 OutFile << BasicSamples.size() << "\n"; 425 for (const auto &Sample : BasicSamples) { 426 uint64_t PC = Sample.first; 427 uint64_t HitCount = Sample.second; 428 OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n"; 429 } 430 431 OutFile << BranchLBRs.size() << "\n"; 432 for (const auto &AggrLBR : BranchLBRs) { 433 const Trace &Trace = AggrLBR.first; 434 const BranchInfo &Info = AggrLBR.second; 435 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->" 436 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 437 << Info.TakenCount << "\n"; 438 } 439 440 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, " 441 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size() 442 << " unique branches to " << OutputFilename << "\n"; 443 444 return std::error_code(); 445 } 446 447 void DataAggregator::filterBinaryMMapInfo() { 448 if (opts::FilterPID) { 449 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); 450 if (MMapInfoIter != BinaryMMapInfo.end()) { 451 MMapInfo MMap = MMapInfoIter->second; 452 BinaryMMapInfo.clear(); 453 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); 454 } else { 455 if (errs().has_colors()) 456 errs().changeColor(raw_ostream::RED); 457 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" 458 << opts::FilterPID << "\"" 459 << " for binary \"" << BC->getFilename() << "\"."; 460 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); 461 errs() << " Profile for the following process is available:\n"; 462 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 463 outs() << " " << MMI.second.PID 464 << (MMI.second.Forked ? " (forked)\n" : "\n"); 465 466 if (errs().has_colors()) 467 errs().resetColor(); 468 469 exit(1); 470 } 471 } 472 } 473 474 Error DataAggregator::preprocessProfile(BinaryContext &BC) { 475 this->BC = &BC; 476 477 if (opts::ReadPreAggregated) { 478 parsePreAggregated(); 479 return Error::success(); 480 } 481 482 if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) { 483 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; 484 processFileBuildID(*FileBuildID); 485 } else { 486 errs() << "BOLT-WARNING: build-id will not be checked because we could " 487 "not read one from input binary\n"; 488 } 489 490 auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) { 491 std::string Error; 492 outs() << "PERF2BOLT: waiting for perf " << Name 493 << " collection to finish...\n"; 494 sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error); 495 496 if (!Error.empty()) { 497 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; 498 deleteTempFiles(); 499 exit(1); 500 } 501 502 if (PI.ReturnCode != 0) { 503 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = 504 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); 505 StringRef ErrBuf = (*ErrorMB)->getBuffer(); 506 507 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 508 errs() << ErrBuf; 509 deleteTempFiles(); 510 exit(1); 511 } 512 513 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 514 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); 515 if (std::error_code EC = MB.getError()) { 516 errs() << "Cannot open " << Process.StdoutPath.data() << ": " 517 << EC.message() << "\n"; 518 deleteTempFiles(); 519 exit(1); 520 } 521 522 FileBuf = std::move(*MB); 523 ParsingBuf = FileBuf->getBuffer(); 524 Col = 0; 525 Line = 1; 526 }; 527 528 if (opts::LinuxKernelMode) { 529 // Current MMap parsing logic does not work with linux kernel. 530 // MMap entries for linux kernel uses PERF_RECORD_MMAP 531 // format instead of typical PERF_RECORD_MMAP2 format. 532 // Since linux kernel address mapping is absolute (same as 533 // in the ELF file), we avoid parsing MMap in linux kernel mode. 534 // While generating optimized linux kernel binary, we may need 535 // to parse MMap entries. 536 537 // In linux kernel mode, we analyze and optimize 538 // all linux kernel binary instructions, irrespective 539 // of whether they are due to system calls or due to 540 // interrupts. Therefore, we cannot ignore interrupt 541 // in Linux kernel mode. 542 opts::IgnoreInterruptLBR = false; 543 } else { 544 prepareToParse("mmap events", MMapEventsPPI); 545 if (parseMMapEvents()) 546 errs() << "PERF2BOLT: failed to parse mmap events\n"; 547 } 548 549 prepareToParse("task events", TaskEventsPPI); 550 if (parseTaskEvents()) 551 errs() << "PERF2BOLT: failed to parse task events\n"; 552 553 filterBinaryMMapInfo(); 554 prepareToParse("events", MainEventsPPI); 555 556 if (opts::HeatmapMode) { 557 if (std::error_code EC = printLBRHeatMap()) { 558 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; 559 exit(1); 560 } 561 exit(0); 562 } 563 564 if ((!opts::BasicAggregation && parseBranchEvents()) || 565 (opts::BasicAggregation && parseBasicEvents())) 566 errs() << "PERF2BOLT: failed to parse samples\n"; 567 568 // We can finish early if the goal is just to generate data for autofdo 569 if (opts::WriteAutoFDOData) { 570 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename)) 571 errs() << "Error writing autofdo data to file: " << EC.message() << "\n"; 572 573 deleteTempFiles(); 574 exit(0); 575 } 576 577 // Special handling for memory events 578 std::string Error; 579 sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error); 580 if (PI.ReturnCode != 0) { 581 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 582 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data()); 583 StringRef ErrBuf = (*MB)->getBuffer(); 584 585 deleteTempFiles(); 586 587 Regex NoData("Samples for '.*' event do not have ADDR attribute set. " 588 "Cannot print 'addr' field."); 589 if (!NoData.match(ErrBuf)) { 590 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 591 errs() << ErrBuf; 592 exit(1); 593 } 594 return Error::success(); 595 } 596 597 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 598 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data()); 599 if (std::error_code EC = MB.getError()) { 600 errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": " 601 << EC.message() << "\n"; 602 deleteTempFiles(); 603 exit(1); 604 } 605 606 FileBuf = std::move(*MB); 607 ParsingBuf = FileBuf->getBuffer(); 608 Col = 0; 609 Line = 1; 610 if (const std::error_code EC = parseMemEvents()) 611 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() 612 << '\n'; 613 614 deleteTempFiles(); 615 616 return Error::success(); 617 } 618 619 Error DataAggregator::readProfile(BinaryContext &BC) { 620 processProfile(BC); 621 622 for (auto &BFI : BC.getBinaryFunctions()) { 623 BinaryFunction &Function = BFI.second; 624 convertBranchData(Function); 625 } 626 627 if (opts::AggregateOnly) { 628 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) 629 report_error("cannot create output data file", EC); 630 } 631 632 return Error::success(); 633 } 634 635 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { 636 return Function.hasProfileAvailable(); 637 } 638 639 void DataAggregator::processProfile(BinaryContext &BC) { 640 if (opts::ReadPreAggregated) 641 processPreAggregated(); 642 else if (opts::BasicAggregation) 643 processBasicEvents(); 644 else 645 processBranchEvents(); 646 647 processMemEvents(); 648 649 // Mark all functions with registered events as having a valid profile. 650 for (auto &BFI : BC.getBinaryFunctions()) { 651 BinaryFunction &BF = BFI.second; 652 if (getBranchData(BF)) { 653 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE 654 : BinaryFunction::PF_LBR; 655 BF.markProfiled(Flags); 656 } 657 } 658 659 // Release intermediate storage. 660 clear(BranchLBRs); 661 clear(FallthroughLBRs); 662 clear(AggregatedLBRs); 663 clear(BasicSamples); 664 clear(MemSamples); 665 } 666 667 BinaryFunction * 668 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { 669 if (!BC->containsAddress(Address)) 670 return nullptr; 671 672 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, 673 /*UseMaxSize=*/true); 674 } 675 676 StringRef DataAggregator::getLocationName(BinaryFunction &Func, 677 uint64_t Count) { 678 if (!BAT) 679 return Func.getOneName(); 680 681 const BinaryFunction *OrigFunc = &Func; 682 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) { 683 NumColdSamples += Count; 684 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr); 685 if (HotFunc) 686 OrigFunc = HotFunc; 687 } 688 // If it is a local function, prefer the name containing the file name where 689 // the local function was declared 690 for (StringRef AlternativeName : OrigFunc->getNames()) { 691 size_t FileNameIdx = AlternativeName.find('/'); 692 // Confirm the alternative name has the pattern Symbol/FileName/1 before 693 // using it 694 if (FileNameIdx == StringRef::npos || 695 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos) 696 continue; 697 return AlternativeName; 698 } 699 return OrigFunc->getOneName(); 700 } 701 702 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address, 703 uint64_t Count) { 704 auto I = NamesToSamples.find(Func.getOneName()); 705 if (I == NamesToSamples.end()) { 706 bool Success; 707 StringRef LocName = getLocationName(Func, Count); 708 std::tie(I, Success) = NamesToSamples.insert( 709 std::make_pair(Func.getOneName(), 710 FuncSampleData(LocName, FuncSampleData::ContainerTy()))); 711 } 712 713 Address -= Func.getAddress(); 714 if (BAT) 715 Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false); 716 717 I->second.bumpCount(Address, Count); 718 return true; 719 } 720 721 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, 722 uint64_t To, uint64_t Count, 723 uint64_t Mispreds) { 724 FuncBranchData *AggrData = getBranchData(Func); 725 if (!AggrData) { 726 AggrData = &NamesToBranches[Func.getOneName()]; 727 AggrData->Name = getLocationName(Func, Count); 728 setBranchData(Func, AggrData); 729 } 730 731 From -= Func.getAddress(); 732 To -= Func.getAddress(); 733 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName() 734 << " @ " << Twine::utohexstr(From) << " -> " 735 << Func.getPrintName() << " @ " << Twine::utohexstr(To) 736 << '\n'); 737 if (BAT) { 738 From = BAT->translate(Func, From, /*IsBranchSrc=*/true); 739 To = BAT->translate(Func, To, /*IsBranchSrc=*/false); 740 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: " 741 << Func.getPrintName() << " @ " << Twine::utohexstr(From) 742 << " -> " << Func.getPrintName() << " @ " 743 << Twine::utohexstr(To) << '\n'); 744 } 745 746 AggrData->bumpBranchCount(From, To, Count, Mispreds); 747 return true; 748 } 749 750 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, 751 BinaryFunction *ToFunc, uint64_t From, 752 uint64_t To, uint64_t Count, 753 uint64_t Mispreds) { 754 FuncBranchData *FromAggrData = nullptr; 755 FuncBranchData *ToAggrData = nullptr; 756 StringRef SrcFunc; 757 StringRef DstFunc; 758 if (FromFunc) { 759 SrcFunc = getLocationName(*FromFunc, Count); 760 FromAggrData = getBranchData(*FromFunc); 761 if (!FromAggrData) { 762 FromAggrData = &NamesToBranches[FromFunc->getOneName()]; 763 FromAggrData->Name = SrcFunc; 764 setBranchData(*FromFunc, FromAggrData); 765 } 766 From -= FromFunc->getAddress(); 767 if (BAT) 768 From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true); 769 770 recordExit(*FromFunc, From, Mispreds, Count); 771 } 772 if (ToFunc) { 773 DstFunc = getLocationName(*ToFunc, 0); 774 ToAggrData = getBranchData(*ToFunc); 775 if (!ToAggrData) { 776 ToAggrData = &NamesToBranches[ToFunc->getOneName()]; 777 ToAggrData->Name = DstFunc; 778 setBranchData(*ToFunc, ToAggrData); 779 } 780 To -= ToFunc->getAddress(); 781 if (BAT) 782 To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false); 783 784 recordEntry(*ToFunc, To, Mispreds, Count); 785 } 786 787 if (FromAggrData) 788 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To), 789 Count, Mispreds); 790 if (ToAggrData) 791 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To, 792 Count, Mispreds); 793 return true; 794 } 795 796 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, 797 uint64_t Mispreds) { 798 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); 799 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); 800 if (!FromFunc && !ToFunc) 801 return false; 802 803 if (FromFunc == ToFunc) { 804 recordBranch(*FromFunc, From - FromFunc->getAddress(), 805 To - FromFunc->getAddress(), Count, Mispreds); 806 return doIntraBranch(*FromFunc, From, To, Count, Mispreds); 807 } 808 809 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); 810 } 811 812 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, 813 uint64_t Count) { 814 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); 815 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); 816 if (!FromFunc || !ToFunc) { 817 LLVM_DEBUG( 818 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() 819 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 820 << " and ending in " << ToFunc->getPrintName() << " @ " 821 << ToFunc->getPrintName() << " @ " 822 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 823 NumLongRangeTraces += Count; 824 return false; 825 } 826 if (FromFunc != ToFunc) { 827 NumInvalidTraces += Count; 828 LLVM_DEBUG( 829 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 830 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 831 << " and ending in " << ToFunc->getPrintName() << " @ " 832 << ToFunc->getPrintName() << " @ " 833 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 834 return false; 835 } 836 837 Optional<BoltAddressTranslation::FallthroughListTy> FTs = 838 BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From) 839 : getFallthroughsInTrace(*FromFunc, First, Second, Count); 840 if (!FTs) { 841 LLVM_DEBUG( 842 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 843 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 844 << " and ending in " << ToFunc->getPrintName() << " @ " 845 << ToFunc->getPrintName() << " @ " 846 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 847 NumInvalidTraces += Count; 848 return false; 849 } 850 851 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " 852 << FromFunc->getPrintName() << ":" 853 << Twine::utohexstr(First.To) << " to " 854 << Twine::utohexstr(Second.From) << ".\n"); 855 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs) 856 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(), 857 Pair.second + FromFunc->getAddress(), Count, false); 858 859 return true; 860 } 861 862 bool DataAggregator::recordTrace( 863 BinaryFunction &BF, 864 const LBREntry &FirstLBR, 865 const LBREntry &SecondLBR, 866 uint64_t Count, 867 SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const { 868 BinaryContext &BC = BF.getBinaryContext(); 869 870 if (!BF.isSimple()) 871 return false; 872 873 assert(BF.hasCFG() && "can only record traces in CFG state"); 874 875 // Offsets of the trace within this function. 876 const uint64_t From = FirstLBR.To - BF.getAddress(); 877 const uint64_t To = SecondLBR.From - BF.getAddress(); 878 879 if (From > To) 880 return false; 881 882 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 883 BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 884 885 if (!FromBB || !ToBB) 886 return false; 887 888 // Adjust FromBB if the first LBR is a return from the last instruction in 889 // the previous block (that instruction should be a call). 890 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && 891 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { 892 BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1]; 893 if (PrevBB->getSuccessor(FromBB->getLabel())) { 894 const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); 895 if (Instr && BC.MIB->isCall(*Instr)) 896 FromBB = PrevBB; 897 else 898 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR 899 << '\n'); 900 } else { 901 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); 902 } 903 } 904 905 // Fill out information for fall-through edges. The From and To could be 906 // within the same basic block, e.g. when two call instructions are in the 907 // same block. In this case we skip the processing. 908 if (FromBB == ToBB) 909 return true; 910 911 // Process blocks in the original layout order. 912 BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()]; 913 assert(BB == FromBB && "index mismatch"); 914 while (BB != ToBB) { 915 BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1]; 916 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); 917 918 // Check for bad LBRs. 919 if (!BB->getSuccessor(NextBB->getLabel())) { 920 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" 921 << " " << FirstLBR << '\n' 922 << " " << SecondLBR << '\n'); 923 return false; 924 } 925 926 // Record fall-through jumps 927 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB); 928 BI.Count += Count; 929 930 if (Branches) { 931 const MCInst *Instr = BB->getLastNonPseudoInstr(); 932 uint64_t Offset = 0; 933 if (Instr) 934 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0); 935 else 936 Offset = BB->getOffset(); 937 938 Branches->emplace_back(Offset, NextBB->getOffset()); 939 } 940 941 BB = NextBB; 942 } 943 944 return true; 945 } 946 947 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> 948 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, 949 const LBREntry &FirstLBR, 950 const LBREntry &SecondLBR, 951 uint64_t Count) const { 952 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res; 953 954 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res)) 955 return NoneType(); 956 957 return Res; 958 } 959 960 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, 961 uint64_t Count) const { 962 if (To > BF.getSize()) 963 return false; 964 965 if (!BF.hasProfile()) 966 BF.ExecutionCount = 0; 967 968 BinaryBasicBlock *EntryBB = nullptr; 969 if (To == 0) { 970 BF.ExecutionCount += Count; 971 if (!BF.empty()) 972 EntryBB = &BF.front(); 973 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) { 974 if (BB->isEntryPoint()) 975 EntryBB = BB; 976 } 977 978 if (EntryBB) 979 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); 980 981 return true; 982 } 983 984 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, 985 uint64_t Count) const { 986 if (!BF.isSimple() || From > BF.getSize()) 987 return false; 988 989 if (!BF.hasProfile()) 990 BF.ExecutionCount = 0; 991 992 return true; 993 } 994 995 ErrorOr<LBREntry> DataAggregator::parseLBREntry() { 996 LBREntry Res; 997 ErrorOr<StringRef> FromStrRes = parseString('/'); 998 if (std::error_code EC = FromStrRes.getError()) 999 return EC; 1000 StringRef OffsetStr = FromStrRes.get(); 1001 if (OffsetStr.getAsInteger(0, Res.From)) { 1002 reportError("expected hexadecimal number with From address"); 1003 Diag << "Found: " << OffsetStr << "\n"; 1004 return make_error_code(llvm::errc::io_error); 1005 } 1006 1007 ErrorOr<StringRef> ToStrRes = parseString('/'); 1008 if (std::error_code EC = ToStrRes.getError()) 1009 return EC; 1010 OffsetStr = ToStrRes.get(); 1011 if (OffsetStr.getAsInteger(0, Res.To)) { 1012 reportError("expected hexadecimal number with To address"); 1013 Diag << "Found: " << OffsetStr << "\n"; 1014 return make_error_code(llvm::errc::io_error); 1015 } 1016 1017 ErrorOr<StringRef> MispredStrRes = parseString('/'); 1018 if (std::error_code EC = MispredStrRes.getError()) 1019 return EC; 1020 StringRef MispredStr = MispredStrRes.get(); 1021 if (MispredStr.size() != 1 || 1022 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { 1023 reportError("expected single char for mispred bit"); 1024 Diag << "Found: " << MispredStr << "\n"; 1025 return make_error_code(llvm::errc::io_error); 1026 } 1027 Res.Mispred = MispredStr[0] == 'M'; 1028 1029 static bool MispredWarning = true; 1030 if (MispredStr[0] == '-' && MispredWarning) { 1031 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; 1032 MispredWarning = false; 1033 } 1034 1035 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true); 1036 if (std::error_code EC = Rest.getError()) 1037 return EC; 1038 if (Rest.get().size() < 5) { 1039 reportError("expected rest of LBR entry"); 1040 Diag << "Found: " << Rest.get() << "\n"; 1041 return make_error_code(llvm::errc::io_error); 1042 } 1043 return Res; 1044 } 1045 1046 bool DataAggregator::checkAndConsumeFS() { 1047 if (ParsingBuf[0] != FieldSeparator) 1048 return false; 1049 1050 ParsingBuf = ParsingBuf.drop_front(1); 1051 Col += 1; 1052 return true; 1053 } 1054 1055 void DataAggregator::consumeRestOfLine() { 1056 size_t LineEnd = ParsingBuf.find_first_of('\n'); 1057 if (LineEnd == StringRef::npos) { 1058 ParsingBuf = StringRef(); 1059 Col = 0; 1060 Line += 1; 1061 return; 1062 } 1063 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); 1064 Col = 0; 1065 Line += 1; 1066 } 1067 1068 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { 1069 PerfBranchSample Res; 1070 1071 while (checkAndConsumeFS()) { 1072 } 1073 1074 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1075 if (std::error_code EC = PIDRes.getError()) 1076 return EC; 1077 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1078 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) { 1079 consumeRestOfLine(); 1080 return make_error_code(errc::no_such_process); 1081 } 1082 1083 while (checkAndConsumeFS()) { 1084 } 1085 1086 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1087 if (std::error_code EC = PCRes.getError()) 1088 return EC; 1089 Res.PC = PCRes.get(); 1090 1091 if (checkAndConsumeNewLine()) 1092 return Res; 1093 1094 while (!checkAndConsumeNewLine()) { 1095 checkAndConsumeFS(); 1096 1097 ErrorOr<LBREntry> LBRRes = parseLBREntry(); 1098 if (std::error_code EC = LBRRes.getError()) 1099 return EC; 1100 LBREntry LBR = LBRRes.get(); 1101 if (ignoreKernelInterrupt(LBR)) 1102 continue; 1103 if (!BC->HasFixedLoadAddress) 1104 adjustLBR(LBR, MMapInfoIter->second); 1105 Res.LBR.push_back(LBR); 1106 } 1107 1108 return Res; 1109 } 1110 1111 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { 1112 while (checkAndConsumeFS()) { 1113 } 1114 1115 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1116 if (std::error_code EC = PIDRes.getError()) 1117 return EC; 1118 1119 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1120 if (MMapInfoIter == BinaryMMapInfo.end()) { 1121 consumeRestOfLine(); 1122 return PerfBasicSample{StringRef(), 0}; 1123 } 1124 1125 while (checkAndConsumeFS()) { 1126 } 1127 1128 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1129 if (std::error_code EC = Event.getError()) 1130 return EC; 1131 1132 while (checkAndConsumeFS()) { 1133 } 1134 1135 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true); 1136 if (std::error_code EC = AddrRes.getError()) 1137 return EC; 1138 1139 if (!checkAndConsumeNewLine()) { 1140 reportError("expected end of line"); 1141 return make_error_code(llvm::errc::io_error); 1142 } 1143 1144 uint64_t Address = *AddrRes; 1145 if (!BC->HasFixedLoadAddress) 1146 adjustAddress(Address, MMapInfoIter->second); 1147 1148 return PerfBasicSample{Event.get(), Address}; 1149 } 1150 1151 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { 1152 PerfMemSample Res{0, 0}; 1153 1154 while (checkAndConsumeFS()) { 1155 } 1156 1157 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1158 if (std::error_code EC = PIDRes.getError()) 1159 return EC; 1160 1161 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1162 if (MMapInfoIter == BinaryMMapInfo.end()) { 1163 consumeRestOfLine(); 1164 return Res; 1165 } 1166 1167 while (checkAndConsumeFS()) { 1168 } 1169 1170 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1171 if (std::error_code EC = Event.getError()) 1172 return EC; 1173 if (Event.get().find("mem-loads") == StringRef::npos) { 1174 consumeRestOfLine(); 1175 return Res; 1176 } 1177 1178 while (checkAndConsumeFS()) { 1179 } 1180 1181 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator); 1182 if (std::error_code EC = AddrRes.getError()) 1183 return EC; 1184 1185 while (checkAndConsumeFS()) { 1186 } 1187 1188 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1189 if (std::error_code EC = PCRes.getError()) { 1190 consumeRestOfLine(); 1191 return EC; 1192 } 1193 1194 if (!checkAndConsumeNewLine()) { 1195 reportError("expected end of line"); 1196 return make_error_code(llvm::errc::io_error); 1197 } 1198 1199 uint64_t Address = *AddrRes; 1200 if (!BC->HasFixedLoadAddress) 1201 adjustAddress(Address, MMapInfoIter->second); 1202 1203 return PerfMemSample{PCRes.get(), Address}; 1204 } 1205 1206 ErrorOr<Location> DataAggregator::parseLocationOrOffset() { 1207 auto parseOffset = [this]() -> ErrorOr<Location> { 1208 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator); 1209 if (std::error_code EC = Res.getError()) 1210 return EC; 1211 return Location(Res.get()); 1212 }; 1213 1214 size_t Sep = ParsingBuf.find_first_of(" \n"); 1215 if (Sep == StringRef::npos) 1216 return parseOffset(); 1217 StringRef LookAhead = ParsingBuf.substr(0, Sep); 1218 if (LookAhead.find_first_of(":") == StringRef::npos) 1219 return parseOffset(); 1220 1221 ErrorOr<StringRef> BuildID = parseString(':'); 1222 if (std::error_code EC = BuildID.getError()) 1223 return EC; 1224 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator); 1225 if (std::error_code EC = Offset.getError()) 1226 return EC; 1227 return Location(true, BuildID.get(), Offset.get()); 1228 } 1229 1230 ErrorOr<DataAggregator::AggregatedLBREntry> 1231 DataAggregator::parseAggregatedLBREntry() { 1232 while (checkAndConsumeFS()) { 1233 } 1234 1235 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); 1236 if (std::error_code EC = TypeOrErr.getError()) 1237 return EC; 1238 auto Type = AggregatedLBREntry::BRANCH; 1239 if (TypeOrErr.get() == "B") { 1240 Type = AggregatedLBREntry::BRANCH; 1241 } else if (TypeOrErr.get() == "F") { 1242 Type = AggregatedLBREntry::FT; 1243 } else if (TypeOrErr.get() == "f") { 1244 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; 1245 } else { 1246 reportError("expected B, F or f"); 1247 return make_error_code(llvm::errc::io_error); 1248 } 1249 1250 while (checkAndConsumeFS()) { 1251 } 1252 ErrorOr<Location> From = parseLocationOrOffset(); 1253 if (std::error_code EC = From.getError()) 1254 return EC; 1255 1256 while (checkAndConsumeFS()) { 1257 } 1258 ErrorOr<Location> To = parseLocationOrOffset(); 1259 if (std::error_code EC = To.getError()) 1260 return EC; 1261 1262 while (checkAndConsumeFS()) { 1263 } 1264 ErrorOr<int64_t> Frequency = 1265 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); 1266 if (std::error_code EC = Frequency.getError()) 1267 return EC; 1268 1269 uint64_t Mispreds = 0; 1270 if (Type == AggregatedLBREntry::BRANCH) { 1271 while (checkAndConsumeFS()) { 1272 } 1273 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); 1274 if (std::error_code EC = MispredsOrErr.getError()) 1275 return EC; 1276 Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); 1277 } 1278 1279 if (!checkAndConsumeNewLine()) { 1280 reportError("expected end of line"); 1281 return make_error_code(llvm::errc::io_error); 1282 } 1283 1284 return AggregatedLBREntry{From.get(), To.get(), 1285 static_cast<uint64_t>(Frequency.get()), Mispreds, 1286 Type}; 1287 } 1288 1289 bool DataAggregator::hasData() { 1290 if (ParsingBuf.size() == 0) 1291 return false; 1292 1293 return true; 1294 } 1295 1296 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { 1297 return opts::IgnoreInterruptLBR && 1298 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); 1299 } 1300 1301 std::error_code DataAggregator::printLBRHeatMap() { 1302 outs() << "PERF2BOLT: parse branch events...\n"; 1303 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1304 TimerGroupDesc, opts::TimeAggregator); 1305 1306 if (opts::LinuxKernelMode) { 1307 opts::HeatmapMaxAddress = 0xffffffffffffffff; 1308 opts::HeatmapMinAddress = KernelBaseAddr; 1309 } 1310 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, 1311 opts::HeatmapMaxAddress, getTextSections(BC)); 1312 uint64_t NumTotalSamples = 0; 1313 1314 if (opts::BasicAggregation) { 1315 while (hasData()) { 1316 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); 1317 if (std::error_code EC = SampleRes.getError()) { 1318 if (EC == errc::no_such_process) 1319 continue; 1320 return EC; 1321 } 1322 PerfBasicSample &Sample = SampleRes.get(); 1323 HM.registerAddress(Sample.PC); 1324 NumTotalSamples++; 1325 } 1326 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n"; 1327 } else { 1328 while (hasData()) { 1329 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1330 if (std::error_code EC = SampleRes.getError()) { 1331 if (EC == errc::no_such_process) 1332 continue; 1333 return EC; 1334 } 1335 1336 PerfBranchSample &Sample = SampleRes.get(); 1337 1338 // LBRs are stored in reverse execution order. NextLBR refers to the next 1339 // executed branch record. 1340 const LBREntry *NextLBR = nullptr; 1341 for (const LBREntry &LBR : Sample.LBR) { 1342 if (NextLBR) { 1343 // Record fall-through trace. 1344 const uint64_t TraceFrom = LBR.To; 1345 const uint64_t TraceTo = NextLBR->From; 1346 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; 1347 } 1348 NextLBR = &LBR; 1349 } 1350 if (!Sample.LBR.empty()) { 1351 HM.registerAddress(Sample.LBR.front().To); 1352 HM.registerAddress(Sample.LBR.back().From); 1353 } 1354 NumTotalSamples += Sample.LBR.size(); 1355 } 1356 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; 1357 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; 1358 } 1359 1360 if (!NumTotalSamples) { 1361 if (opts::BasicAggregation) { 1362 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " 1363 "Cannot build heatmap."; 1364 } else { 1365 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " 1366 "Cannot build heatmap. Use -nl for building heatmap from " 1367 "basic events.\n"; 1368 } 1369 exit(1); 1370 } 1371 1372 outs() << "HEATMAP: building heat map...\n"; 1373 1374 for (const auto &LBR : FallthroughLBRs) { 1375 const Trace &Trace = LBR.first; 1376 const FTInfo &Info = LBR.second; 1377 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); 1378 } 1379 1380 if (HM.getNumInvalidRanges()) 1381 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; 1382 1383 if (!HM.size()) { 1384 errs() << "HEATMAP-ERROR: no valid traces registered\n"; 1385 exit(1); 1386 } 1387 1388 HM.print(opts::OutputFilename); 1389 if (opts::OutputFilename == "-") 1390 HM.printCDF(opts::OutputFilename); 1391 else 1392 HM.printCDF(opts::OutputFilename + ".csv"); 1393 if (opts::OutputFilename == "-") 1394 HM.printSectionHotness(opts::OutputFilename); 1395 else 1396 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv"); 1397 1398 return std::error_code(); 1399 } 1400 1401 std::error_code DataAggregator::parseBranchEvents() { 1402 outs() << "PERF2BOLT: parse branch events...\n"; 1403 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1404 TimerGroupDesc, opts::TimeAggregator); 1405 1406 uint64_t NumTotalSamples = 0; 1407 uint64_t NumEntries = 0; 1408 uint64_t NumSamples = 0; 1409 uint64_t NumSamplesNoLBR = 0; 1410 uint64_t NumTraces = 0; 1411 bool NeedsSkylakeFix = false; 1412 1413 while (hasData() && NumTotalSamples < opts::MaxSamples) { 1414 ++NumTotalSamples; 1415 1416 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1417 if (std::error_code EC = SampleRes.getError()) { 1418 if (EC == errc::no_such_process) 1419 continue; 1420 return EC; 1421 } 1422 ++NumSamples; 1423 1424 PerfBranchSample &Sample = SampleRes.get(); 1425 if (opts::WriteAutoFDOData) 1426 ++BasicSamples[Sample.PC]; 1427 1428 if (Sample.LBR.empty()) { 1429 ++NumSamplesNoLBR; 1430 continue; 1431 } 1432 1433 NumEntries += Sample.LBR.size(); 1434 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { 1435 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; 1436 NeedsSkylakeFix = true; 1437 } 1438 1439 // LBRs are stored in reverse execution order. NextPC refers to the next 1440 // recorded executed PC. 1441 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0; 1442 uint32_t NumEntry = 0; 1443 for (const LBREntry &LBR : Sample.LBR) { 1444 ++NumEntry; 1445 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) 1446 // sometimes record entry 32 as an exact copy of entry 31. This will cause 1447 // us to likely record an invalid trace and generate a stale function for 1448 // BAT mode (non BAT disassembles the function and is able to ignore this 1449 // trace at aggregation time). Drop first 2 entries (last two, in 1450 // chronological order) 1451 if (NeedsSkylakeFix && NumEntry <= 2) 1452 continue; 1453 if (NextPC) { 1454 // Record fall-through trace. 1455 const uint64_t TraceFrom = LBR.To; 1456 const uint64_t TraceTo = NextPC; 1457 const BinaryFunction *TraceBF = 1458 getBinaryFunctionContainingAddress(TraceFrom); 1459 if (TraceBF && TraceBF->containsAddress(TraceTo)) { 1460 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; 1461 if (TraceBF->containsAddress(LBR.From)) 1462 ++Info.InternCount; 1463 else 1464 ++Info.ExternCount; 1465 } else { 1466 if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) { 1467 LLVM_DEBUG(dbgs() 1468 << "Invalid trace starting in " 1469 << TraceBF->getPrintName() << " @ " 1470 << Twine::utohexstr(TraceFrom - TraceBF->getAddress()) 1471 << " and ending @ " << Twine::utohexstr(TraceTo) 1472 << '\n'); 1473 ++NumInvalidTraces; 1474 } else { 1475 LLVM_DEBUG(dbgs() 1476 << "Out of range trace starting in " 1477 << (TraceBF ? TraceBF->getPrintName() : "None") << " @ " 1478 << Twine::utohexstr( 1479 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0)) 1480 << " and ending in " 1481 << (getBinaryFunctionContainingAddress(TraceTo) 1482 ? getBinaryFunctionContainingAddress(TraceTo) 1483 ->getPrintName() 1484 : "None") 1485 << " @ " 1486 << Twine::utohexstr( 1487 TraceTo - 1488 (getBinaryFunctionContainingAddress(TraceTo) 1489 ? getBinaryFunctionContainingAddress(TraceTo) 1490 ->getAddress() 1491 : 0)) 1492 << '\n'); 1493 ++NumLongRangeTraces; 1494 } 1495 } 1496 ++NumTraces; 1497 } 1498 NextPC = LBR.From; 1499 1500 uint64_t From = LBR.From; 1501 if (!getBinaryFunctionContainingAddress(From)) 1502 From = 0; 1503 uint64_t To = LBR.To; 1504 if (!getBinaryFunctionContainingAddress(To)) 1505 To = 0; 1506 if (!From && !To) 1507 continue; 1508 BranchInfo &Info = BranchLBRs[Trace(From, To)]; 1509 ++Info.TakenCount; 1510 Info.MispredCount += LBR.Mispred; 1511 } 1512 } 1513 1514 for (const auto &LBR : BranchLBRs) { 1515 const Trace &Trace = LBR.first; 1516 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From)) 1517 BF->setHasProfileAvailable(); 1518 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To)) 1519 BF->setHasProfileAvailable(); 1520 } 1521 1522 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { 1523 OS << " ("; 1524 if (OS.has_colors()) { 1525 if (Percent > T2) 1526 OS.changeColor(raw_ostream::RED); 1527 else if (Percent > T1) 1528 OS.changeColor(raw_ostream::YELLOW); 1529 else 1530 OS.changeColor(raw_ostream::GREEN); 1531 } 1532 OS << format("%.1f%%", Percent); 1533 if (OS.has_colors()) 1534 OS.resetColor(); 1535 OS << ")"; 1536 }; 1537 1538 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries 1539 << " LBR entries\n"; 1540 if (NumTotalSamples) { 1541 if (NumSamples && NumSamplesNoLBR == NumSamples) { 1542 // Note: we don't know if perf2bolt is being used to parse memory samples 1543 // at this point. In this case, it is OK to parse zero LBRs. 1544 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " 1545 "LBR. Record profile with perf record -j any or run perf2bolt " 1546 "in no-LBR mode with -nl (the performance improvement in -nl " 1547 "mode may be limited)\n"; 1548 } else { 1549 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; 1550 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; 1551 outs() << "PERF2BOLT: " << IgnoredSamples << " samples"; 1552 printColored(outs(), PercentIgnored, 20, 50); 1553 outs() << " were ignored\n"; 1554 if (PercentIgnored > 50.0f) 1555 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " 1556 "were attributed to the input binary\n"; 1557 } 1558 } 1559 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1560 << NumInvalidTraces; 1561 float Perc = 0.0f; 1562 if (NumTraces > 0) { 1563 Perc = NumInvalidTraces * 100.0f / NumTraces; 1564 printColored(outs(), Perc, 5, 10); 1565 } 1566 outs() << "\n"; 1567 if (Perc > 10.0f) 1568 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1569 "binary is probably not the same binary used during profiling " 1570 "collection. The generated data may be ineffective for improving " 1571 "performance.\n\n"; 1572 1573 outs() << "PERF2BOLT: out of range traces involving unknown regions: " 1574 << NumLongRangeTraces; 1575 if (NumTraces > 0) 1576 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1577 outs() << "\n"; 1578 1579 if (NumColdSamples > 0) { 1580 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; 1581 outs() << "PERF2BOLT: " << NumColdSamples 1582 << format(" (%.1f%%)", ColdSamples) 1583 << " samples recorded in cold regions of split functions.\n"; 1584 if (ColdSamples > 5.0f) 1585 outs() 1586 << "WARNING: The BOLT-processed binary where samples were collected " 1587 "likely used bad data or your service observed a large shift in " 1588 "profile. You may want to audit this.\n"; 1589 } 1590 1591 return std::error_code(); 1592 } 1593 1594 void DataAggregator::processBranchEvents() { 1595 outs() << "PERF2BOLT: processing branch events...\n"; 1596 NamedRegionTimer T("processBranch", "Processing branch events", 1597 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1598 1599 for (const auto &AggrLBR : FallthroughLBRs) { 1600 const Trace &Loc = AggrLBR.first; 1601 const FTInfo &Info = AggrLBR.second; 1602 LBREntry First{Loc.From, Loc.From, false}; 1603 LBREntry Second{Loc.To, Loc.To, false}; 1604 if (Info.InternCount) 1605 doTrace(First, Second, Info.InternCount); 1606 if (Info.ExternCount) { 1607 First.From = 0; 1608 doTrace(First, Second, Info.ExternCount); 1609 } 1610 } 1611 1612 for (const auto &AggrLBR : BranchLBRs) { 1613 const Trace &Loc = AggrLBR.first; 1614 const BranchInfo &Info = AggrLBR.second; 1615 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); 1616 } 1617 } 1618 1619 std::error_code DataAggregator::parseBasicEvents() { 1620 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; 1621 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, 1622 TimerGroupDesc, opts::TimeAggregator); 1623 while (hasData()) { 1624 ErrorOr<PerfBasicSample> Sample = parseBasicSample(); 1625 if (std::error_code EC = Sample.getError()) 1626 return EC; 1627 1628 if (!Sample->PC) 1629 continue; 1630 1631 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1632 BF->setHasProfileAvailable(); 1633 1634 ++BasicSamples[Sample->PC]; 1635 EventNames.insert(Sample->EventName); 1636 } 1637 1638 return std::error_code(); 1639 } 1640 1641 void DataAggregator::processBasicEvents() { 1642 outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; 1643 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, 1644 TimerGroupDesc, opts::TimeAggregator); 1645 uint64_t OutOfRangeSamples = 0; 1646 uint64_t NumSamples = 0; 1647 for (auto &Sample : BasicSamples) { 1648 const uint64_t PC = Sample.first; 1649 const uint64_t HitCount = Sample.second; 1650 NumSamples += HitCount; 1651 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1652 if (!Func) { 1653 OutOfRangeSamples += HitCount; 1654 continue; 1655 } 1656 1657 doSample(*Func, PC, HitCount); 1658 } 1659 outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; 1660 1661 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " 1662 << OutOfRangeSamples; 1663 float Perc = 0.0f; 1664 if (NumSamples > 0) { 1665 outs() << " ("; 1666 Perc = OutOfRangeSamples * 100.0f / NumSamples; 1667 if (outs().has_colors()) { 1668 if (Perc > 60.0f) 1669 outs().changeColor(raw_ostream::RED); 1670 else if (Perc > 40.0f) 1671 outs().changeColor(raw_ostream::YELLOW); 1672 else 1673 outs().changeColor(raw_ostream::GREEN); 1674 } 1675 outs() << format("%.1f%%", Perc); 1676 if (outs().has_colors()) 1677 outs().resetColor(); 1678 outs() << ")"; 1679 } 1680 outs() << "\n"; 1681 if (Perc > 80.0f) 1682 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1683 "binary is probably not the same binary used during profiling " 1684 "collection. The generated data may be ineffective for improving " 1685 "performance.\n\n"; 1686 } 1687 1688 std::error_code DataAggregator::parseMemEvents() { 1689 outs() << "PERF2BOLT: parsing memory events...\n"; 1690 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, 1691 TimerGroupDesc, opts::TimeAggregator); 1692 while (hasData()) { 1693 ErrorOr<PerfMemSample> Sample = parseMemSample(); 1694 if (std::error_code EC = Sample.getError()) 1695 return EC; 1696 1697 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1698 BF->setHasProfileAvailable(); 1699 1700 MemSamples.emplace_back(std::move(Sample.get())); 1701 } 1702 1703 return std::error_code(); 1704 } 1705 1706 void DataAggregator::processMemEvents() { 1707 NamedRegionTimer T("ProcessMemEvents", "Processing mem events", 1708 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1709 for (const PerfMemSample &Sample : MemSamples) { 1710 uint64_t PC = Sample.PC; 1711 uint64_t Addr = Sample.Addr; 1712 StringRef FuncName; 1713 StringRef MemName; 1714 1715 // Try to resolve symbol for PC 1716 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1717 if (!Func) { 1718 LLVM_DEBUG(if (PC != 0) { 1719 dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x" 1720 << Twine::utohexstr(Addr) << "\n"; 1721 }); 1722 continue; 1723 } 1724 1725 FuncName = Func->getOneName(); 1726 PC -= Func->getAddress(); 1727 1728 // Try to resolve symbol for memory load 1729 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { 1730 MemName = BD->getName(); 1731 Addr -= BD->getAddress(); 1732 } else if (opts::FilterMemProfile) { 1733 // Filter out heap/stack accesses 1734 continue; 1735 } 1736 1737 const Location FuncLoc(!FuncName.empty(), FuncName, PC); 1738 const Location AddrLoc(!MemName.empty(), MemName, Addr); 1739 1740 FuncMemData *MemData = &NamesToMemEvents[FuncName]; 1741 setMemData(*Func, MemData); 1742 MemData->update(FuncLoc, AddrLoc); 1743 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); 1744 } 1745 } 1746 1747 std::error_code DataAggregator::parsePreAggregatedLBRSamples() { 1748 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; 1749 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", 1750 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1751 while (hasData()) { 1752 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); 1753 if (std::error_code EC = AggrEntry.getError()) 1754 return EC; 1755 1756 if (BinaryFunction *BF = 1757 getBinaryFunctionContainingAddress(AggrEntry->From.Offset)) 1758 BF->setHasProfileAvailable(); 1759 if (BinaryFunction *BF = 1760 getBinaryFunctionContainingAddress(AggrEntry->To.Offset)) 1761 BF->setHasProfileAvailable(); 1762 1763 AggregatedLBRs.emplace_back(std::move(AggrEntry.get())); 1764 } 1765 1766 return std::error_code(); 1767 } 1768 1769 void DataAggregator::processPreAggregated() { 1770 outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; 1771 NamedRegionTimer T("processAggregated", "Processing aggregated branch events", 1772 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1773 1774 uint64_t NumTraces = 0; 1775 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { 1776 switch (AggrEntry.EntryType) { 1777 case AggregatedLBREntry::BRANCH: 1778 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, 1779 AggrEntry.Mispreds); 1780 break; 1781 case AggregatedLBREntry::FT: 1782 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { 1783 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT 1784 ? AggrEntry.From.Offset 1785 : 0, 1786 AggrEntry.From.Offset, false}; 1787 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; 1788 doTrace(First, Second, AggrEntry.Count); 1789 NumTraces += AggrEntry.Count; 1790 break; 1791 } 1792 } 1793 } 1794 1795 outs() << "PERF2BOLT: read " << AggregatedLBRs.size() 1796 << " aggregated LBR entries\n"; 1797 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1798 << NumInvalidTraces; 1799 float Perc = 0.0f; 1800 if (NumTraces > 0) { 1801 outs() << " ("; 1802 Perc = NumInvalidTraces * 100.0f / NumTraces; 1803 if (outs().has_colors()) { 1804 if (Perc > 10.0f) 1805 outs().changeColor(raw_ostream::RED); 1806 else if (Perc > 5.0f) 1807 outs().changeColor(raw_ostream::YELLOW); 1808 else 1809 outs().changeColor(raw_ostream::GREEN); 1810 } 1811 outs() << format("%.1f%%", Perc); 1812 if (outs().has_colors()) 1813 outs().resetColor(); 1814 outs() << ")"; 1815 } 1816 outs() << "\n"; 1817 if (Perc > 10.0f) 1818 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1819 "binary is probably not the same binary used during profiling " 1820 "collection. The generated data may be ineffective for improving " 1821 "performance.\n\n"; 1822 1823 outs() << "PERF2BOLT: Out of range traces involving unknown regions: " 1824 << NumLongRangeTraces; 1825 if (NumTraces > 0) 1826 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1827 outs() << "\n"; 1828 } 1829 1830 Optional<int32_t> DataAggregator::parseCommExecEvent() { 1831 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1832 if (LineEnd == StringRef::npos) { 1833 reportError("expected rest of line"); 1834 Diag << "Found: " << ParsingBuf << "\n"; 1835 return NoneType(); 1836 } 1837 StringRef Line = ParsingBuf.substr(0, LineEnd); 1838 1839 size_t Pos = Line.find("PERF_RECORD_COMM exec"); 1840 if (Pos == StringRef::npos) 1841 return NoneType(); 1842 Line = Line.drop_front(Pos); 1843 1844 // Line: 1845 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" 1846 StringRef PIDStr = Line.rsplit(':').second.split('/').first; 1847 int32_t PID; 1848 if (PIDStr.getAsInteger(10, PID)) { 1849 reportError("expected PID"); 1850 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1851 return NoneType(); 1852 } 1853 1854 return PID; 1855 } 1856 1857 namespace { 1858 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) { 1859 const StringRef SecTimeStr = TimeStr.split('.').first; 1860 const StringRef USecTimeStr = TimeStr.split('.').second; 1861 uint64_t SecTime; 1862 uint64_t USecTime; 1863 if (SecTimeStr.getAsInteger(10, SecTime) || 1864 USecTimeStr.getAsInteger(10, USecTime)) 1865 return NoneType(); 1866 return SecTime * 1000000ULL + USecTime; 1867 } 1868 } 1869 1870 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { 1871 while (checkAndConsumeFS()) { 1872 } 1873 1874 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1875 if (LineEnd == StringRef::npos) { 1876 reportError("expected rest of line"); 1877 Diag << "Found: " << ParsingBuf << "\n"; 1878 return NoneType(); 1879 } 1880 StringRef Line = ParsingBuf.substr(0, LineEnd); 1881 1882 size_t Pos = Line.find("PERF_RECORD_FORK"); 1883 if (Pos == StringRef::npos) { 1884 consumeRestOfLine(); 1885 return NoneType(); 1886 } 1887 1888 ForkInfo FI; 1889 1890 const StringRef TimeStr = 1891 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1892 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { 1893 FI.Time = *TimeRes; 1894 } 1895 1896 Line = Line.drop_front(Pos); 1897 1898 // Line: 1899 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) 1900 const StringRef ChildPIDStr = Line.split('(').second.split(':').first; 1901 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { 1902 reportError("expected PID"); 1903 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; 1904 return NoneType(); 1905 } 1906 1907 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; 1908 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { 1909 reportError("expected PID"); 1910 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; 1911 return NoneType(); 1912 } 1913 1914 consumeRestOfLine(); 1915 1916 return FI; 1917 } 1918 1919 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> 1920 DataAggregator::parseMMapEvent() { 1921 while (checkAndConsumeFS()) { 1922 } 1923 1924 MMapInfo ParsedInfo; 1925 1926 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1927 if (LineEnd == StringRef::npos) { 1928 reportError("expected rest of line"); 1929 Diag << "Found: " << ParsingBuf << "\n"; 1930 return make_error_code(llvm::errc::io_error); 1931 } 1932 StringRef Line = ParsingBuf.substr(0, LineEnd); 1933 1934 size_t Pos = Line.find("PERF_RECORD_MMAP2"); 1935 if (Pos == StringRef::npos) { 1936 consumeRestOfLine(); 1937 return std::make_pair(StringRef(), ParsedInfo); 1938 } 1939 1940 // Line: 1941 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> 1942 1943 const StringRef TimeStr = 1944 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1945 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) 1946 ParsedInfo.Time = *TimeRes; 1947 1948 Line = Line.drop_front(Pos); 1949 1950 // Line: 1951 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> 1952 1953 StringRef FileName = Line.rsplit(FieldSeparator).second; 1954 if (FileName.startswith("//") || FileName.startswith("[")) { 1955 consumeRestOfLine(); 1956 return std::make_pair(StringRef(), ParsedInfo); 1957 } 1958 FileName = sys::path::filename(FileName); 1959 1960 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; 1961 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { 1962 reportError("expected PID"); 1963 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1964 return make_error_code(llvm::errc::io_error); 1965 } 1966 1967 const StringRef BaseAddressStr = Line.split('[').second.split('(').first; 1968 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) { 1969 reportError("expected base address"); 1970 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 1971 return make_error_code(llvm::errc::io_error); 1972 } 1973 1974 const StringRef SizeStr = Line.split('(').second.split(')').first; 1975 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { 1976 reportError("expected mmaped size"); 1977 Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; 1978 return make_error_code(llvm::errc::io_error); 1979 } 1980 1981 const StringRef OffsetStr = 1982 Line.split('@').second.ltrim().split(FieldSeparator).first; 1983 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { 1984 reportError("expected mmaped page-aligned offset"); 1985 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; 1986 return make_error_code(llvm::errc::io_error); 1987 } 1988 1989 consumeRestOfLine(); 1990 1991 return std::make_pair(FileName, ParsedInfo); 1992 } 1993 1994 std::error_code DataAggregator::parseMMapEvents() { 1995 outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; 1996 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName, 1997 TimerGroupDesc, opts::TimeAggregator); 1998 1999 std::multimap<StringRef, MMapInfo> GlobalMMapInfo; 2000 while (hasData()) { 2001 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); 2002 if (std::error_code EC = FileMMapInfoRes.getError()) 2003 return EC; 2004 2005 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); 2006 if (FileMMapInfo.second.PID == -1) 2007 continue; 2008 2009 // Consider only the first mapping of the file for any given PID 2010 bool PIDExists = false; 2011 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); 2012 for (auto MI = Range.first; MI != Range.second; ++MI) { 2013 if (MI->second.PID == FileMMapInfo.second.PID) { 2014 PIDExists = true; 2015 break; 2016 } 2017 } 2018 if (PIDExists) 2019 continue; 2020 2021 GlobalMMapInfo.insert(FileMMapInfo); 2022 } 2023 2024 LLVM_DEBUG({ 2025 dbgs() << "FileName -> mmap info:\n"; 2026 for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo) 2027 dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x" 2028 << Twine::utohexstr(Pair.second.MMapAddress) << ", " 2029 << Twine::utohexstr(Pair.second.Size) << " @ " 2030 << Twine::utohexstr(Pair.second.Offset) << "]\n"; 2031 }); 2032 2033 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename()); 2034 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) { 2035 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName 2036 << "\" for profile matching\n"; 2037 NameToUse = BuildIDBinaryName; 2038 } 2039 2040 auto Range = GlobalMMapInfo.equal_range(NameToUse); 2041 for (auto I = Range.first; I != Range.second; ++I) { 2042 MMapInfo &MMapInfo = I->second; 2043 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { 2044 // Check that the binary mapping matches one of the segments. 2045 bool MatchFound = false; 2046 for (auto &KV : BC->SegmentMapInfo) { 2047 SegmentInfo &SegInfo = KV.second; 2048 // The mapping is page-aligned and hence the MMapAddress could be 2049 // different from the segment start address. We cannot know the page 2050 // size of the mapping, but we know it should not exceed the segment 2051 // alignment value. Hence we are performing an approximate check. 2052 if (SegInfo.Address >= MMapInfo.MMapAddress && 2053 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) { 2054 MatchFound = true; 2055 break; 2056 } 2057 } 2058 if (!MatchFound) { 2059 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse 2060 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n'; 2061 continue; 2062 } 2063 } 2064 2065 // Set base address for shared objects. 2066 if (!BC->HasFixedLoadAddress) { 2067 Optional<uint64_t> BaseAddress = 2068 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset); 2069 if (!BaseAddress) { 2070 errs() << "PERF2BOLT-WARNING: unable to find base address of the " 2071 "binary when memory mapped at 0x" 2072 << Twine::utohexstr(MMapInfo.MMapAddress) 2073 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) 2074 << ". Ignoring profile data for this mapping\n"; 2075 continue; 2076 } else { 2077 MMapInfo.BaseAddress = *BaseAddress; 2078 } 2079 } 2080 2081 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2082 } 2083 2084 if (BinaryMMapInfo.empty()) { 2085 if (errs().has_colors()) 2086 errs().changeColor(raw_ostream::RED); 2087 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" 2088 << BC->getFilename() << "\"."; 2089 if (!GlobalMMapInfo.empty()) { 2090 errs() << " Profile for the following binary name(s) is available:\n"; 2091 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; 2092 I = GlobalMMapInfo.upper_bound(I->first)) 2093 errs() << " " << I->first << '\n'; 2094 errs() << "Please rename the input binary.\n"; 2095 } else { 2096 errs() << " Failed to extract any binary name from a profile.\n"; 2097 } 2098 if (errs().has_colors()) 2099 errs().resetColor(); 2100 2101 exit(1); 2102 } 2103 2104 return std::error_code(); 2105 } 2106 2107 std::error_code DataAggregator::parseTaskEvents() { 2108 outs() << "PERF2BOLT: parsing perf-script task events output\n"; 2109 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, 2110 TimerGroupDesc, opts::TimeAggregator); 2111 2112 while (hasData()) { 2113 if (Optional<int32_t> CommInfo = parseCommExecEvent()) { 2114 // Remove forked child that ran execve 2115 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); 2116 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) 2117 BinaryMMapInfo.erase(MMapInfoIter); 2118 consumeRestOfLine(); 2119 continue; 2120 } 2121 2122 Optional<ForkInfo> ForkInfo = parseForkEvent(); 2123 if (!ForkInfo) 2124 continue; 2125 2126 if (ForkInfo->ParentPID == ForkInfo->ChildPID) 2127 continue; 2128 2129 if (ForkInfo->Time == 0) { 2130 // Process was forked and mmaped before perf ran. In this case the child 2131 // should have its own mmap entry unless it was execve'd. 2132 continue; 2133 } 2134 2135 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); 2136 if (MMapInfoIter == BinaryMMapInfo.end()) 2137 continue; 2138 2139 MMapInfo MMapInfo = MMapInfoIter->second; 2140 MMapInfo.PID = ForkInfo->ChildPID; 2141 MMapInfo.Forked = true; 2142 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2143 } 2144 2145 outs() << "PERF2BOLT: input binary is associated with " 2146 << BinaryMMapInfo.size() << " PID(s)\n"; 2147 2148 LLVM_DEBUG({ 2149 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 2150 outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "") 2151 << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x" 2152 << Twine::utohexstr(MMI.second.Size) << ")\n"; 2153 }); 2154 2155 return std::error_code(); 2156 } 2157 2158 Optional<std::pair<StringRef, StringRef>> 2159 DataAggregator::parseNameBuildIDPair() { 2160 while (checkAndConsumeFS()) { 2161 } 2162 2163 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true); 2164 if (std::error_code EC = BuildIDStr.getError()) 2165 return NoneType(); 2166 2167 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true); 2168 if (std::error_code EC = NameStr.getError()) 2169 return NoneType(); 2170 2171 consumeRestOfLine(); 2172 return std::make_pair(NameStr.get(), BuildIDStr.get()); 2173 } 2174 2175 Optional<StringRef> 2176 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { 2177 while (hasData()) { 2178 Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair(); 2179 if (!IDPair) 2180 return NoneType(); 2181 2182 if (IDPair->second.startswith(FileBuildID)) 2183 return sys::path::filename(IDPair->first); 2184 } 2185 return NoneType(); 2186 } 2187 2188 std::error_code 2189 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { 2190 std::error_code EC; 2191 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2192 if (EC) 2193 return EC; 2194 2195 bool WriteMemLocs = false; 2196 2197 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { 2198 if (WriteMemLocs) 2199 OutFile << (Loc.IsSymbol ? "4 " : "3 "); 2200 else 2201 OutFile << (Loc.IsSymbol ? "1 " : "0 "); 2202 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) 2203 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; 2204 }; 2205 2206 uint64_t BranchValues = 0; 2207 uint64_t MemValues = 0; 2208 2209 if (BAT) 2210 OutFile << "boltedcollection\n"; 2211 if (opts::BasicAggregation) { 2212 OutFile << "no_lbr"; 2213 for (const StringMapEntry<NoneType> &Entry : EventNames) 2214 OutFile << " " << Entry.getKey(); 2215 OutFile << "\n"; 2216 2217 for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) { 2218 for (const SampleInfo &SI : Func.getValue().Data) { 2219 writeLocation(SI.Loc); 2220 OutFile << SI.Hits << "\n"; 2221 ++BranchValues; 2222 } 2223 } 2224 } else { 2225 for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 2226 for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) { 2227 writeLocation(BI.From); 2228 writeLocation(BI.To); 2229 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2230 ++BranchValues; 2231 } 2232 for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) { 2233 // Do not output if source is a known symbol, since this was already 2234 // accounted for in the source function 2235 if (BI.From.IsSymbol) 2236 continue; 2237 writeLocation(BI.From); 2238 writeLocation(BI.To); 2239 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2240 ++BranchValues; 2241 } 2242 } 2243 2244 WriteMemLocs = true; 2245 for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) { 2246 for (const MemInfo &MemEvent : Func.getValue().Data) { 2247 writeLocation(MemEvent.Offset); 2248 writeLocation(MemEvent.Addr); 2249 OutFile << MemEvent.Count << "\n"; 2250 ++MemValues; 2251 } 2252 } 2253 } 2254 2255 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues 2256 << " memory objects to " << OutputFilename << "\n"; 2257 2258 return std::error_code(); 2259 } 2260 2261 void DataAggregator::dump() const { DataReader::dump(); } 2262 2263 void DataAggregator::dump(const LBREntry &LBR) const { 2264 Diag << "From: " << Twine::utohexstr(LBR.From) 2265 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred 2266 << "\n"; 2267 } 2268 2269 void DataAggregator::dump(const PerfBranchSample &Sample) const { 2270 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n"; 2271 for (const LBREntry &LBR : Sample.LBR) 2272 dump(LBR); 2273 } 2274 2275 void DataAggregator::dump(const PerfMemSample &Sample) const { 2276 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n"; 2277 } 2278