1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions reads profile data written by perf record, 10 // aggregate it and then write it back to an output file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Profile/DataAggregator.h" 15 #include "bolt/Core/BinaryContext.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "bolt/Profile/BoltAddressTranslation.h" 18 #include "bolt/Profile/Heatmap.h" 19 #include "bolt/Utils/CommandLineOpts.h" 20 #include "bolt/Utils/Utils.h" 21 #include "llvm/ADT/ScopeExit.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Process.h" 27 #include "llvm/Support/Program.h" 28 #include "llvm/Support/Regex.h" 29 #include "llvm/Support/Timer.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include <map> 32 #include <unordered_map> 33 #include <utility> 34 35 #define DEBUG_TYPE "aggregator" 36 37 using namespace llvm; 38 using namespace bolt; 39 40 namespace opts { 41 42 static cl::opt<bool> 43 BasicAggregation("nl", 44 cl::desc("aggregate basic samples (without LBR info)"), 45 cl::cat(AggregatorCategory)); 46 47 static cl::opt<bool> 48 FilterMemProfile("filter-mem-profile", 49 cl::desc("if processing a memory profile, filter out stack or heap accesses " 50 "that won't be useful for BOLT to reduce profile file size"), 51 cl::init(true), 52 cl::cat(AggregatorCategory)); 53 54 static cl::opt<unsigned long long> 55 FilterPID("pid", 56 cl::desc("only use samples from process with specified PID"), 57 cl::init(0), 58 cl::Optional, 59 cl::cat(AggregatorCategory)); 60 61 static cl::opt<bool> 62 IgnoreBuildID("ignore-build-id", 63 cl::desc("continue even if build-ids in input binary and perf.data mismatch"), 64 cl::init(false), 65 cl::cat(AggregatorCategory)); 66 67 static cl::opt<bool> IgnoreInterruptLBR( 68 "ignore-interrupt-lbr", 69 cl::desc("ignore kernel interrupt LBR that happens asynchronously"), 70 cl::init(true), cl::cat(AggregatorCategory)); 71 72 static cl::opt<unsigned long long> 73 MaxSamples("max-samples", 74 cl::init(-1ULL), 75 cl::desc("maximum number of samples to read from LBR profile"), 76 cl::Optional, 77 cl::Hidden, 78 cl::cat(AggregatorCategory)); 79 80 cl::opt<bool> ReadPreAggregated( 81 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"), 82 cl::cat(AggregatorCategory)); 83 84 static cl::opt<bool> 85 TimeAggregator("time-aggr", 86 cl::desc("time BOLT aggregator"), 87 cl::init(false), 88 cl::ZeroOrMore, 89 cl::cat(AggregatorCategory)); 90 91 static cl::opt<bool> 92 UseEventPC("use-event-pc", 93 cl::desc("use event PC in combination with LBR sampling"), 94 cl::cat(AggregatorCategory)); 95 96 static cl::opt<bool> WriteAutoFDOData( 97 "autofdo", cl::desc("generate autofdo textual data instead of bolt data"), 98 cl::cat(AggregatorCategory)); 99 100 } // namespace opts 101 102 namespace { 103 104 const char TimerGroupName[] = "aggregator"; 105 const char TimerGroupDesc[] = "Aggregator"; 106 107 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) { 108 std::vector<SectionNameAndRange> sections; 109 for (BinarySection &Section : BC->sections()) { 110 if (!Section.isText()) 111 continue; 112 if (Section.getSize() == 0) 113 continue; 114 sections.push_back( 115 {Section.getName(), Section.getAddress(), Section.getEndAddress()}); 116 } 117 llvm::sort(sections, 118 [](const SectionNameAndRange &A, const SectionNameAndRange &B) { 119 return A.BeginAddress < B.BeginAddress; 120 }); 121 return sections; 122 } 123 } 124 125 constexpr uint64_t DataAggregator::KernelBaseAddr; 126 127 DataAggregator::~DataAggregator() { deleteTempFiles(); } 128 129 namespace { 130 void deleteTempFile(const std::string &FileName) { 131 if (std::error_code Errc = sys::fs::remove(FileName.c_str())) 132 errs() << "PERF2BOLT: failed to delete temporary file " << FileName 133 << " with error " << Errc.message() << "\n"; 134 } 135 } 136 137 void DataAggregator::deleteTempFiles() { 138 for (std::string &FileName : TempFiles) 139 deleteTempFile(FileName); 140 TempFiles.clear(); 141 } 142 143 void DataAggregator::findPerfExecutable() { 144 Optional<std::string> PerfExecutable = 145 sys::Process::FindInEnvPath("PATH", "perf"); 146 if (!PerfExecutable) { 147 outs() << "PERF2BOLT: No perf executable found!\n"; 148 exit(1); 149 } 150 PerfPath = *PerfExecutable; 151 } 152 153 void DataAggregator::start() { 154 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; 155 156 // Don't launch perf for pre-aggregated files 157 if (opts::ReadPreAggregated) 158 return; 159 160 findPerfExecutable(); 161 162 if (opts::BasicAggregation) 163 launchPerfProcess("events without LBR", 164 MainEventsPPI, 165 "script -F pid,event,ip", 166 /*Wait = */false); 167 else 168 launchPerfProcess("branch events", 169 MainEventsPPI, 170 "script -F pid,ip,brstack", 171 /*Wait = */false); 172 173 // Note: we launch script for mem events regardless of the option, as the 174 // command fails fairly fast if mem events were not collected. 175 launchPerfProcess("mem events", 176 MemEventsPPI, 177 "script -F pid,event,addr,ip", 178 /*Wait = */false); 179 180 launchPerfProcess("process events", 181 MMapEventsPPI, 182 "script --show-mmap-events", 183 /*Wait = */false); 184 185 launchPerfProcess("task events", 186 TaskEventsPPI, 187 "script --show-task-events", 188 /*Wait = */false); 189 } 190 191 void DataAggregator::abort() { 192 if (opts::ReadPreAggregated) 193 return; 194 195 std::string Error; 196 197 // Kill subprocesses in case they are not finished 198 sys::Wait(TaskEventsPPI.PI, 1, false, &Error); 199 sys::Wait(MMapEventsPPI.PI, 1, false, &Error); 200 sys::Wait(MainEventsPPI.PI, 1, false, &Error); 201 sys::Wait(MemEventsPPI.PI, 1, false, &Error); 202 203 deleteTempFiles(); 204 205 exit(1); 206 } 207 208 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, 209 const char *ArgsString, bool Wait) { 210 SmallVector<StringRef, 4> Argv; 211 212 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; 213 Argv.push_back(PerfPath.data()); 214 215 char *WritableArgsString = strdup(ArgsString); 216 char *Str = WritableArgsString; 217 do { 218 Argv.push_back(Str); 219 while (*Str && *Str != ' ') 220 ++Str; 221 if (!*Str) 222 break; 223 *Str++ = 0; 224 } while (true); 225 226 Argv.push_back("-f"); 227 Argv.push_back("-i"); 228 Argv.push_back(Filename.c_str()); 229 230 if (std::error_code Errc = 231 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) { 232 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath 233 << " with error " << Errc.message() << "\n"; 234 exit(1); 235 } 236 TempFiles.push_back(PPI.StdoutPath.data()); 237 238 if (std::error_code Errc = 239 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) { 240 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath 241 << " with error " << Errc.message() << "\n"; 242 exit(1); 243 } 244 TempFiles.push_back(PPI.StderrPath.data()); 245 246 Optional<StringRef> Redirects[] = { 247 llvm::None, // Stdin 248 StringRef(PPI.StdoutPath.data()), // Stdout 249 StringRef(PPI.StderrPath.data())}; // Stderr 250 251 LLVM_DEBUG({ 252 dbgs() << "Launching perf: "; 253 for (StringRef Arg : Argv) 254 dbgs() << Arg << " "; 255 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() 256 << "\n"; 257 }); 258 259 if (Wait) 260 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, 261 /*envp*/ llvm::None, Redirects); 262 else 263 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None, 264 Redirects); 265 266 free(WritableArgsString); 267 } 268 269 void DataAggregator::processFileBuildID(StringRef FileBuildID) { 270 PerfProcessInfo BuildIDProcessInfo; 271 launchPerfProcess("buildid list", 272 BuildIDProcessInfo, 273 "buildid-list", 274 /*Wait = */true); 275 276 if (BuildIDProcessInfo.PI.ReturnCode != 0) { 277 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 278 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); 279 StringRef ErrBuf = (*MB)->getBuffer(); 280 281 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode 282 << '\n'; 283 errs() << ErrBuf; 284 return; 285 } 286 287 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 288 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); 289 if (std::error_code EC = MB.getError()) { 290 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " 291 << EC.message() << "\n"; 292 return; 293 } 294 295 FileBuf = std::move(*MB); 296 ParsingBuf = FileBuf->getBuffer(); 297 298 Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); 299 if (!FileName) { 300 if (hasAllBuildIDs()) { 301 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " 302 "This indicates the input binary supplied for data aggregation " 303 "is not the same recorded by perf when collecting profiling " 304 "data, or there were no samples recorded for the binary. " 305 "Use -ignore-build-id option to override.\n"; 306 if (!opts::IgnoreBuildID) 307 abort(); 308 } else { 309 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " 310 "data was recorded without it\n"; 311 return; 312 } 313 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { 314 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; 315 BuildIDBinaryName = std::string(*FileName); 316 } else { 317 outs() << "PERF2BOLT: matched build-id and file name\n"; 318 } 319 320 return; 321 } 322 323 bool DataAggregator::checkPerfDataMagic(StringRef FileName) { 324 if (opts::ReadPreAggregated) 325 return true; 326 327 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName); 328 if (!FD) 329 return false; 330 331 char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; 332 333 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); 334 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( 335 *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0); 336 if (!BytesRead || *BytesRead != 7) 337 return false; 338 339 if (strncmp(Buf, "PERFILE", 7) == 0) 340 return true; 341 return false; 342 } 343 344 void DataAggregator::parsePreAggregated() { 345 std::string Error; 346 347 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 348 MemoryBuffer::getFileOrSTDIN(Filename); 349 if (std::error_code EC = MB.getError()) { 350 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " 351 << EC.message() << "\n"; 352 exit(1); 353 } 354 355 FileBuf = std::move(*MB); 356 ParsingBuf = FileBuf->getBuffer(); 357 Col = 0; 358 Line = 1; 359 if (parsePreAggregatedLBRSamples()) { 360 errs() << "PERF2BOLT: failed to parse samples\n"; 361 exit(1); 362 } 363 } 364 365 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { 366 outs() << "PERF2BOLT: writing data for autofdo tools...\n"; 367 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, 368 TimerGroupDesc, opts::TimeAggregator); 369 370 std::error_code EC; 371 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 372 if (EC) 373 return EC; 374 375 // Format: 376 // number of unique traces 377 // from_1-to_1:count_1 378 // from_2-to_2:count_2 379 // ...... 380 // from_n-to_n:count_n 381 // number of unique sample addresses 382 // addr_1:count_1 383 // addr_2:count_2 384 // ...... 385 // addr_n:count_n 386 // number of unique LBR entries 387 // src_1->dst_1:count_1 388 // src_2->dst_2:count_2 389 // ...... 390 // src_n->dst_n:count_n 391 392 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress; 393 394 // AutoFDO addresses are relative to the first allocated loadable program 395 // segment 396 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t { 397 if (Address < FirstAllocAddress) 398 return 0; 399 return Address - FirstAllocAddress; 400 }; 401 402 OutFile << FallthroughLBRs.size() << "\n"; 403 for (const auto &AggrLBR : FallthroughLBRs) { 404 const Trace &Trace = AggrLBR.first; 405 const FTInfo &Info = AggrLBR.second; 406 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-" 407 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 408 << (Info.InternCount + Info.ExternCount) << "\n"; 409 } 410 411 OutFile << BasicSamples.size() << "\n"; 412 for (const auto &Sample : BasicSamples) { 413 uint64_t PC = Sample.first; 414 uint64_t HitCount = Sample.second; 415 OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n"; 416 } 417 418 OutFile << BranchLBRs.size() << "\n"; 419 for (const auto &AggrLBR : BranchLBRs) { 420 const Trace &Trace = AggrLBR.first; 421 const BranchInfo &Info = AggrLBR.second; 422 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->" 423 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 424 << Info.TakenCount << "\n"; 425 } 426 427 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, " 428 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size() 429 << " unique branches to " << OutputFilename << "\n"; 430 431 return std::error_code(); 432 } 433 434 void DataAggregator::filterBinaryMMapInfo() { 435 if (opts::FilterPID) { 436 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); 437 if (MMapInfoIter != BinaryMMapInfo.end()) { 438 MMapInfo MMap = MMapInfoIter->second; 439 BinaryMMapInfo.clear(); 440 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); 441 } else { 442 if (errs().has_colors()) 443 errs().changeColor(raw_ostream::RED); 444 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" 445 << opts::FilterPID << "\"" 446 << " for binary \"" << BC->getFilename() << "\"."; 447 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); 448 errs() << " Profile for the following process is available:\n"; 449 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 450 outs() << " " << MMI.second.PID 451 << (MMI.second.Forked ? " (forked)\n" : "\n"); 452 453 if (errs().has_colors()) 454 errs().resetColor(); 455 456 exit(1); 457 } 458 } 459 } 460 461 Error DataAggregator::preprocessProfile(BinaryContext &BC) { 462 this->BC = &BC; 463 464 if (opts::ReadPreAggregated) { 465 parsePreAggregated(); 466 return Error::success(); 467 } 468 469 if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) { 470 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; 471 processFileBuildID(*FileBuildID); 472 } else { 473 errs() << "BOLT-WARNING: build-id will not be checked because we could " 474 "not read one from input binary\n"; 475 } 476 477 auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) { 478 std::string Error; 479 outs() << "PERF2BOLT: waiting for perf " << Name 480 << " collection to finish...\n"; 481 sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error); 482 483 if (!Error.empty()) { 484 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; 485 deleteTempFiles(); 486 exit(1); 487 } 488 489 if (PI.ReturnCode != 0) { 490 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = 491 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); 492 StringRef ErrBuf = (*ErrorMB)->getBuffer(); 493 494 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 495 errs() << ErrBuf; 496 deleteTempFiles(); 497 exit(1); 498 } 499 500 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 501 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); 502 if (std::error_code EC = MB.getError()) { 503 errs() << "Cannot open " << Process.StdoutPath.data() << ": " 504 << EC.message() << "\n"; 505 deleteTempFiles(); 506 exit(1); 507 } 508 509 FileBuf = std::move(*MB); 510 ParsingBuf = FileBuf->getBuffer(); 511 Col = 0; 512 Line = 1; 513 }; 514 515 if (opts::LinuxKernelMode) { 516 // Current MMap parsing logic does not work with linux kernel. 517 // MMap entries for linux kernel uses PERF_RECORD_MMAP 518 // format instead of typical PERF_RECORD_MMAP2 format. 519 // Since linux kernel address mapping is absolute (same as 520 // in the ELF file), we avoid parsing MMap in linux kernel mode. 521 // While generating optimized linux kernel binary, we may need 522 // to parse MMap entries. 523 524 // In linux kernel mode, we analyze and optimize 525 // all linux kernel binary instructions, irrespective 526 // of whether they are due to system calls or due to 527 // interrupts. Therefore, we cannot ignore interrupt 528 // in Linux kernel mode. 529 opts::IgnoreInterruptLBR = false; 530 } else { 531 prepareToParse("mmap events", MMapEventsPPI); 532 if (parseMMapEvents()) 533 errs() << "PERF2BOLT: failed to parse mmap events\n"; 534 } 535 536 prepareToParse("task events", TaskEventsPPI); 537 if (parseTaskEvents()) 538 errs() << "PERF2BOLT: failed to parse task events\n"; 539 540 filterBinaryMMapInfo(); 541 prepareToParse("events", MainEventsPPI); 542 543 if (opts::HeatmapMode) { 544 if (std::error_code EC = printLBRHeatMap()) { 545 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; 546 exit(1); 547 } 548 exit(0); 549 } 550 551 if ((!opts::BasicAggregation && parseBranchEvents()) || 552 (opts::BasicAggregation && parseBasicEvents())) 553 errs() << "PERF2BOLT: failed to parse samples\n"; 554 555 // We can finish early if the goal is just to generate data for autofdo 556 if (opts::WriteAutoFDOData) { 557 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename)) 558 errs() << "Error writing autofdo data to file: " << EC.message() << "\n"; 559 560 deleteTempFiles(); 561 exit(0); 562 } 563 564 // Special handling for memory events 565 std::string Error; 566 sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error); 567 if (PI.ReturnCode != 0) { 568 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 569 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data()); 570 StringRef ErrBuf = (*MB)->getBuffer(); 571 572 deleteTempFiles(); 573 574 Regex NoData("Samples for '.*' event do not have ADDR attribute set. " 575 "Cannot print 'addr' field."); 576 if (!NoData.match(ErrBuf)) { 577 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 578 errs() << ErrBuf; 579 exit(1); 580 } 581 return Error::success(); 582 } 583 584 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 585 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data()); 586 if (std::error_code EC = MB.getError()) { 587 errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": " 588 << EC.message() << "\n"; 589 deleteTempFiles(); 590 exit(1); 591 } 592 593 FileBuf = std::move(*MB); 594 ParsingBuf = FileBuf->getBuffer(); 595 Col = 0; 596 Line = 1; 597 if (const std::error_code EC = parseMemEvents()) 598 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() 599 << '\n'; 600 601 deleteTempFiles(); 602 603 return Error::success(); 604 } 605 606 Error DataAggregator::readProfile(BinaryContext &BC) { 607 processProfile(BC); 608 609 for (auto &BFI : BC.getBinaryFunctions()) { 610 BinaryFunction &Function = BFI.second; 611 convertBranchData(Function); 612 } 613 614 if (opts::AggregateOnly) { 615 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) 616 report_error("cannot create output data file", EC); 617 } 618 619 return Error::success(); 620 } 621 622 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { 623 return Function.hasProfileAvailable(); 624 } 625 626 void DataAggregator::processProfile(BinaryContext &BC) { 627 if (opts::ReadPreAggregated) 628 processPreAggregated(); 629 else if (opts::BasicAggregation) 630 processBasicEvents(); 631 else 632 processBranchEvents(); 633 634 processMemEvents(); 635 636 // Mark all functions with registered events as having a valid profile. 637 for (auto &BFI : BC.getBinaryFunctions()) { 638 BinaryFunction &BF = BFI.second; 639 if (getBranchData(BF)) { 640 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE 641 : BinaryFunction::PF_LBR; 642 BF.markProfiled(Flags); 643 } 644 } 645 646 // Release intermediate storage. 647 clear(BranchLBRs); 648 clear(FallthroughLBRs); 649 clear(AggregatedLBRs); 650 clear(BasicSamples); 651 clear(MemSamples); 652 } 653 654 BinaryFunction * 655 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { 656 if (!BC->containsAddress(Address)) 657 return nullptr; 658 659 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, 660 /*UseMaxSize=*/true); 661 } 662 663 StringRef DataAggregator::getLocationName(BinaryFunction &Func, 664 uint64_t Count) { 665 if (!BAT) 666 return Func.getOneName(); 667 668 const BinaryFunction *OrigFunc = &Func; 669 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) { 670 NumColdSamples += Count; 671 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr); 672 if (HotFunc) 673 OrigFunc = HotFunc; 674 } 675 // If it is a local function, prefer the name containing the file name where 676 // the local function was declared 677 for (StringRef AlternativeName : OrigFunc->getNames()) { 678 size_t FileNameIdx = AlternativeName.find('/'); 679 // Confirm the alternative name has the pattern Symbol/FileName/1 before 680 // using it 681 if (FileNameIdx == StringRef::npos || 682 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos) 683 continue; 684 return AlternativeName; 685 } 686 return OrigFunc->getOneName(); 687 } 688 689 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address, 690 uint64_t Count) { 691 auto I = NamesToSamples.find(Func.getOneName()); 692 if (I == NamesToSamples.end()) { 693 bool Success; 694 StringRef LocName = getLocationName(Func, Count); 695 std::tie(I, Success) = NamesToSamples.insert( 696 std::make_pair(Func.getOneName(), 697 FuncSampleData(LocName, FuncSampleData::ContainerTy()))); 698 } 699 700 Address -= Func.getAddress(); 701 if (BAT) 702 Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false); 703 704 I->second.bumpCount(Address, Count); 705 return true; 706 } 707 708 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, 709 uint64_t To, uint64_t Count, 710 uint64_t Mispreds) { 711 FuncBranchData *AggrData = getBranchData(Func); 712 if (!AggrData) { 713 AggrData = &NamesToBranches[Func.getOneName()]; 714 AggrData->Name = getLocationName(Func, Count); 715 setBranchData(Func, AggrData); 716 } 717 718 From -= Func.getAddress(); 719 To -= Func.getAddress(); 720 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName() 721 << " @ " << Twine::utohexstr(From) << " -> " 722 << Func.getPrintName() << " @ " << Twine::utohexstr(To) 723 << '\n'); 724 if (BAT) { 725 From = BAT->translate(Func, From, /*IsBranchSrc=*/true); 726 To = BAT->translate(Func, To, /*IsBranchSrc=*/false); 727 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: " 728 << Func.getPrintName() << " @ " << Twine::utohexstr(From) 729 << " -> " << Func.getPrintName() << " @ " 730 << Twine::utohexstr(To) << '\n'); 731 } 732 733 AggrData->bumpBranchCount(From, To, Count, Mispreds); 734 return true; 735 } 736 737 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, 738 BinaryFunction *ToFunc, uint64_t From, 739 uint64_t To, uint64_t Count, 740 uint64_t Mispreds) { 741 FuncBranchData *FromAggrData = nullptr; 742 FuncBranchData *ToAggrData = nullptr; 743 StringRef SrcFunc; 744 StringRef DstFunc; 745 if (FromFunc) { 746 SrcFunc = getLocationName(*FromFunc, Count); 747 FromAggrData = getBranchData(*FromFunc); 748 if (!FromAggrData) { 749 FromAggrData = &NamesToBranches[FromFunc->getOneName()]; 750 FromAggrData->Name = SrcFunc; 751 setBranchData(*FromFunc, FromAggrData); 752 } 753 From -= FromFunc->getAddress(); 754 if (BAT) 755 From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true); 756 757 recordExit(*FromFunc, From, Mispreds, Count); 758 } 759 if (ToFunc) { 760 DstFunc = getLocationName(*ToFunc, 0); 761 ToAggrData = getBranchData(*ToFunc); 762 if (!ToAggrData) { 763 ToAggrData = &NamesToBranches[ToFunc->getOneName()]; 764 ToAggrData->Name = DstFunc; 765 setBranchData(*ToFunc, ToAggrData); 766 } 767 To -= ToFunc->getAddress(); 768 if (BAT) 769 To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false); 770 771 recordEntry(*ToFunc, To, Mispreds, Count); 772 } 773 774 if (FromAggrData) 775 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To), 776 Count, Mispreds); 777 if (ToAggrData) 778 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To, 779 Count, Mispreds); 780 return true; 781 } 782 783 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, 784 uint64_t Mispreds) { 785 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); 786 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); 787 if (!FromFunc && !ToFunc) 788 return false; 789 790 if (FromFunc == ToFunc) { 791 recordBranch(*FromFunc, From - FromFunc->getAddress(), 792 To - FromFunc->getAddress(), Count, Mispreds); 793 return doIntraBranch(*FromFunc, From, To, Count, Mispreds); 794 } 795 796 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); 797 } 798 799 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, 800 uint64_t Count) { 801 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); 802 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); 803 if (!FromFunc || !ToFunc) { 804 LLVM_DEBUG( 805 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() 806 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 807 << " and ending in " << ToFunc->getPrintName() << " @ " 808 << ToFunc->getPrintName() << " @ " 809 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 810 NumLongRangeTraces += Count; 811 return false; 812 } 813 if (FromFunc != ToFunc) { 814 NumInvalidTraces += Count; 815 LLVM_DEBUG( 816 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 817 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 818 << " and ending in " << ToFunc->getPrintName() << " @ " 819 << ToFunc->getPrintName() << " @ " 820 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 821 return false; 822 } 823 824 Optional<BoltAddressTranslation::FallthroughListTy> FTs = 825 BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From) 826 : getFallthroughsInTrace(*FromFunc, First, Second, Count); 827 if (!FTs) { 828 LLVM_DEBUG( 829 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 830 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 831 << " and ending in " << ToFunc->getPrintName() << " @ " 832 << ToFunc->getPrintName() << " @ " 833 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 834 NumInvalidTraces += Count; 835 return false; 836 } 837 838 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " 839 << FromFunc->getPrintName() << ":" 840 << Twine::utohexstr(First.To) << " to " 841 << Twine::utohexstr(Second.From) << ".\n"); 842 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs) 843 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(), 844 Pair.second + FromFunc->getAddress(), Count, false); 845 846 return true; 847 } 848 849 bool DataAggregator::recordTrace( 850 BinaryFunction &BF, 851 const LBREntry &FirstLBR, 852 const LBREntry &SecondLBR, 853 uint64_t Count, 854 SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const { 855 BinaryContext &BC = BF.getBinaryContext(); 856 857 if (!BF.isSimple()) 858 return false; 859 860 assert(BF.hasCFG() && "can only record traces in CFG state"); 861 862 // Offsets of the trace within this function. 863 const uint64_t From = FirstLBR.To - BF.getAddress(); 864 const uint64_t To = SecondLBR.From - BF.getAddress(); 865 866 if (From > To) 867 return false; 868 869 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 870 BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 871 872 if (!FromBB || !ToBB) 873 return false; 874 875 // Adjust FromBB if the first LBR is a return from the last instruction in 876 // the previous block (that instruction should be a call). 877 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && 878 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { 879 BinaryBasicBlock *PrevBB = BF.getLayout().getBlock(FromBB->getIndex() - 1); 880 if (PrevBB->getSuccessor(FromBB->getLabel())) { 881 const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); 882 if (Instr && BC.MIB->isCall(*Instr)) 883 FromBB = PrevBB; 884 else 885 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR 886 << '\n'); 887 } else { 888 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); 889 } 890 } 891 892 // Fill out information for fall-through edges. The From and To could be 893 // within the same basic block, e.g. when two call instructions are in the 894 // same block. In this case we skip the processing. 895 if (FromBB == ToBB) 896 return true; 897 898 // Process blocks in the original layout order. 899 BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex()); 900 assert(BB == FromBB && "index mismatch"); 901 while (BB != ToBB) { 902 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1); 903 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); 904 905 // Check for bad LBRs. 906 if (!BB->getSuccessor(NextBB->getLabel())) { 907 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" 908 << " " << FirstLBR << '\n' 909 << " " << SecondLBR << '\n'); 910 return false; 911 } 912 913 // Record fall-through jumps 914 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB); 915 BI.Count += Count; 916 917 if (Branches) { 918 const MCInst *Instr = BB->getLastNonPseudoInstr(); 919 uint64_t Offset = 0; 920 if (Instr) 921 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0); 922 else 923 Offset = BB->getOffset(); 924 925 Branches->emplace_back(Offset, NextBB->getOffset()); 926 } 927 928 BB = NextBB; 929 } 930 931 return true; 932 } 933 934 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> 935 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, 936 const LBREntry &FirstLBR, 937 const LBREntry &SecondLBR, 938 uint64_t Count) const { 939 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res; 940 941 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res)) 942 return NoneType(); 943 944 return Res; 945 } 946 947 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, 948 uint64_t Count) const { 949 if (To > BF.getSize()) 950 return false; 951 952 if (!BF.hasProfile()) 953 BF.ExecutionCount = 0; 954 955 BinaryBasicBlock *EntryBB = nullptr; 956 if (To == 0) { 957 BF.ExecutionCount += Count; 958 if (!BF.empty()) 959 EntryBB = &BF.front(); 960 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) { 961 if (BB->isEntryPoint()) 962 EntryBB = BB; 963 } 964 965 if (EntryBB) 966 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); 967 968 return true; 969 } 970 971 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, 972 uint64_t Count) const { 973 if (!BF.isSimple() || From > BF.getSize()) 974 return false; 975 976 if (!BF.hasProfile()) 977 BF.ExecutionCount = 0; 978 979 return true; 980 } 981 982 ErrorOr<LBREntry> DataAggregator::parseLBREntry() { 983 LBREntry Res; 984 ErrorOr<StringRef> FromStrRes = parseString('/'); 985 if (std::error_code EC = FromStrRes.getError()) 986 return EC; 987 StringRef OffsetStr = FromStrRes.get(); 988 if (OffsetStr.getAsInteger(0, Res.From)) { 989 reportError("expected hexadecimal number with From address"); 990 Diag << "Found: " << OffsetStr << "\n"; 991 return make_error_code(llvm::errc::io_error); 992 } 993 994 ErrorOr<StringRef> ToStrRes = parseString('/'); 995 if (std::error_code EC = ToStrRes.getError()) 996 return EC; 997 OffsetStr = ToStrRes.get(); 998 if (OffsetStr.getAsInteger(0, Res.To)) { 999 reportError("expected hexadecimal number with To address"); 1000 Diag << "Found: " << OffsetStr << "\n"; 1001 return make_error_code(llvm::errc::io_error); 1002 } 1003 1004 ErrorOr<StringRef> MispredStrRes = parseString('/'); 1005 if (std::error_code EC = MispredStrRes.getError()) 1006 return EC; 1007 StringRef MispredStr = MispredStrRes.get(); 1008 if (MispredStr.size() != 1 || 1009 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { 1010 reportError("expected single char for mispred bit"); 1011 Diag << "Found: " << MispredStr << "\n"; 1012 return make_error_code(llvm::errc::io_error); 1013 } 1014 Res.Mispred = MispredStr[0] == 'M'; 1015 1016 static bool MispredWarning = true; 1017 if (MispredStr[0] == '-' && MispredWarning) { 1018 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; 1019 MispredWarning = false; 1020 } 1021 1022 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true); 1023 if (std::error_code EC = Rest.getError()) 1024 return EC; 1025 if (Rest.get().size() < 5) { 1026 reportError("expected rest of LBR entry"); 1027 Diag << "Found: " << Rest.get() << "\n"; 1028 return make_error_code(llvm::errc::io_error); 1029 } 1030 return Res; 1031 } 1032 1033 bool DataAggregator::checkAndConsumeFS() { 1034 if (ParsingBuf[0] != FieldSeparator) 1035 return false; 1036 1037 ParsingBuf = ParsingBuf.drop_front(1); 1038 Col += 1; 1039 return true; 1040 } 1041 1042 void DataAggregator::consumeRestOfLine() { 1043 size_t LineEnd = ParsingBuf.find_first_of('\n'); 1044 if (LineEnd == StringRef::npos) { 1045 ParsingBuf = StringRef(); 1046 Col = 0; 1047 Line += 1; 1048 return; 1049 } 1050 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); 1051 Col = 0; 1052 Line += 1; 1053 } 1054 1055 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { 1056 PerfBranchSample Res; 1057 1058 while (checkAndConsumeFS()) { 1059 } 1060 1061 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1062 if (std::error_code EC = PIDRes.getError()) 1063 return EC; 1064 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1065 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) { 1066 consumeRestOfLine(); 1067 return make_error_code(errc::no_such_process); 1068 } 1069 1070 while (checkAndConsumeFS()) { 1071 } 1072 1073 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1074 if (std::error_code EC = PCRes.getError()) 1075 return EC; 1076 Res.PC = PCRes.get(); 1077 1078 if (checkAndConsumeNewLine()) 1079 return Res; 1080 1081 while (!checkAndConsumeNewLine()) { 1082 checkAndConsumeFS(); 1083 1084 ErrorOr<LBREntry> LBRRes = parseLBREntry(); 1085 if (std::error_code EC = LBRRes.getError()) 1086 return EC; 1087 LBREntry LBR = LBRRes.get(); 1088 if (ignoreKernelInterrupt(LBR)) 1089 continue; 1090 if (!BC->HasFixedLoadAddress) 1091 adjustLBR(LBR, MMapInfoIter->second); 1092 Res.LBR.push_back(LBR); 1093 } 1094 1095 return Res; 1096 } 1097 1098 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { 1099 while (checkAndConsumeFS()) { 1100 } 1101 1102 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1103 if (std::error_code EC = PIDRes.getError()) 1104 return EC; 1105 1106 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1107 if (MMapInfoIter == BinaryMMapInfo.end()) { 1108 consumeRestOfLine(); 1109 return PerfBasicSample{StringRef(), 0}; 1110 } 1111 1112 while (checkAndConsumeFS()) { 1113 } 1114 1115 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1116 if (std::error_code EC = Event.getError()) 1117 return EC; 1118 1119 while (checkAndConsumeFS()) { 1120 } 1121 1122 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true); 1123 if (std::error_code EC = AddrRes.getError()) 1124 return EC; 1125 1126 if (!checkAndConsumeNewLine()) { 1127 reportError("expected end of line"); 1128 return make_error_code(llvm::errc::io_error); 1129 } 1130 1131 uint64_t Address = *AddrRes; 1132 if (!BC->HasFixedLoadAddress) 1133 adjustAddress(Address, MMapInfoIter->second); 1134 1135 return PerfBasicSample{Event.get(), Address}; 1136 } 1137 1138 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { 1139 PerfMemSample Res{0, 0}; 1140 1141 while (checkAndConsumeFS()) { 1142 } 1143 1144 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1145 if (std::error_code EC = PIDRes.getError()) 1146 return EC; 1147 1148 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1149 if (MMapInfoIter == BinaryMMapInfo.end()) { 1150 consumeRestOfLine(); 1151 return Res; 1152 } 1153 1154 while (checkAndConsumeFS()) { 1155 } 1156 1157 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1158 if (std::error_code EC = Event.getError()) 1159 return EC; 1160 if (Event.get().find("mem-loads") == StringRef::npos) { 1161 consumeRestOfLine(); 1162 return Res; 1163 } 1164 1165 while (checkAndConsumeFS()) { 1166 } 1167 1168 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator); 1169 if (std::error_code EC = AddrRes.getError()) 1170 return EC; 1171 1172 while (checkAndConsumeFS()) { 1173 } 1174 1175 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1176 if (std::error_code EC = PCRes.getError()) { 1177 consumeRestOfLine(); 1178 return EC; 1179 } 1180 1181 if (!checkAndConsumeNewLine()) { 1182 reportError("expected end of line"); 1183 return make_error_code(llvm::errc::io_error); 1184 } 1185 1186 uint64_t Address = *AddrRes; 1187 if (!BC->HasFixedLoadAddress) 1188 adjustAddress(Address, MMapInfoIter->second); 1189 1190 return PerfMemSample{PCRes.get(), Address}; 1191 } 1192 1193 ErrorOr<Location> DataAggregator::parseLocationOrOffset() { 1194 auto parseOffset = [this]() -> ErrorOr<Location> { 1195 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator); 1196 if (std::error_code EC = Res.getError()) 1197 return EC; 1198 return Location(Res.get()); 1199 }; 1200 1201 size_t Sep = ParsingBuf.find_first_of(" \n"); 1202 if (Sep == StringRef::npos) 1203 return parseOffset(); 1204 StringRef LookAhead = ParsingBuf.substr(0, Sep); 1205 if (LookAhead.find_first_of(":") == StringRef::npos) 1206 return parseOffset(); 1207 1208 ErrorOr<StringRef> BuildID = parseString(':'); 1209 if (std::error_code EC = BuildID.getError()) 1210 return EC; 1211 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator); 1212 if (std::error_code EC = Offset.getError()) 1213 return EC; 1214 return Location(true, BuildID.get(), Offset.get()); 1215 } 1216 1217 ErrorOr<DataAggregator::AggregatedLBREntry> 1218 DataAggregator::parseAggregatedLBREntry() { 1219 while (checkAndConsumeFS()) { 1220 } 1221 1222 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); 1223 if (std::error_code EC = TypeOrErr.getError()) 1224 return EC; 1225 auto Type = AggregatedLBREntry::BRANCH; 1226 if (TypeOrErr.get() == "B") { 1227 Type = AggregatedLBREntry::BRANCH; 1228 } else if (TypeOrErr.get() == "F") { 1229 Type = AggregatedLBREntry::FT; 1230 } else if (TypeOrErr.get() == "f") { 1231 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; 1232 } else { 1233 reportError("expected B, F or f"); 1234 return make_error_code(llvm::errc::io_error); 1235 } 1236 1237 while (checkAndConsumeFS()) { 1238 } 1239 ErrorOr<Location> From = parseLocationOrOffset(); 1240 if (std::error_code EC = From.getError()) 1241 return EC; 1242 1243 while (checkAndConsumeFS()) { 1244 } 1245 ErrorOr<Location> To = parseLocationOrOffset(); 1246 if (std::error_code EC = To.getError()) 1247 return EC; 1248 1249 while (checkAndConsumeFS()) { 1250 } 1251 ErrorOr<int64_t> Frequency = 1252 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); 1253 if (std::error_code EC = Frequency.getError()) 1254 return EC; 1255 1256 uint64_t Mispreds = 0; 1257 if (Type == AggregatedLBREntry::BRANCH) { 1258 while (checkAndConsumeFS()) { 1259 } 1260 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); 1261 if (std::error_code EC = MispredsOrErr.getError()) 1262 return EC; 1263 Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); 1264 } 1265 1266 if (!checkAndConsumeNewLine()) { 1267 reportError("expected end of line"); 1268 return make_error_code(llvm::errc::io_error); 1269 } 1270 1271 return AggregatedLBREntry{From.get(), To.get(), 1272 static_cast<uint64_t>(Frequency.get()), Mispreds, 1273 Type}; 1274 } 1275 1276 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { 1277 return opts::IgnoreInterruptLBR && 1278 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); 1279 } 1280 1281 std::error_code DataAggregator::printLBRHeatMap() { 1282 outs() << "PERF2BOLT: parse branch events...\n"; 1283 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1284 TimerGroupDesc, opts::TimeAggregator); 1285 1286 if (opts::LinuxKernelMode) { 1287 opts::HeatmapMaxAddress = 0xffffffffffffffff; 1288 opts::HeatmapMinAddress = KernelBaseAddr; 1289 } 1290 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, 1291 opts::HeatmapMaxAddress, getTextSections(BC)); 1292 uint64_t NumTotalSamples = 0; 1293 1294 if (opts::BasicAggregation) { 1295 while (hasData()) { 1296 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); 1297 if (std::error_code EC = SampleRes.getError()) { 1298 if (EC == errc::no_such_process) 1299 continue; 1300 return EC; 1301 } 1302 PerfBasicSample &Sample = SampleRes.get(); 1303 HM.registerAddress(Sample.PC); 1304 NumTotalSamples++; 1305 } 1306 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n"; 1307 } else { 1308 while (hasData()) { 1309 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1310 if (std::error_code EC = SampleRes.getError()) { 1311 if (EC == errc::no_such_process) 1312 continue; 1313 return EC; 1314 } 1315 1316 PerfBranchSample &Sample = SampleRes.get(); 1317 1318 // LBRs are stored in reverse execution order. NextLBR refers to the next 1319 // executed branch record. 1320 const LBREntry *NextLBR = nullptr; 1321 for (const LBREntry &LBR : Sample.LBR) { 1322 if (NextLBR) { 1323 // Record fall-through trace. 1324 const uint64_t TraceFrom = LBR.To; 1325 const uint64_t TraceTo = NextLBR->From; 1326 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; 1327 } 1328 NextLBR = &LBR; 1329 } 1330 if (!Sample.LBR.empty()) { 1331 HM.registerAddress(Sample.LBR.front().To); 1332 HM.registerAddress(Sample.LBR.back().From); 1333 } 1334 NumTotalSamples += Sample.LBR.size(); 1335 } 1336 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; 1337 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; 1338 } 1339 1340 if (!NumTotalSamples) { 1341 if (opts::BasicAggregation) { 1342 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " 1343 "Cannot build heatmap."; 1344 } else { 1345 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " 1346 "Cannot build heatmap. Use -nl for building heatmap from " 1347 "basic events.\n"; 1348 } 1349 exit(1); 1350 } 1351 1352 outs() << "HEATMAP: building heat map...\n"; 1353 1354 for (const auto &LBR : FallthroughLBRs) { 1355 const Trace &Trace = LBR.first; 1356 const FTInfo &Info = LBR.second; 1357 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); 1358 } 1359 1360 if (HM.getNumInvalidRanges()) 1361 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; 1362 1363 if (!HM.size()) { 1364 errs() << "HEATMAP-ERROR: no valid traces registered\n"; 1365 exit(1); 1366 } 1367 1368 HM.print(opts::OutputFilename); 1369 if (opts::OutputFilename == "-") 1370 HM.printCDF(opts::OutputFilename); 1371 else 1372 HM.printCDF(opts::OutputFilename + ".csv"); 1373 if (opts::OutputFilename == "-") 1374 HM.printSectionHotness(opts::OutputFilename); 1375 else 1376 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv"); 1377 1378 return std::error_code(); 1379 } 1380 1381 std::error_code DataAggregator::parseBranchEvents() { 1382 outs() << "PERF2BOLT: parse branch events...\n"; 1383 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1384 TimerGroupDesc, opts::TimeAggregator); 1385 1386 uint64_t NumTotalSamples = 0; 1387 uint64_t NumEntries = 0; 1388 uint64_t NumSamples = 0; 1389 uint64_t NumSamplesNoLBR = 0; 1390 uint64_t NumTraces = 0; 1391 bool NeedsSkylakeFix = false; 1392 1393 while (hasData() && NumTotalSamples < opts::MaxSamples) { 1394 ++NumTotalSamples; 1395 1396 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1397 if (std::error_code EC = SampleRes.getError()) { 1398 if (EC == errc::no_such_process) 1399 continue; 1400 return EC; 1401 } 1402 ++NumSamples; 1403 1404 PerfBranchSample &Sample = SampleRes.get(); 1405 if (opts::WriteAutoFDOData) 1406 ++BasicSamples[Sample.PC]; 1407 1408 if (Sample.LBR.empty()) { 1409 ++NumSamplesNoLBR; 1410 continue; 1411 } 1412 1413 NumEntries += Sample.LBR.size(); 1414 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { 1415 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; 1416 NeedsSkylakeFix = true; 1417 } 1418 1419 // LBRs are stored in reverse execution order. NextPC refers to the next 1420 // recorded executed PC. 1421 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0; 1422 uint32_t NumEntry = 0; 1423 for (const LBREntry &LBR : Sample.LBR) { 1424 ++NumEntry; 1425 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) 1426 // sometimes record entry 32 as an exact copy of entry 31. This will cause 1427 // us to likely record an invalid trace and generate a stale function for 1428 // BAT mode (non BAT disassembles the function and is able to ignore this 1429 // trace at aggregation time). Drop first 2 entries (last two, in 1430 // chronological order) 1431 if (NeedsSkylakeFix && NumEntry <= 2) 1432 continue; 1433 if (NextPC) { 1434 // Record fall-through trace. 1435 const uint64_t TraceFrom = LBR.To; 1436 const uint64_t TraceTo = NextPC; 1437 const BinaryFunction *TraceBF = 1438 getBinaryFunctionContainingAddress(TraceFrom); 1439 if (TraceBF && TraceBF->containsAddress(TraceTo)) { 1440 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; 1441 if (TraceBF->containsAddress(LBR.From)) 1442 ++Info.InternCount; 1443 else 1444 ++Info.ExternCount; 1445 } else { 1446 if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) { 1447 LLVM_DEBUG(dbgs() 1448 << "Invalid trace starting in " 1449 << TraceBF->getPrintName() << " @ " 1450 << Twine::utohexstr(TraceFrom - TraceBF->getAddress()) 1451 << " and ending @ " << Twine::utohexstr(TraceTo) 1452 << '\n'); 1453 ++NumInvalidTraces; 1454 } else { 1455 LLVM_DEBUG(dbgs() 1456 << "Out of range trace starting in " 1457 << (TraceBF ? TraceBF->getPrintName() : "None") << " @ " 1458 << Twine::utohexstr( 1459 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0)) 1460 << " and ending in " 1461 << (getBinaryFunctionContainingAddress(TraceTo) 1462 ? getBinaryFunctionContainingAddress(TraceTo) 1463 ->getPrintName() 1464 : "None") 1465 << " @ " 1466 << Twine::utohexstr( 1467 TraceTo - 1468 (getBinaryFunctionContainingAddress(TraceTo) 1469 ? getBinaryFunctionContainingAddress(TraceTo) 1470 ->getAddress() 1471 : 0)) 1472 << '\n'); 1473 ++NumLongRangeTraces; 1474 } 1475 } 1476 ++NumTraces; 1477 } 1478 NextPC = LBR.From; 1479 1480 uint64_t From = LBR.From; 1481 if (!getBinaryFunctionContainingAddress(From)) 1482 From = 0; 1483 uint64_t To = LBR.To; 1484 if (!getBinaryFunctionContainingAddress(To)) 1485 To = 0; 1486 if (!From && !To) 1487 continue; 1488 BranchInfo &Info = BranchLBRs[Trace(From, To)]; 1489 ++Info.TakenCount; 1490 Info.MispredCount += LBR.Mispred; 1491 } 1492 } 1493 1494 for (const auto &LBR : BranchLBRs) { 1495 const Trace &Trace = LBR.first; 1496 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From)) 1497 BF->setHasProfileAvailable(); 1498 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To)) 1499 BF->setHasProfileAvailable(); 1500 } 1501 1502 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { 1503 OS << " ("; 1504 if (OS.has_colors()) { 1505 if (Percent > T2) 1506 OS.changeColor(raw_ostream::RED); 1507 else if (Percent > T1) 1508 OS.changeColor(raw_ostream::YELLOW); 1509 else 1510 OS.changeColor(raw_ostream::GREEN); 1511 } 1512 OS << format("%.1f%%", Percent); 1513 if (OS.has_colors()) 1514 OS.resetColor(); 1515 OS << ")"; 1516 }; 1517 1518 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries 1519 << " LBR entries\n"; 1520 if (NumTotalSamples) { 1521 if (NumSamples && NumSamplesNoLBR == NumSamples) { 1522 // Note: we don't know if perf2bolt is being used to parse memory samples 1523 // at this point. In this case, it is OK to parse zero LBRs. 1524 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " 1525 "LBR. Record profile with perf record -j any or run perf2bolt " 1526 "in no-LBR mode with -nl (the performance improvement in -nl " 1527 "mode may be limited)\n"; 1528 } else { 1529 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; 1530 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; 1531 outs() << "PERF2BOLT: " << IgnoredSamples << " samples"; 1532 printColored(outs(), PercentIgnored, 20, 50); 1533 outs() << " were ignored\n"; 1534 if (PercentIgnored > 50.0f) 1535 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " 1536 "were attributed to the input binary\n"; 1537 } 1538 } 1539 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1540 << NumInvalidTraces; 1541 float Perc = 0.0f; 1542 if (NumTraces > 0) { 1543 Perc = NumInvalidTraces * 100.0f / NumTraces; 1544 printColored(outs(), Perc, 5, 10); 1545 } 1546 outs() << "\n"; 1547 if (Perc > 10.0f) 1548 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1549 "binary is probably not the same binary used during profiling " 1550 "collection. The generated data may be ineffective for improving " 1551 "performance.\n\n"; 1552 1553 outs() << "PERF2BOLT: out of range traces involving unknown regions: " 1554 << NumLongRangeTraces; 1555 if (NumTraces > 0) 1556 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1557 outs() << "\n"; 1558 1559 if (NumColdSamples > 0) { 1560 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; 1561 outs() << "PERF2BOLT: " << NumColdSamples 1562 << format(" (%.1f%%)", ColdSamples) 1563 << " samples recorded in cold regions of split functions.\n"; 1564 if (ColdSamples > 5.0f) 1565 outs() 1566 << "WARNING: The BOLT-processed binary where samples were collected " 1567 "likely used bad data or your service observed a large shift in " 1568 "profile. You may want to audit this.\n"; 1569 } 1570 1571 return std::error_code(); 1572 } 1573 1574 void DataAggregator::processBranchEvents() { 1575 outs() << "PERF2BOLT: processing branch events...\n"; 1576 NamedRegionTimer T("processBranch", "Processing branch events", 1577 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1578 1579 for (const auto &AggrLBR : FallthroughLBRs) { 1580 const Trace &Loc = AggrLBR.first; 1581 const FTInfo &Info = AggrLBR.second; 1582 LBREntry First{Loc.From, Loc.From, false}; 1583 LBREntry Second{Loc.To, Loc.To, false}; 1584 if (Info.InternCount) 1585 doTrace(First, Second, Info.InternCount); 1586 if (Info.ExternCount) { 1587 First.From = 0; 1588 doTrace(First, Second, Info.ExternCount); 1589 } 1590 } 1591 1592 for (const auto &AggrLBR : BranchLBRs) { 1593 const Trace &Loc = AggrLBR.first; 1594 const BranchInfo &Info = AggrLBR.second; 1595 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); 1596 } 1597 } 1598 1599 std::error_code DataAggregator::parseBasicEvents() { 1600 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; 1601 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, 1602 TimerGroupDesc, opts::TimeAggregator); 1603 while (hasData()) { 1604 ErrorOr<PerfBasicSample> Sample = parseBasicSample(); 1605 if (std::error_code EC = Sample.getError()) 1606 return EC; 1607 1608 if (!Sample->PC) 1609 continue; 1610 1611 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1612 BF->setHasProfileAvailable(); 1613 1614 ++BasicSamples[Sample->PC]; 1615 EventNames.insert(Sample->EventName); 1616 } 1617 1618 return std::error_code(); 1619 } 1620 1621 void DataAggregator::processBasicEvents() { 1622 outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; 1623 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, 1624 TimerGroupDesc, opts::TimeAggregator); 1625 uint64_t OutOfRangeSamples = 0; 1626 uint64_t NumSamples = 0; 1627 for (auto &Sample : BasicSamples) { 1628 const uint64_t PC = Sample.first; 1629 const uint64_t HitCount = Sample.second; 1630 NumSamples += HitCount; 1631 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1632 if (!Func) { 1633 OutOfRangeSamples += HitCount; 1634 continue; 1635 } 1636 1637 doSample(*Func, PC, HitCount); 1638 } 1639 outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; 1640 1641 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " 1642 << OutOfRangeSamples; 1643 float Perc = 0.0f; 1644 if (NumSamples > 0) { 1645 outs() << " ("; 1646 Perc = OutOfRangeSamples * 100.0f / NumSamples; 1647 if (outs().has_colors()) { 1648 if (Perc > 60.0f) 1649 outs().changeColor(raw_ostream::RED); 1650 else if (Perc > 40.0f) 1651 outs().changeColor(raw_ostream::YELLOW); 1652 else 1653 outs().changeColor(raw_ostream::GREEN); 1654 } 1655 outs() << format("%.1f%%", Perc); 1656 if (outs().has_colors()) 1657 outs().resetColor(); 1658 outs() << ")"; 1659 } 1660 outs() << "\n"; 1661 if (Perc > 80.0f) 1662 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1663 "binary is probably not the same binary used during profiling " 1664 "collection. The generated data may be ineffective for improving " 1665 "performance.\n\n"; 1666 } 1667 1668 std::error_code DataAggregator::parseMemEvents() { 1669 outs() << "PERF2BOLT: parsing memory events...\n"; 1670 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, 1671 TimerGroupDesc, opts::TimeAggregator); 1672 while (hasData()) { 1673 ErrorOr<PerfMemSample> Sample = parseMemSample(); 1674 if (std::error_code EC = Sample.getError()) 1675 return EC; 1676 1677 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1678 BF->setHasProfileAvailable(); 1679 1680 MemSamples.emplace_back(std::move(Sample.get())); 1681 } 1682 1683 return std::error_code(); 1684 } 1685 1686 void DataAggregator::processMemEvents() { 1687 NamedRegionTimer T("ProcessMemEvents", "Processing mem events", 1688 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1689 for (const PerfMemSample &Sample : MemSamples) { 1690 uint64_t PC = Sample.PC; 1691 uint64_t Addr = Sample.Addr; 1692 StringRef FuncName; 1693 StringRef MemName; 1694 1695 // Try to resolve symbol for PC 1696 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1697 if (!Func) { 1698 LLVM_DEBUG(if (PC != 0) { 1699 dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x" 1700 << Twine::utohexstr(Addr) << "\n"; 1701 }); 1702 continue; 1703 } 1704 1705 FuncName = Func->getOneName(); 1706 PC -= Func->getAddress(); 1707 1708 // Try to resolve symbol for memory load 1709 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { 1710 MemName = BD->getName(); 1711 Addr -= BD->getAddress(); 1712 } else if (opts::FilterMemProfile) { 1713 // Filter out heap/stack accesses 1714 continue; 1715 } 1716 1717 const Location FuncLoc(!FuncName.empty(), FuncName, PC); 1718 const Location AddrLoc(!MemName.empty(), MemName, Addr); 1719 1720 FuncMemData *MemData = &NamesToMemEvents[FuncName]; 1721 setMemData(*Func, MemData); 1722 MemData->update(FuncLoc, AddrLoc); 1723 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); 1724 } 1725 } 1726 1727 std::error_code DataAggregator::parsePreAggregatedLBRSamples() { 1728 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; 1729 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", 1730 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1731 while (hasData()) { 1732 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); 1733 if (std::error_code EC = AggrEntry.getError()) 1734 return EC; 1735 1736 if (BinaryFunction *BF = 1737 getBinaryFunctionContainingAddress(AggrEntry->From.Offset)) 1738 BF->setHasProfileAvailable(); 1739 if (BinaryFunction *BF = 1740 getBinaryFunctionContainingAddress(AggrEntry->To.Offset)) 1741 BF->setHasProfileAvailable(); 1742 1743 AggregatedLBRs.emplace_back(std::move(AggrEntry.get())); 1744 } 1745 1746 return std::error_code(); 1747 } 1748 1749 void DataAggregator::processPreAggregated() { 1750 outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; 1751 NamedRegionTimer T("processAggregated", "Processing aggregated branch events", 1752 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1753 1754 uint64_t NumTraces = 0; 1755 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { 1756 switch (AggrEntry.EntryType) { 1757 case AggregatedLBREntry::BRANCH: 1758 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, 1759 AggrEntry.Mispreds); 1760 break; 1761 case AggregatedLBREntry::FT: 1762 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { 1763 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT 1764 ? AggrEntry.From.Offset 1765 : 0, 1766 AggrEntry.From.Offset, false}; 1767 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; 1768 doTrace(First, Second, AggrEntry.Count); 1769 NumTraces += AggrEntry.Count; 1770 break; 1771 } 1772 } 1773 } 1774 1775 outs() << "PERF2BOLT: read " << AggregatedLBRs.size() 1776 << " aggregated LBR entries\n"; 1777 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1778 << NumInvalidTraces; 1779 float Perc = 0.0f; 1780 if (NumTraces > 0) { 1781 outs() << " ("; 1782 Perc = NumInvalidTraces * 100.0f / NumTraces; 1783 if (outs().has_colors()) { 1784 if (Perc > 10.0f) 1785 outs().changeColor(raw_ostream::RED); 1786 else if (Perc > 5.0f) 1787 outs().changeColor(raw_ostream::YELLOW); 1788 else 1789 outs().changeColor(raw_ostream::GREEN); 1790 } 1791 outs() << format("%.1f%%", Perc); 1792 if (outs().has_colors()) 1793 outs().resetColor(); 1794 outs() << ")"; 1795 } 1796 outs() << "\n"; 1797 if (Perc > 10.0f) 1798 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1799 "binary is probably not the same binary used during profiling " 1800 "collection. The generated data may be ineffective for improving " 1801 "performance.\n\n"; 1802 1803 outs() << "PERF2BOLT: Out of range traces involving unknown regions: " 1804 << NumLongRangeTraces; 1805 if (NumTraces > 0) 1806 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1807 outs() << "\n"; 1808 } 1809 1810 Optional<int32_t> DataAggregator::parseCommExecEvent() { 1811 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1812 if (LineEnd == StringRef::npos) { 1813 reportError("expected rest of line"); 1814 Diag << "Found: " << ParsingBuf << "\n"; 1815 return NoneType(); 1816 } 1817 StringRef Line = ParsingBuf.substr(0, LineEnd); 1818 1819 size_t Pos = Line.find("PERF_RECORD_COMM exec"); 1820 if (Pos == StringRef::npos) 1821 return NoneType(); 1822 Line = Line.drop_front(Pos); 1823 1824 // Line: 1825 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" 1826 StringRef PIDStr = Line.rsplit(':').second.split('/').first; 1827 int32_t PID; 1828 if (PIDStr.getAsInteger(10, PID)) { 1829 reportError("expected PID"); 1830 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1831 return NoneType(); 1832 } 1833 1834 return PID; 1835 } 1836 1837 namespace { 1838 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) { 1839 const StringRef SecTimeStr = TimeStr.split('.').first; 1840 const StringRef USecTimeStr = TimeStr.split('.').second; 1841 uint64_t SecTime; 1842 uint64_t USecTime; 1843 if (SecTimeStr.getAsInteger(10, SecTime) || 1844 USecTimeStr.getAsInteger(10, USecTime)) 1845 return NoneType(); 1846 return SecTime * 1000000ULL + USecTime; 1847 } 1848 } 1849 1850 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { 1851 while (checkAndConsumeFS()) { 1852 } 1853 1854 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1855 if (LineEnd == StringRef::npos) { 1856 reportError("expected rest of line"); 1857 Diag << "Found: " << ParsingBuf << "\n"; 1858 return NoneType(); 1859 } 1860 StringRef Line = ParsingBuf.substr(0, LineEnd); 1861 1862 size_t Pos = Line.find("PERF_RECORD_FORK"); 1863 if (Pos == StringRef::npos) { 1864 consumeRestOfLine(); 1865 return NoneType(); 1866 } 1867 1868 ForkInfo FI; 1869 1870 const StringRef TimeStr = 1871 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1872 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { 1873 FI.Time = *TimeRes; 1874 } 1875 1876 Line = Line.drop_front(Pos); 1877 1878 // Line: 1879 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) 1880 const StringRef ChildPIDStr = Line.split('(').second.split(':').first; 1881 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { 1882 reportError("expected PID"); 1883 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; 1884 return NoneType(); 1885 } 1886 1887 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; 1888 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { 1889 reportError("expected PID"); 1890 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; 1891 return NoneType(); 1892 } 1893 1894 consumeRestOfLine(); 1895 1896 return FI; 1897 } 1898 1899 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> 1900 DataAggregator::parseMMapEvent() { 1901 while (checkAndConsumeFS()) { 1902 } 1903 1904 MMapInfo ParsedInfo; 1905 1906 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1907 if (LineEnd == StringRef::npos) { 1908 reportError("expected rest of line"); 1909 Diag << "Found: " << ParsingBuf << "\n"; 1910 return make_error_code(llvm::errc::io_error); 1911 } 1912 StringRef Line = ParsingBuf.substr(0, LineEnd); 1913 1914 size_t Pos = Line.find("PERF_RECORD_MMAP2"); 1915 if (Pos == StringRef::npos) { 1916 consumeRestOfLine(); 1917 return std::make_pair(StringRef(), ParsedInfo); 1918 } 1919 1920 // Line: 1921 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> 1922 1923 const StringRef TimeStr = 1924 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1925 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) 1926 ParsedInfo.Time = *TimeRes; 1927 1928 Line = Line.drop_front(Pos); 1929 1930 // Line: 1931 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> 1932 1933 StringRef FileName = Line.rsplit(FieldSeparator).second; 1934 if (FileName.startswith("//") || FileName.startswith("[")) { 1935 consumeRestOfLine(); 1936 return std::make_pair(StringRef(), ParsedInfo); 1937 } 1938 FileName = sys::path::filename(FileName); 1939 1940 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; 1941 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { 1942 reportError("expected PID"); 1943 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1944 return make_error_code(llvm::errc::io_error); 1945 } 1946 1947 const StringRef BaseAddressStr = Line.split('[').second.split('(').first; 1948 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) { 1949 reportError("expected base address"); 1950 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 1951 return make_error_code(llvm::errc::io_error); 1952 } 1953 1954 const StringRef SizeStr = Line.split('(').second.split(')').first; 1955 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { 1956 reportError("expected mmaped size"); 1957 Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; 1958 return make_error_code(llvm::errc::io_error); 1959 } 1960 1961 const StringRef OffsetStr = 1962 Line.split('@').second.ltrim().split(FieldSeparator).first; 1963 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { 1964 reportError("expected mmaped page-aligned offset"); 1965 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; 1966 return make_error_code(llvm::errc::io_error); 1967 } 1968 1969 consumeRestOfLine(); 1970 1971 return std::make_pair(FileName, ParsedInfo); 1972 } 1973 1974 std::error_code DataAggregator::parseMMapEvents() { 1975 outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; 1976 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName, 1977 TimerGroupDesc, opts::TimeAggregator); 1978 1979 std::multimap<StringRef, MMapInfo> GlobalMMapInfo; 1980 while (hasData()) { 1981 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); 1982 if (std::error_code EC = FileMMapInfoRes.getError()) 1983 return EC; 1984 1985 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); 1986 if (FileMMapInfo.second.PID == -1) 1987 continue; 1988 1989 // Consider only the first mapping of the file for any given PID 1990 bool PIDExists = false; 1991 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); 1992 for (auto MI = Range.first; MI != Range.second; ++MI) { 1993 if (MI->second.PID == FileMMapInfo.second.PID) { 1994 PIDExists = true; 1995 break; 1996 } 1997 } 1998 if (PIDExists) 1999 continue; 2000 2001 GlobalMMapInfo.insert(FileMMapInfo); 2002 } 2003 2004 LLVM_DEBUG({ 2005 dbgs() << "FileName -> mmap info:\n"; 2006 for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo) 2007 dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x" 2008 << Twine::utohexstr(Pair.second.MMapAddress) << ", " 2009 << Twine::utohexstr(Pair.second.Size) << " @ " 2010 << Twine::utohexstr(Pair.second.Offset) << "]\n"; 2011 }); 2012 2013 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename()); 2014 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) { 2015 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName 2016 << "\" for profile matching\n"; 2017 NameToUse = BuildIDBinaryName; 2018 } 2019 2020 auto Range = GlobalMMapInfo.equal_range(NameToUse); 2021 for (auto I = Range.first; I != Range.second; ++I) { 2022 MMapInfo &MMapInfo = I->second; 2023 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { 2024 // Check that the binary mapping matches one of the segments. 2025 bool MatchFound = false; 2026 for (auto &KV : BC->SegmentMapInfo) { 2027 SegmentInfo &SegInfo = KV.second; 2028 // The mapping is page-aligned and hence the MMapAddress could be 2029 // different from the segment start address. We cannot know the page 2030 // size of the mapping, but we know it should not exceed the segment 2031 // alignment value. Hence we are performing an approximate check. 2032 if (SegInfo.Address >= MMapInfo.MMapAddress && 2033 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) { 2034 MatchFound = true; 2035 break; 2036 } 2037 } 2038 if (!MatchFound) { 2039 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse 2040 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n'; 2041 continue; 2042 } 2043 } 2044 2045 // Set base address for shared objects. 2046 if (!BC->HasFixedLoadAddress) { 2047 Optional<uint64_t> BaseAddress = 2048 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset); 2049 if (!BaseAddress) { 2050 errs() << "PERF2BOLT-WARNING: unable to find base address of the " 2051 "binary when memory mapped at 0x" 2052 << Twine::utohexstr(MMapInfo.MMapAddress) 2053 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) 2054 << ". Ignoring profile data for this mapping\n"; 2055 continue; 2056 } else { 2057 MMapInfo.BaseAddress = *BaseAddress; 2058 } 2059 } 2060 2061 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2062 } 2063 2064 if (BinaryMMapInfo.empty()) { 2065 if (errs().has_colors()) 2066 errs().changeColor(raw_ostream::RED); 2067 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" 2068 << BC->getFilename() << "\"."; 2069 if (!GlobalMMapInfo.empty()) { 2070 errs() << " Profile for the following binary name(s) is available:\n"; 2071 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; 2072 I = GlobalMMapInfo.upper_bound(I->first)) 2073 errs() << " " << I->first << '\n'; 2074 errs() << "Please rename the input binary.\n"; 2075 } else { 2076 errs() << " Failed to extract any binary name from a profile.\n"; 2077 } 2078 if (errs().has_colors()) 2079 errs().resetColor(); 2080 2081 exit(1); 2082 } 2083 2084 return std::error_code(); 2085 } 2086 2087 std::error_code DataAggregator::parseTaskEvents() { 2088 outs() << "PERF2BOLT: parsing perf-script task events output\n"; 2089 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, 2090 TimerGroupDesc, opts::TimeAggregator); 2091 2092 while (hasData()) { 2093 if (Optional<int32_t> CommInfo = parseCommExecEvent()) { 2094 // Remove forked child that ran execve 2095 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); 2096 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) 2097 BinaryMMapInfo.erase(MMapInfoIter); 2098 consumeRestOfLine(); 2099 continue; 2100 } 2101 2102 Optional<ForkInfo> ForkInfo = parseForkEvent(); 2103 if (!ForkInfo) 2104 continue; 2105 2106 if (ForkInfo->ParentPID == ForkInfo->ChildPID) 2107 continue; 2108 2109 if (ForkInfo->Time == 0) { 2110 // Process was forked and mmaped before perf ran. In this case the child 2111 // should have its own mmap entry unless it was execve'd. 2112 continue; 2113 } 2114 2115 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); 2116 if (MMapInfoIter == BinaryMMapInfo.end()) 2117 continue; 2118 2119 MMapInfo MMapInfo = MMapInfoIter->second; 2120 MMapInfo.PID = ForkInfo->ChildPID; 2121 MMapInfo.Forked = true; 2122 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2123 } 2124 2125 outs() << "PERF2BOLT: input binary is associated with " 2126 << BinaryMMapInfo.size() << " PID(s)\n"; 2127 2128 LLVM_DEBUG({ 2129 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 2130 outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "") 2131 << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x" 2132 << Twine::utohexstr(MMI.second.Size) << ")\n"; 2133 }); 2134 2135 return std::error_code(); 2136 } 2137 2138 Optional<std::pair<StringRef, StringRef>> 2139 DataAggregator::parseNameBuildIDPair() { 2140 while (checkAndConsumeFS()) { 2141 } 2142 2143 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true); 2144 if (std::error_code EC = BuildIDStr.getError()) 2145 return NoneType(); 2146 2147 // If one of the strings is missing, don't issue a parsing error, but still 2148 // do not return a value. 2149 if (ParsingBuf[0] == '\n') 2150 return NoneType(); 2151 2152 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true); 2153 if (std::error_code EC = NameStr.getError()) 2154 return NoneType(); 2155 2156 consumeRestOfLine(); 2157 return std::make_pair(NameStr.get(), BuildIDStr.get()); 2158 } 2159 2160 bool DataAggregator::hasAllBuildIDs() { 2161 const StringRef SavedParsingBuf = ParsingBuf; 2162 2163 if (!hasData()) 2164 return false; 2165 2166 bool HasInvalidEntries = false; 2167 while (hasData()) { 2168 if (!parseNameBuildIDPair()) { 2169 HasInvalidEntries = true; 2170 break; 2171 } 2172 } 2173 2174 ParsingBuf = SavedParsingBuf; 2175 2176 return !HasInvalidEntries; 2177 } 2178 2179 Optional<StringRef> 2180 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { 2181 const StringRef SavedParsingBuf = ParsingBuf; 2182 2183 StringRef FileName; 2184 while (hasData()) { 2185 Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair(); 2186 if (!IDPair) { 2187 consumeRestOfLine(); 2188 continue; 2189 } 2190 2191 if (IDPair->second.startswith(FileBuildID)) { 2192 FileName = sys::path::filename(IDPair->first); 2193 break; 2194 } 2195 } 2196 2197 ParsingBuf = SavedParsingBuf; 2198 2199 if (!FileName.empty()) 2200 return FileName; 2201 2202 return NoneType(); 2203 } 2204 2205 std::error_code 2206 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { 2207 std::error_code EC; 2208 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2209 if (EC) 2210 return EC; 2211 2212 bool WriteMemLocs = false; 2213 2214 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { 2215 if (WriteMemLocs) 2216 OutFile << (Loc.IsSymbol ? "4 " : "3 "); 2217 else 2218 OutFile << (Loc.IsSymbol ? "1 " : "0 "); 2219 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) 2220 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; 2221 }; 2222 2223 uint64_t BranchValues = 0; 2224 uint64_t MemValues = 0; 2225 2226 if (BAT) 2227 OutFile << "boltedcollection\n"; 2228 if (opts::BasicAggregation) { 2229 OutFile << "no_lbr"; 2230 for (const StringMapEntry<NoneType> &Entry : EventNames) 2231 OutFile << " " << Entry.getKey(); 2232 OutFile << "\n"; 2233 2234 for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) { 2235 for (const SampleInfo &SI : Func.getValue().Data) { 2236 writeLocation(SI.Loc); 2237 OutFile << SI.Hits << "\n"; 2238 ++BranchValues; 2239 } 2240 } 2241 } else { 2242 for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 2243 for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) { 2244 writeLocation(BI.From); 2245 writeLocation(BI.To); 2246 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2247 ++BranchValues; 2248 } 2249 for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) { 2250 // Do not output if source is a known symbol, since this was already 2251 // accounted for in the source function 2252 if (BI.From.IsSymbol) 2253 continue; 2254 writeLocation(BI.From); 2255 writeLocation(BI.To); 2256 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2257 ++BranchValues; 2258 } 2259 } 2260 2261 WriteMemLocs = true; 2262 for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) { 2263 for (const MemInfo &MemEvent : Func.getValue().Data) { 2264 writeLocation(MemEvent.Offset); 2265 writeLocation(MemEvent.Addr); 2266 OutFile << MemEvent.Count << "\n"; 2267 ++MemValues; 2268 } 2269 } 2270 } 2271 2272 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues 2273 << " memory objects to " << OutputFilename << "\n"; 2274 2275 return std::error_code(); 2276 } 2277 2278 void DataAggregator::dump() const { DataReader::dump(); } 2279 2280 void DataAggregator::dump(const LBREntry &LBR) const { 2281 Diag << "From: " << Twine::utohexstr(LBR.From) 2282 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred 2283 << "\n"; 2284 } 2285 2286 void DataAggregator::dump(const PerfBranchSample &Sample) const { 2287 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n"; 2288 for (const LBREntry &LBR : Sample.LBR) 2289 dump(LBR); 2290 } 2291 2292 void DataAggregator::dump(const PerfMemSample &Sample) const { 2293 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n"; 2294 } 2295