//===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
//
//===----------------------------------------------------------------------===//

#include "bolt/Profile/DataAggregator.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/Heatmap.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <unordered_map>
#include <utility>

#define DEBUG_TYPE "aggregator"

using namespace llvm;
using namespace bolt;

// Command-line options specific to the aggregator. Each option is described by
// its own cl::desc string.
namespace opts {

static cl::opt<bool>
    BasicAggregation("nl",
                     cl::desc("aggregate basic samples (without LBR info)"),
                     cl::cat(AggregatorCategory));

static cl::opt<bool>
    FilterMemProfile("filter-mem-profile",
                     cl::desc("if processing a memory profile, filter out stack or heap accesses "
                              "that won't be useful for BOLT to reduce profile file size"),
                     cl::init(true),
                     cl::cat(AggregatorCategory));

// A value of 0 (the default) means that no PID filtering is requested.
static cl::opt<unsigned long long>
    FilterPID("pid",
              cl::desc("only use samples from process with specified PID"),
              cl::init(0),
              cl::Optional,
              cl::cat(AggregatorCategory));

static cl::opt<bool>
    IgnoreBuildID("ignore-build-id",
                  cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
                  cl::init(false),
                  cl::cat(AggregatorCategory));

static cl::opt<bool> IgnoreInterruptLBR(
    "ignore-interrupt-lbr",
    cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
    cl::init(true), cl::cat(AggregatorCategory));

static cl::opt<unsigned long long>
    MaxSamples("max-samples",
               cl::init(-1ULL),
               cl::desc("maximum number of samples to read from LBR profile"),
               cl::Optional,
               cl::Hidden,
               cl::cat(AggregatorCategory));

static cl::opt<bool> ReadPreAggregated(
    "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
    cl::cat(AggregatorCategory));

static cl::opt<bool>
    TimeAggregator("time-aggr",
                   cl::desc("time BOLT aggregator"),
                   cl::init(false),
                   cl::ZeroOrMore,
                   cl::cat(AggregatorCategory));

static cl::opt<bool>
    UseEventPC("use-event-pc",
               cl::desc("use event PC in combination with LBR sampling"),
               cl::cat(AggregatorCategory));

static cl::opt<bool> WriteAutoFDOData(
    "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
    cl::cat(AggregatorCategory));

} // namespace opts

namespace {

// Name and description of the timer group passed to NamedRegionTimer.
const char TimerGroupName[] = "aggregator";
const char TimerGroupDesc[] = "Aggregator";

// Collect the name and address range of every non-empty text section of \p BC.
// The result is sorted by section begin address.
std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
  std::vector<SectionNameAndRange> sections;
  for (BinarySection &Section : BC->sections()) {
    if (!Section.isText())
      continue;
    if (Section.getSize() == 0)
      continue;
    sections.push_back(
        {Section.getName(), Section.getAddress(), Section.getEndAddress()});
  }
  llvm::sort(sections,
             [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
               return A.BeginAddress < B.BeginAddress;
             });
  return sections;
122 } 123 } 124 125 constexpr uint64_t DataAggregator::KernelBaseAddr; 126 127 DataAggregator::~DataAggregator() { deleteTempFiles(); } 128 129 namespace { 130 void deleteTempFile(const std::string &FileName) { 131 if (std::error_code Errc = sys::fs::remove(FileName.c_str())) 132 errs() << "PERF2BOLT: failed to delete temporary file " << FileName 133 << " with error " << Errc.message() << "\n"; 134 } 135 } 136 137 void DataAggregator::deleteTempFiles() { 138 for (std::string &FileName : TempFiles) 139 deleteTempFile(FileName); 140 TempFiles.clear(); 141 } 142 143 void DataAggregator::findPerfExecutable() { 144 Optional<std::string> PerfExecutable = 145 sys::Process::FindInEnvPath("PATH", "perf"); 146 if (!PerfExecutable) { 147 outs() << "PERF2BOLT: No perf executable found!\n"; 148 exit(1); 149 } 150 PerfPath = *PerfExecutable; 151 } 152 153 void DataAggregator::start() { 154 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; 155 156 // Don't launch perf for pre-aggregated files 157 if (opts::ReadPreAggregated) 158 return; 159 160 findPerfExecutable(); 161 162 if (opts::BasicAggregation) 163 launchPerfProcess("events without LBR", 164 MainEventsPPI, 165 "script -F pid,event,ip", 166 /*Wait = */false); 167 else 168 launchPerfProcess("branch events", 169 MainEventsPPI, 170 "script -F pid,ip,brstack", 171 /*Wait = */false); 172 173 // Note: we launch script for mem events regardless of the option, as the 174 // command fails fairly fast if mem events were not collected. 
175 launchPerfProcess("mem events", 176 MemEventsPPI, 177 "script -F pid,event,addr,ip", 178 /*Wait = */false); 179 180 launchPerfProcess("process events", 181 MMapEventsPPI, 182 "script --show-mmap-events", 183 /*Wait = */false); 184 185 launchPerfProcess("task events", 186 TaskEventsPPI, 187 "script --show-task-events", 188 /*Wait = */false); 189 } 190 191 void DataAggregator::abort() { 192 if (opts::ReadPreAggregated) 193 return; 194 195 std::string Error; 196 197 // Kill subprocesses in case they are not finished 198 sys::Wait(TaskEventsPPI.PI, 1, false, &Error); 199 sys::Wait(MMapEventsPPI.PI, 1, false, &Error); 200 sys::Wait(MainEventsPPI.PI, 1, false, &Error); 201 sys::Wait(MemEventsPPI.PI, 1, false, &Error); 202 203 deleteTempFiles(); 204 205 exit(1); 206 } 207 208 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, 209 const char *ArgsString, bool Wait) { 210 SmallVector<StringRef, 4> Argv; 211 212 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; 213 Argv.push_back(PerfPath.data()); 214 215 char *WritableArgsString = strdup(ArgsString); 216 char *Str = WritableArgsString; 217 do { 218 Argv.push_back(Str); 219 while (*Str && *Str != ' ') 220 ++Str; 221 if (!*Str) 222 break; 223 *Str++ = 0; 224 } while (true); 225 226 Argv.push_back("-f"); 227 Argv.push_back("-i"); 228 Argv.push_back(Filename.c_str()); 229 230 if (std::error_code Errc = 231 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) { 232 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath 233 << " with error " << Errc.message() << "\n"; 234 exit(1); 235 } 236 TempFiles.push_back(PPI.StdoutPath.data()); 237 238 if (std::error_code Errc = 239 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) { 240 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath 241 << " with error " << Errc.message() << "\n"; 242 exit(1); 243 } 244 TempFiles.push_back(PPI.StderrPath.data()); 245 246 
Optional<StringRef> Redirects[] = { 247 llvm::None, // Stdin 248 StringRef(PPI.StdoutPath.data()), // Stdout 249 StringRef(PPI.StderrPath.data())}; // Stderr 250 251 LLVM_DEBUG({ 252 dbgs() << "Launching perf: "; 253 for (StringRef Arg : Argv) 254 dbgs() << Arg << " "; 255 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() 256 << "\n"; 257 }); 258 259 if (Wait) 260 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, 261 /*envp*/ llvm::None, Redirects); 262 else 263 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None, 264 Redirects); 265 266 free(WritableArgsString); 267 } 268 269 void DataAggregator::processFileBuildID(StringRef FileBuildID) { 270 PerfProcessInfo BuildIDProcessInfo; 271 launchPerfProcess("buildid list", 272 BuildIDProcessInfo, 273 "buildid-list", 274 /*Wait = */true); 275 276 if (BuildIDProcessInfo.PI.ReturnCode != 0) { 277 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 278 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); 279 StringRef ErrBuf = (*MB)->getBuffer(); 280 281 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode 282 << '\n'; 283 errs() << ErrBuf; 284 return; 285 } 286 287 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 288 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); 289 if (std::error_code EC = MB.getError()) { 290 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " 291 << EC.message() << "\n"; 292 return; 293 } 294 295 FileBuf = std::move(*MB); 296 ParsingBuf = FileBuf->getBuffer(); 297 if (ParsingBuf.empty()) { 298 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " 299 "data was recorded without it\n"; 300 return; 301 } 302 303 Col = 0; 304 Line = 1; 305 Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); 306 if (!FileName) { 307 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. 
" 308 "This indicates the input binary supplied for data aggregation " 309 "is not the same recorded by perf when collecting profiling " 310 "data, or there were no samples recorded for the binary. " 311 "Use -ignore-build-id option to override.\n"; 312 if (!opts::IgnoreBuildID) 313 abort(); 314 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { 315 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; 316 BuildIDBinaryName = std::string(*FileName); 317 } else { 318 outs() << "PERF2BOLT: matched build-id and file name\n"; 319 } 320 321 return; 322 } 323 324 bool DataAggregator::checkPerfDataMagic(StringRef FileName) { 325 if (opts::ReadPreAggregated) 326 return true; 327 328 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName); 329 if (!FD) 330 return false; 331 332 char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; 333 334 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); 335 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( 336 *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0); 337 if (!BytesRead || *BytesRead != 7) 338 return false; 339 340 if (strncmp(Buf, "PERFILE", 7) == 0) 341 return true; 342 return false; 343 } 344 345 void DataAggregator::parsePreAggregated() { 346 std::string Error; 347 348 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 349 MemoryBuffer::getFileOrSTDIN(Filename); 350 if (std::error_code EC = MB.getError()) { 351 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " 352 << EC.message() << "\n"; 353 exit(1); 354 } 355 356 FileBuf = std::move(*MB); 357 ParsingBuf = FileBuf->getBuffer(); 358 Col = 0; 359 Line = 1; 360 if (parsePreAggregatedLBRSamples()) { 361 errs() << "PERF2BOLT: failed to parse samples\n"; 362 exit(1); 363 } 364 } 365 366 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { 367 outs() << "PERF2BOLT: writing data for autofdo tools...\n"; 368 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, 369 
TimerGroupDesc, opts::TimeAggregator); 370 371 std::error_code EC; 372 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 373 if (EC) 374 return EC; 375 376 // Format: 377 // number of unique traces 378 // from_1-to_1:count_1 379 // from_2-to_2:count_2 380 // ...... 381 // from_n-to_n:count_n 382 // number of unique sample addresses 383 // addr_1:count_1 384 // addr_2:count_2 385 // ...... 386 // addr_n:count_n 387 // number of unique LBR entries 388 // src_1->dst_1:count_1 389 // src_2->dst_2:count_2 390 // ...... 391 // src_n->dst_n:count_n 392 393 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress; 394 395 // AutoFDO addresses are relative to the first allocated loadable program 396 // segment 397 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t { 398 if (Address < FirstAllocAddress) 399 return 0; 400 return Address - FirstAllocAddress; 401 }; 402 403 OutFile << FallthroughLBRs.size() << "\n"; 404 for (const auto &AggrLBR : FallthroughLBRs) { 405 const Trace &Trace = AggrLBR.first; 406 const FTInfo &Info = AggrLBR.second; 407 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-" 408 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 409 << (Info.InternCount + Info.ExternCount) << "\n"; 410 } 411 412 OutFile << BasicSamples.size() << "\n"; 413 for (const auto &Sample : BasicSamples) { 414 uint64_t PC = Sample.first; 415 uint64_t HitCount = Sample.second; 416 OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n"; 417 } 418 419 OutFile << BranchLBRs.size() << "\n"; 420 for (const auto &AggrLBR : BranchLBRs) { 421 const Trace &Trace = AggrLBR.first; 422 const BranchInfo &Info = AggrLBR.second; 423 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->" 424 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 425 << Info.TakenCount << "\n"; 426 } 427 428 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, " 429 << BasicSamples.size() << " sample 
addresses and " << BranchLBRs.size() 430 << " unique branches to " << OutputFilename << "\n"; 431 432 return std::error_code(); 433 } 434 435 void DataAggregator::filterBinaryMMapInfo() { 436 if (opts::FilterPID) { 437 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); 438 if (MMapInfoIter != BinaryMMapInfo.end()) { 439 MMapInfo MMap = MMapInfoIter->second; 440 BinaryMMapInfo.clear(); 441 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); 442 } else { 443 if (errs().has_colors()) 444 errs().changeColor(raw_ostream::RED); 445 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" 446 << opts::FilterPID << "\"" 447 << " for binary \"" << BC->getFilename() << "\"."; 448 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); 449 errs() << " Profile for the following process is available:\n"; 450 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 451 outs() << " " << MMI.second.PID 452 << (MMI.second.Forked ? " (forked)\n" : "\n"); 453 454 if (errs().has_colors()) 455 errs().resetColor(); 456 457 exit(1); 458 } 459 } 460 } 461 462 Error DataAggregator::preprocessProfile(BinaryContext &BC) { 463 this->BC = &BC; 464 465 if (opts::ReadPreAggregated) { 466 parsePreAggregated(); 467 return Error::success(); 468 } 469 470 if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) { 471 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; 472 processFileBuildID(*FileBuildID); 473 } else { 474 errs() << "BOLT-WARNING: build-id will not be checked because we could " 475 "not read one from input binary\n"; 476 } 477 478 auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) { 479 std::string Error; 480 outs() << "PERF2BOLT: waiting for perf " << Name 481 << " collection to finish...\n"; 482 sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error); 483 484 if (!Error.empty()) { 485 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; 486 deleteTempFiles(); 487 exit(1); 488 } 489 490 
if (PI.ReturnCode != 0) { 491 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = 492 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); 493 StringRef ErrBuf = (*ErrorMB)->getBuffer(); 494 495 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 496 errs() << ErrBuf; 497 deleteTempFiles(); 498 exit(1); 499 } 500 501 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 502 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); 503 if (std::error_code EC = MB.getError()) { 504 errs() << "Cannot open " << Process.StdoutPath.data() << ": " 505 << EC.message() << "\n"; 506 deleteTempFiles(); 507 exit(1); 508 } 509 510 FileBuf = std::move(*MB); 511 ParsingBuf = FileBuf->getBuffer(); 512 Col = 0; 513 Line = 1; 514 }; 515 516 if (opts::LinuxKernelMode) { 517 // Current MMap parsing logic does not work with linux kernel. 518 // MMap entries for linux kernel uses PERF_RECORD_MMAP 519 // format instead of typical PERF_RECORD_MMAP2 format. 520 // Since linux kernel address mapping is absolute (same as 521 // in the ELF file), we avoid parsing MMap in linux kernel mode. 522 // While generating optimized linux kernel binary, we may need 523 // to parse MMap entries. 524 525 // In linux kernel mode, we analyze and optimize 526 // all linux kernel binary instructions, irrespective 527 // of whether they are due to system calls or due to 528 // interrupts. Therefore, we cannot ignore interrupt 529 // in Linux kernel mode. 
530 opts::IgnoreInterruptLBR = false; 531 } else { 532 prepareToParse("mmap events", MMapEventsPPI); 533 if (parseMMapEvents()) 534 errs() << "PERF2BOLT: failed to parse mmap events\n"; 535 } 536 537 prepareToParse("task events", TaskEventsPPI); 538 if (parseTaskEvents()) 539 errs() << "PERF2BOLT: failed to parse task events\n"; 540 541 filterBinaryMMapInfo(); 542 prepareToParse("events", MainEventsPPI); 543 544 if (opts::HeatmapMode) { 545 if (std::error_code EC = printLBRHeatMap()) { 546 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; 547 exit(1); 548 } 549 exit(0); 550 } 551 552 if ((!opts::BasicAggregation && parseBranchEvents()) || 553 (opts::BasicAggregation && parseBasicEvents())) 554 errs() << "PERF2BOLT: failed to parse samples\n"; 555 556 // We can finish early if the goal is just to generate data for autofdo 557 if (opts::WriteAutoFDOData) { 558 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename)) 559 errs() << "Error writing autofdo data to file: " << EC.message() << "\n"; 560 561 deleteTempFiles(); 562 exit(0); 563 } 564 565 // Special handling for memory events 566 std::string Error; 567 sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error); 568 if (PI.ReturnCode != 0) { 569 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 570 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data()); 571 StringRef ErrBuf = (*MB)->getBuffer(); 572 573 deleteTempFiles(); 574 575 Regex NoData("Samples for '.*' event do not have ADDR attribute set. 
" 576 "Cannot print 'addr' field."); 577 if (!NoData.match(ErrBuf)) { 578 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 579 errs() << ErrBuf; 580 exit(1); 581 } 582 return Error::success(); 583 } 584 585 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 586 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data()); 587 if (std::error_code EC = MB.getError()) { 588 errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": " 589 << EC.message() << "\n"; 590 deleteTempFiles(); 591 exit(1); 592 } 593 594 FileBuf = std::move(*MB); 595 ParsingBuf = FileBuf->getBuffer(); 596 Col = 0; 597 Line = 1; 598 if (const std::error_code EC = parseMemEvents()) 599 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() 600 << '\n'; 601 602 deleteTempFiles(); 603 604 return Error::success(); 605 } 606 607 Error DataAggregator::readProfile(BinaryContext &BC) { 608 processProfile(BC); 609 610 for (auto &BFI : BC.getBinaryFunctions()) { 611 BinaryFunction &Function = BFI.second; 612 convertBranchData(Function); 613 } 614 615 if (opts::AggregateOnly) { 616 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) 617 report_error("cannot create output data file", EC); 618 } 619 620 return Error::success(); 621 } 622 623 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { 624 return Function.hasProfileAvailable(); 625 } 626 627 void DataAggregator::processProfile(BinaryContext &BC) { 628 if (opts::ReadPreAggregated) 629 processPreAggregated(); 630 else if (opts::BasicAggregation) 631 processBasicEvents(); 632 else 633 processBranchEvents(); 634 635 processMemEvents(); 636 637 // Mark all functions with registered events as having a valid profile. 638 for (auto &BFI : BC.getBinaryFunctions()) { 639 BinaryFunction &BF = BFI.second; 640 if (getBranchData(BF)) { 641 const auto Flags = opts::BasicAggregation ? 
BinaryFunction::PF_SAMPLE
                                                : BinaryFunction::PF_LBR;
      BF.markProfiled(Flags);
    }
  }

  // Release intermediate storage.
  clear(BranchLBRs);
  clear(FallthroughLBRs);
  clear(AggregatedLBRs);
  clear(BasicSamples);
  clear(MemSamples);
}

/// Return the function containing \p Address, or nullptr when the address is
/// outside of the binary context or no function covers it.
BinaryFunction *
DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
  if (!BC->containsAddress(Address))
    return nullptr;

  return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
                                                /*UseMaxSize=*/true);
}

/// Return the name under which samples for \p Func are recorded. Without BAT
/// this is simply one of the function names. With BAT, a function that has a
/// parent address is attributed to the function at that address (and
/// \p Count is added to NumColdSamples), and a name of the form
/// Symbol/FileName/N is preferred when available.
StringRef DataAggregator::getLocationName(BinaryFunction &Func,
                                          uint64_t Count) {
  if (!BAT)
    return Func.getOneName();

  const BinaryFunction *OrigFunc = &Func;
  if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
    NumColdSamples += Count;
    BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
    if (HotFunc)
      OrigFunc = HotFunc;
  }
  // If it is a local function, prefer the name containing the file name where
  // the local function was declared
  for (StringRef AlternativeName : OrigFunc->getNames()) {
    size_t FileNameIdx = AlternativeName.find('/');
    // Confirm the alternative name has the pattern Symbol/FileName/1 before
    // using it
    if (FileNameIdx == StringRef::npos ||
        AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
      continue;
    return AlternativeName;
  }
  return OrigFunc->getOneName();
}

/// Record \p Count basic samples hitting \p Address inside \p Func. The
/// address is converted to a function-relative offset (and translated through
/// BAT when present) before the count is bumped. Always returns true.
bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
                              uint64_t Count) {
  auto I = NamesToSamples.find(Func.getOneName());
  if (I == NamesToSamples.end()) {
    // First sample for this function: create its entry.
    bool Success;
    StringRef LocName = getLocationName(Func, Count);
    std::tie(I, Success) = NamesToSamples.insert(
        std::make_pair(Func.getOneName(),
                       FuncSampleData(LocName, FuncSampleData::ContainerTy())));
  }

  Address -= Func.getAddress();
  if (BAT)
    Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false);

  I->second.bumpCount(Address, Count);
  return true;
}

/// Record a branch whose source and destination are both inside \p Func.
/// \p From and \p To are absolute addresses; they are converted to
/// function-relative offsets (and translated through BAT when present) before
/// the branch count is bumped. Always returns true.
bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
                                   uint64_t To, uint64_t Count,
                                   uint64_t Mispreds) {
  FuncBranchData *AggrData = getBranchData(Func);
  if (!AggrData) {
    // First branch for this function: create and register its data.
    AggrData = &NamesToBranches[Func.getOneName()];
    AggrData->Name = getLocationName(Func, Count);
    setBranchData(Func, AggrData);
  }

  From -= Func.getAddress();
  To -= Func.getAddress();
  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName()
                    << " @ " << Twine::utohexstr(From) << " -> "
                    << Func.getPrintName() << " @ " << Twine::utohexstr(To)
                    << '\n');
  if (BAT) {
    From = BAT->translate(Func, From, /*IsBranchSrc=*/true);
    To = BAT->translate(Func, To, /*IsBranchSrc=*/false);
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
                      << Func.getPrintName() << " @ " << Twine::utohexstr(From)
                      << " -> " << Func.getPrintName() << " @ "
                      << Twine::utohexstr(To) << '\n');
  }

  AggrData->bumpBranchCount(From, To, Count, Mispreds);
  return true;
}

/// Record a branch between two different functions; either side may be null
/// when the corresponding address is not covered by a known function.
/// Addresses on a known side are converted to function-relative offsets (and
/// translated through BAT when present).
bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
                                   BinaryFunction *ToFunc, uint64_t From,
                                   uint64_t To, uint64_t Count,
                                   uint64_t Mispreds) {
  FuncBranchData *FromAggrData = nullptr;
  FuncBranchData *ToAggrData = nullptr;
  StringRef SrcFunc;
  StringRef DstFunc;
  if (FromFunc) {
    SrcFunc = getLocationName(*FromFunc, Count);
    FromAggrData = getBranchData(*FromFunc);
    if (!FromAggrData) {
      FromAggrData = &NamesToBranches[FromFunc->getOneName()];
      FromAggrData->Name = SrcFunc;
      setBranchData(*FromFunc, FromAggrData);
    }
    From -= FromFunc->getAddress();
    if (BAT)
      From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true);

    recordExit(*FromFunc, From, Mispreds, Count);
  }
  if (ToFunc) {
    // Count 0 here: the count was already passed to getLocationName() for the
    // source side above.
    DstFunc = getLocationName(*ToFunc, 0);
ToAggrData = getBranchData(*ToFunc); 763 if (!ToAggrData) { 764 ToAggrData = &NamesToBranches[ToFunc->getOneName()]; 765 ToAggrData->Name = DstFunc; 766 setBranchData(*ToFunc, ToAggrData); 767 } 768 To -= ToFunc->getAddress(); 769 if (BAT) 770 To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false); 771 772 recordEntry(*ToFunc, To, Mispreds, Count); 773 } 774 775 if (FromAggrData) 776 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To), 777 Count, Mispreds); 778 if (ToAggrData) 779 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To, 780 Count, Mispreds); 781 return true; 782 } 783 784 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, 785 uint64_t Mispreds) { 786 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); 787 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); 788 if (!FromFunc && !ToFunc) 789 return false; 790 791 if (FromFunc == ToFunc) { 792 recordBranch(*FromFunc, From - FromFunc->getAddress(), 793 To - FromFunc->getAddress(), Count, Mispreds); 794 return doIntraBranch(*FromFunc, From, To, Count, Mispreds); 795 } 796 797 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); 798 } 799 800 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, 801 uint64_t Count) { 802 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); 803 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); 804 if (!FromFunc || !ToFunc) { 805 LLVM_DEBUG( 806 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() 807 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 808 << " and ending in " << ToFunc->getPrintName() << " @ " 809 << ToFunc->getPrintName() << " @ " 810 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 811 NumLongRangeTraces += Count; 812 return false; 813 } 814 if (FromFunc != ToFunc) { 815 NumInvalidTraces += Count; 816 LLVM_DEBUG( 817 dbgs() << "Invalid 
trace starting in " << FromFunc->getPrintName() 818 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 819 << " and ending in " << ToFunc->getPrintName() << " @ " 820 << ToFunc->getPrintName() << " @ " 821 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 822 return false; 823 } 824 825 Optional<BoltAddressTranslation::FallthroughListTy> FTs = 826 BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From) 827 : getFallthroughsInTrace(*FromFunc, First, Second, Count); 828 if (!FTs) { 829 LLVM_DEBUG( 830 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 831 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 832 << " and ending in " << ToFunc->getPrintName() << " @ " 833 << ToFunc->getPrintName() << " @ " 834 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 835 NumInvalidTraces += Count; 836 return false; 837 } 838 839 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " 840 << FromFunc->getPrintName() << ":" 841 << Twine::utohexstr(First.To) << " to " 842 << Twine::utohexstr(Second.From) << ".\n"); 843 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs) 844 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(), 845 Pair.second + FromFunc->getAddress(), Count, false); 846 847 return true; 848 } 849 850 bool DataAggregator::recordTrace( 851 BinaryFunction &BF, 852 const LBREntry &FirstLBR, 853 const LBREntry &SecondLBR, 854 uint64_t Count, 855 SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const { 856 BinaryContext &BC = BF.getBinaryContext(); 857 858 if (!BF.isSimple()) 859 return false; 860 861 assert(BF.hasCFG() && "can only record traces in CFG state"); 862 863 // Offsets of the trace within this function. 
const uint64_t From = FirstLBR.To - BF.getAddress();
  const uint64_t To = SecondLBR.From - BF.getAddress();

  // A fall-through trace can only move forward.
  if (From > To)
    return false;

  BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
  BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB)
    return false;

  // Adjust FromBB if the first LBR is a return from the last instruction in
  // the previous block (that instruction should be a call).
  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
    BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1];
    if (PrevBB->getSuccessor(FromBB->getLabel())) {
      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
      if (Instr && BC.MIB->isCall(*Instr))
        FromBB = PrevBB;
      else
        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
                          << '\n');
    } else {
      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
    }
  }

  // Fill out information for fall-through edges. The From and To could be
  // within the same basic block, e.g. when two call instructions are in the
  // same block. In this case we skip the processing.
  if (FromBB == ToBB)
    return true;

  // Process blocks in the original layout order.
  BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()];
  assert(BB == FromBB && "index mismatch");
  while (BB != ToBB) {
    BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");

    // Check for bad LBRs.
    if (!BB->getSuccessor(NextBB->getLabel())) {
      LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
                        << "  " << FirstLBR << '\n'
                        << "  " << SecondLBR << '\n');
      return false;
    }

    // Record fall-through jumps
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB);
    BI.Count += Count;

    if (Branches) {
      // The edge source is the offset of the last non-pseudo instruction of
      // the block, or the block offset when the block has no such instruction.
      const MCInst *Instr = BB->getLastNonPseudoInstr();
      uint64_t Offset = 0;
      if (Instr)
        Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
      else
        Offset = BB->getOffset();

      Branches->emplace_back(Offset, NextBB->getOffset());
    }

    BB = NextBB;
  }

  return true;
}

/// Return the list of fall-through edges (as offset pairs) traversed between
/// \p FirstLBR and \p SecondLBR in \p BF, or None if recordTrace() rejects the
/// trace. Note that recordTrace() also adds \p Count to the traversed edges.
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
                                       const LBREntry &FirstLBR,
                                       const LBREntry &SecondLBR,
                                       uint64_t Count) const {
  SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;

  if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res))
    return NoneType();

  return Res;
}

/// Account for \p Count entries into \p BF at offset \p To. Returns false
/// when the offset is past the function size. \p Mispred is not used.
bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
                                 uint64_t Count) const {
  if (To > BF.getSize())
    return false;

  if (!BF.hasProfile())
    BF.ExecutionCount = 0;

  BinaryBasicBlock *EntryBB = nullptr;
  if (To == 0) {
    // Entry at the very start of the function counts as a function execution.
    BF.ExecutionCount += Count;
    if (!BF.empty())
      EntryBB = &BF.front();
  } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
    if (BB->isEntryPoint())
      EntryBB = BB;
  }

  if (EntryBB)
    EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);

  return true;
}

/// Validate an exit from \p BF at offset \p From and make sure the function
/// has an initialized execution count. Returns false for non-simple functions
/// or offsets past the function size. \p Mispred and \p Count are not used.
bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
                                uint64_t Count) const {
  if (!BF.isSimple() || From > BF.getSize())
    return false;

  if (!BF.hasProfile())
    BF.ExecutionCount = 0;

  return true;
}

/// Parse one LBR entry from ParsingBuf: three '/'-terminated fields (From
/// address, To address, misprediction flag) followed by a remainder that must
/// be at least 5 characters long.
ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
  LBREntry
Res; 985 ErrorOr<StringRef> FromStrRes = parseString('/'); 986 if (std::error_code EC = FromStrRes.getError()) 987 return EC; 988 StringRef OffsetStr = FromStrRes.get(); 989 if (OffsetStr.getAsInteger(0, Res.From)) { 990 reportError("expected hexadecimal number with From address"); 991 Diag << "Found: " << OffsetStr << "\n"; 992 return make_error_code(llvm::errc::io_error); 993 } 994 995 ErrorOr<StringRef> ToStrRes = parseString('/'); 996 if (std::error_code EC = ToStrRes.getError()) 997 return EC; 998 OffsetStr = ToStrRes.get(); 999 if (OffsetStr.getAsInteger(0, Res.To)) { 1000 reportError("expected hexadecimal number with To address"); 1001 Diag << "Found: " << OffsetStr << "\n"; 1002 return make_error_code(llvm::errc::io_error); 1003 } 1004 1005 ErrorOr<StringRef> MispredStrRes = parseString('/'); 1006 if (std::error_code EC = MispredStrRes.getError()) 1007 return EC; 1008 StringRef MispredStr = MispredStrRes.get(); 1009 if (MispredStr.size() != 1 || 1010 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { 1011 reportError("expected single char for mispred bit"); 1012 Diag << "Found: " << MispredStr << "\n"; 1013 return make_error_code(llvm::errc::io_error); 1014 } 1015 Res.Mispred = MispredStr[0] == 'M'; 1016 1017 static bool MispredWarning = true; 1018 if (MispredStr[0] == '-' && MispredWarning) { 1019 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; 1020 MispredWarning = false; 1021 } 1022 1023 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true); 1024 if (std::error_code EC = Rest.getError()) 1025 return EC; 1026 if (Rest.get().size() < 5) { 1027 reportError("expected rest of LBR entry"); 1028 Diag << "Found: " << Rest.get() << "\n"; 1029 return make_error_code(llvm::errc::io_error); 1030 } 1031 return Res; 1032 } 1033 1034 bool DataAggregator::checkAndConsumeFS() { 1035 if (ParsingBuf[0] != FieldSeparator) 1036 return false; 1037 1038 ParsingBuf = ParsingBuf.drop_front(1); 1039 Col += 1; 1040 return 
true; 1041 } 1042 1043 void DataAggregator::consumeRestOfLine() { 1044 size_t LineEnd = ParsingBuf.find_first_of('\n'); 1045 if (LineEnd == StringRef::npos) { 1046 ParsingBuf = StringRef(); 1047 Col = 0; 1048 Line += 1; 1049 return; 1050 } 1051 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); 1052 Col = 0; 1053 Line += 1; 1054 } 1055 1056 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { 1057 PerfBranchSample Res; 1058 1059 while (checkAndConsumeFS()) { 1060 } 1061 1062 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1063 if (std::error_code EC = PIDRes.getError()) 1064 return EC; 1065 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1066 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) { 1067 consumeRestOfLine(); 1068 return make_error_code(errc::no_such_process); 1069 } 1070 1071 while (checkAndConsumeFS()) { 1072 } 1073 1074 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1075 if (std::error_code EC = PCRes.getError()) 1076 return EC; 1077 Res.PC = PCRes.get(); 1078 1079 if (checkAndConsumeNewLine()) 1080 return Res; 1081 1082 while (!checkAndConsumeNewLine()) { 1083 checkAndConsumeFS(); 1084 1085 ErrorOr<LBREntry> LBRRes = parseLBREntry(); 1086 if (std::error_code EC = LBRRes.getError()) 1087 return EC; 1088 LBREntry LBR = LBRRes.get(); 1089 if (ignoreKernelInterrupt(LBR)) 1090 continue; 1091 if (!BC->HasFixedLoadAddress) 1092 adjustLBR(LBR, MMapInfoIter->second); 1093 Res.LBR.push_back(LBR); 1094 } 1095 1096 return Res; 1097 } 1098 1099 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { 1100 while (checkAndConsumeFS()) { 1101 } 1102 1103 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1104 if (std::error_code EC = PIDRes.getError()) 1105 return EC; 1106 1107 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1108 if (MMapInfoIter == BinaryMMapInfo.end()) { 1109 consumeRestOfLine(); 1110 return PerfBasicSample{StringRef(), 0}; 1111 } 
1112 1113 while (checkAndConsumeFS()) { 1114 } 1115 1116 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1117 if (std::error_code EC = Event.getError()) 1118 return EC; 1119 1120 while (checkAndConsumeFS()) { 1121 } 1122 1123 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true); 1124 if (std::error_code EC = AddrRes.getError()) 1125 return EC; 1126 1127 if (!checkAndConsumeNewLine()) { 1128 reportError("expected end of line"); 1129 return make_error_code(llvm::errc::io_error); 1130 } 1131 1132 uint64_t Address = *AddrRes; 1133 if (!BC->HasFixedLoadAddress) 1134 adjustAddress(Address, MMapInfoIter->second); 1135 1136 return PerfBasicSample{Event.get(), Address}; 1137 } 1138 1139 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { 1140 PerfMemSample Res{0, 0}; 1141 1142 while (checkAndConsumeFS()) { 1143 } 1144 1145 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1146 if (std::error_code EC = PIDRes.getError()) 1147 return EC; 1148 1149 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1150 if (MMapInfoIter == BinaryMMapInfo.end()) { 1151 consumeRestOfLine(); 1152 return Res; 1153 } 1154 1155 while (checkAndConsumeFS()) { 1156 } 1157 1158 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1159 if (std::error_code EC = Event.getError()) 1160 return EC; 1161 if (Event.get().find("mem-loads") == StringRef::npos) { 1162 consumeRestOfLine(); 1163 return Res; 1164 } 1165 1166 while (checkAndConsumeFS()) { 1167 } 1168 1169 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator); 1170 if (std::error_code EC = AddrRes.getError()) 1171 return EC; 1172 1173 while (checkAndConsumeFS()) { 1174 } 1175 1176 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1177 if (std::error_code EC = PCRes.getError()) { 1178 consumeRestOfLine(); 1179 return EC; 1180 } 1181 1182 if (!checkAndConsumeNewLine()) { 1183 reportError("expected end of line"); 1184 return make_error_code(llvm::errc::io_error); 1185 } 1186 1187 
uint64_t Address = *AddrRes; 1188 if (!BC->HasFixedLoadAddress) 1189 adjustAddress(Address, MMapInfoIter->second); 1190 1191 return PerfMemSample{PCRes.get(), Address}; 1192 } 1193 1194 ErrorOr<Location> DataAggregator::parseLocationOrOffset() { 1195 auto parseOffset = [this]() -> ErrorOr<Location> { 1196 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator); 1197 if (std::error_code EC = Res.getError()) 1198 return EC; 1199 return Location(Res.get()); 1200 }; 1201 1202 size_t Sep = ParsingBuf.find_first_of(" \n"); 1203 if (Sep == StringRef::npos) 1204 return parseOffset(); 1205 StringRef LookAhead = ParsingBuf.substr(0, Sep); 1206 if (LookAhead.find_first_of(":") == StringRef::npos) 1207 return parseOffset(); 1208 1209 ErrorOr<StringRef> BuildID = parseString(':'); 1210 if (std::error_code EC = BuildID.getError()) 1211 return EC; 1212 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator); 1213 if (std::error_code EC = Offset.getError()) 1214 return EC; 1215 return Location(true, BuildID.get(), Offset.get()); 1216 } 1217 1218 ErrorOr<DataAggregator::AggregatedLBREntry> 1219 DataAggregator::parseAggregatedLBREntry() { 1220 while (checkAndConsumeFS()) { 1221 } 1222 1223 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); 1224 if (std::error_code EC = TypeOrErr.getError()) 1225 return EC; 1226 auto Type = AggregatedLBREntry::BRANCH; 1227 if (TypeOrErr.get() == "B") { 1228 Type = AggregatedLBREntry::BRANCH; 1229 } else if (TypeOrErr.get() == "F") { 1230 Type = AggregatedLBREntry::FT; 1231 } else if (TypeOrErr.get() == "f") { 1232 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; 1233 } else { 1234 reportError("expected B, F or f"); 1235 return make_error_code(llvm::errc::io_error); 1236 } 1237 1238 while (checkAndConsumeFS()) { 1239 } 1240 ErrorOr<Location> From = parseLocationOrOffset(); 1241 if (std::error_code EC = From.getError()) 1242 return EC; 1243 1244 while (checkAndConsumeFS()) { 1245 } 1246 ErrorOr<Location> To = parseLocationOrOffset(); 1247 if 
(std::error_code EC = To.getError()) 1248 return EC; 1249 1250 while (checkAndConsumeFS()) { 1251 } 1252 ErrorOr<int64_t> Frequency = 1253 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); 1254 if (std::error_code EC = Frequency.getError()) 1255 return EC; 1256 1257 uint64_t Mispreds = 0; 1258 if (Type == AggregatedLBREntry::BRANCH) { 1259 while (checkAndConsumeFS()) { 1260 } 1261 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); 1262 if (std::error_code EC = MispredsOrErr.getError()) 1263 return EC; 1264 Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); 1265 } 1266 1267 if (!checkAndConsumeNewLine()) { 1268 reportError("expected end of line"); 1269 return make_error_code(llvm::errc::io_error); 1270 } 1271 1272 return AggregatedLBREntry{From.get(), To.get(), 1273 static_cast<uint64_t>(Frequency.get()), Mispreds, 1274 Type}; 1275 } 1276 1277 bool DataAggregator::hasData() { 1278 if (ParsingBuf.size() == 0) 1279 return false; 1280 1281 return true; 1282 } 1283 1284 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { 1285 return opts::IgnoreInterruptLBR && 1286 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); 1287 } 1288 1289 std::error_code DataAggregator::printLBRHeatMap() { 1290 outs() << "PERF2BOLT: parse branch events...\n"; 1291 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1292 TimerGroupDesc, opts::TimeAggregator); 1293 1294 if (opts::LinuxKernelMode) { 1295 opts::HeatmapMaxAddress = 0xffffffffffffffff; 1296 opts::HeatmapMinAddress = KernelBaseAddr; 1297 } 1298 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, 1299 opts::HeatmapMaxAddress, getTextSections(BC)); 1300 uint64_t NumTotalSamples = 0; 1301 1302 if (opts::BasicAggregation) { 1303 while (hasData()) { 1304 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); 1305 if (std::error_code EC = SampleRes.getError()) { 1306 if (EC == errc::no_such_process) 1307 continue; 1308 return EC; 1309 } 1310 
PerfBasicSample &Sample = SampleRes.get(); 1311 HM.registerAddress(Sample.PC); 1312 NumTotalSamples++; 1313 } 1314 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n"; 1315 } else { 1316 while (hasData()) { 1317 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1318 if (std::error_code EC = SampleRes.getError()) { 1319 if (EC == errc::no_such_process) 1320 continue; 1321 return EC; 1322 } 1323 1324 PerfBranchSample &Sample = SampleRes.get(); 1325 1326 // LBRs are stored in reverse execution order. NextLBR refers to the next 1327 // executed branch record. 1328 const LBREntry *NextLBR = nullptr; 1329 for (const LBREntry &LBR : Sample.LBR) { 1330 if (NextLBR) { 1331 // Record fall-through trace. 1332 const uint64_t TraceFrom = LBR.To; 1333 const uint64_t TraceTo = NextLBR->From; 1334 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; 1335 } 1336 NextLBR = &LBR; 1337 } 1338 if (!Sample.LBR.empty()) { 1339 HM.registerAddress(Sample.LBR.front().To); 1340 HM.registerAddress(Sample.LBR.back().From); 1341 } 1342 NumTotalSamples += Sample.LBR.size(); 1343 } 1344 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; 1345 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; 1346 } 1347 1348 if (!NumTotalSamples) { 1349 if (opts::BasicAggregation) { 1350 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " 1351 "Cannot build heatmap."; 1352 } else { 1353 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " 1354 "Cannot build heatmap. 
Use -nl for building heatmap from " 1355 "basic events.\n"; 1356 } 1357 exit(1); 1358 } 1359 1360 outs() << "HEATMAP: building heat map...\n"; 1361 1362 for (const auto &LBR : FallthroughLBRs) { 1363 const Trace &Trace = LBR.first; 1364 const FTInfo &Info = LBR.second; 1365 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); 1366 } 1367 1368 if (HM.getNumInvalidRanges()) 1369 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; 1370 1371 if (!HM.size()) { 1372 errs() << "HEATMAP-ERROR: no valid traces registered\n"; 1373 exit(1); 1374 } 1375 1376 HM.print(opts::OutputFilename); 1377 if (opts::OutputFilename == "-") 1378 HM.printCDF(opts::OutputFilename); 1379 else 1380 HM.printCDF(opts::OutputFilename + ".csv"); 1381 if (opts::OutputFilename == "-") 1382 HM.printSectionHotness(opts::OutputFilename); 1383 else 1384 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv"); 1385 1386 return std::error_code(); 1387 } 1388 1389 std::error_code DataAggregator::parseBranchEvents() { 1390 outs() << "PERF2BOLT: parse branch events...\n"; 1391 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1392 TimerGroupDesc, opts::TimeAggregator); 1393 1394 uint64_t NumTotalSamples = 0; 1395 uint64_t NumEntries = 0; 1396 uint64_t NumSamples = 0; 1397 uint64_t NumSamplesNoLBR = 0; 1398 uint64_t NumTraces = 0; 1399 bool NeedsSkylakeFix = false; 1400 1401 while (hasData() && NumTotalSamples < opts::MaxSamples) { 1402 ++NumTotalSamples; 1403 1404 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1405 if (std::error_code EC = SampleRes.getError()) { 1406 if (EC == errc::no_such_process) 1407 continue; 1408 return EC; 1409 } 1410 ++NumSamples; 1411 1412 PerfBranchSample &Sample = SampleRes.get(); 1413 if (opts::WriteAutoFDOData) 1414 ++BasicSamples[Sample.PC]; 1415 1416 if (Sample.LBR.empty()) { 1417 ++NumSamplesNoLBR; 1418 continue; 1419 } 1420 1421 NumEntries += Sample.LBR.size(); 1422 if (BAT && 
Sample.LBR.size() == 32 && !NeedsSkylakeFix) { 1423 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; 1424 NeedsSkylakeFix = true; 1425 } 1426 1427 // LBRs are stored in reverse execution order. NextPC refers to the next 1428 // recorded executed PC. 1429 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0; 1430 uint32_t NumEntry = 0; 1431 for (const LBREntry &LBR : Sample.LBR) { 1432 ++NumEntry; 1433 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) 1434 // sometimes record entry 32 as an exact copy of entry 31. This will cause 1435 // us to likely record an invalid trace and generate a stale function for 1436 // BAT mode (non BAT disassembles the function and is able to ignore this 1437 // trace at aggregation time). Drop first 2 entries (last two, in 1438 // chronological order) 1439 if (NeedsSkylakeFix && NumEntry <= 2) 1440 continue; 1441 if (NextPC) { 1442 // Record fall-through trace. 1443 const uint64_t TraceFrom = LBR.To; 1444 const uint64_t TraceTo = NextPC; 1445 const BinaryFunction *TraceBF = 1446 getBinaryFunctionContainingAddress(TraceFrom); 1447 if (TraceBF && TraceBF->containsAddress(TraceTo)) { 1448 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; 1449 if (TraceBF->containsAddress(LBR.From)) 1450 ++Info.InternCount; 1451 else 1452 ++Info.ExternCount; 1453 } else { 1454 if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) { 1455 LLVM_DEBUG(dbgs() 1456 << "Invalid trace starting in " 1457 << TraceBF->getPrintName() << " @ " 1458 << Twine::utohexstr(TraceFrom - TraceBF->getAddress()) 1459 << " and ending @ " << Twine::utohexstr(TraceTo) 1460 << '\n'); 1461 ++NumInvalidTraces; 1462 } else { 1463 LLVM_DEBUG(dbgs() 1464 << "Out of range trace starting in " 1465 << (TraceBF ? TraceBF->getPrintName() : "None") << " @ " 1466 << Twine::utohexstr( 1467 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0)) 1468 << " and ending in " 1469 << (getBinaryFunctionContainingAddress(TraceTo) 1470 ? 
getBinaryFunctionContainingAddress(TraceTo) 1471 ->getPrintName() 1472 : "None") 1473 << " @ " 1474 << Twine::utohexstr( 1475 TraceTo - 1476 (getBinaryFunctionContainingAddress(TraceTo) 1477 ? getBinaryFunctionContainingAddress(TraceTo) 1478 ->getAddress() 1479 : 0)) 1480 << '\n'); 1481 ++NumLongRangeTraces; 1482 } 1483 } 1484 ++NumTraces; 1485 } 1486 NextPC = LBR.From; 1487 1488 uint64_t From = LBR.From; 1489 if (!getBinaryFunctionContainingAddress(From)) 1490 From = 0; 1491 uint64_t To = LBR.To; 1492 if (!getBinaryFunctionContainingAddress(To)) 1493 To = 0; 1494 if (!From && !To) 1495 continue; 1496 BranchInfo &Info = BranchLBRs[Trace(From, To)]; 1497 ++Info.TakenCount; 1498 Info.MispredCount += LBR.Mispred; 1499 } 1500 } 1501 1502 for (const auto &LBR : BranchLBRs) { 1503 const Trace &Trace = LBR.first; 1504 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From)) 1505 BF->setHasProfileAvailable(); 1506 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To)) 1507 BF->setHasProfileAvailable(); 1508 } 1509 1510 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { 1511 OS << " ("; 1512 if (OS.has_colors()) { 1513 if (Percent > T2) 1514 OS.changeColor(raw_ostream::RED); 1515 else if (Percent > T1) 1516 OS.changeColor(raw_ostream::YELLOW); 1517 else 1518 OS.changeColor(raw_ostream::GREEN); 1519 } 1520 OS << format("%.1f%%", Percent); 1521 if (OS.has_colors()) 1522 OS.resetColor(); 1523 OS << ")"; 1524 }; 1525 1526 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries 1527 << " LBR entries\n"; 1528 if (NumTotalSamples) { 1529 if (NumSamples && NumSamplesNoLBR == NumSamples) { 1530 // Note: we don't know if perf2bolt is being used to parse memory samples 1531 // at this point. In this case, it is OK to parse zero LBRs. 1532 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " 1533 "LBR. 
Record profile with perf record -j any or run perf2bolt " 1534 "in no-LBR mode with -nl (the performance improvement in -nl " 1535 "mode may be limited)\n"; 1536 } else { 1537 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; 1538 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; 1539 outs() << "PERF2BOLT: " << IgnoredSamples << " samples"; 1540 printColored(outs(), PercentIgnored, 20, 50); 1541 outs() << " were ignored\n"; 1542 if (PercentIgnored > 50.0f) 1543 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " 1544 "were attributed to the input binary\n"; 1545 } 1546 } 1547 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1548 << NumInvalidTraces; 1549 float Perc = 0.0f; 1550 if (NumTraces > 0) { 1551 Perc = NumInvalidTraces * 100.0f / NumTraces; 1552 printColored(outs(), Perc, 5, 10); 1553 } 1554 outs() << "\n"; 1555 if (Perc > 10.0f) 1556 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1557 "binary is probably not the same binary used during profiling " 1558 "collection. The generated data may be ineffective for improving " 1559 "performance.\n\n"; 1560 1561 outs() << "PERF2BOLT: out of range traces involving unknown regions: " 1562 << NumLongRangeTraces; 1563 if (NumTraces > 0) 1564 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1565 outs() << "\n"; 1566 1567 if (NumColdSamples > 0) { 1568 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; 1569 outs() << "PERF2BOLT: " << NumColdSamples 1570 << format(" (%.1f%%)", ColdSamples) 1571 << " samples recorded in cold regions of split functions.\n"; 1572 if (ColdSamples > 5.0f) 1573 outs() 1574 << "WARNING: The BOLT-processed binary where samples were collected " 1575 "likely used bad data or your service observed a large shift in " 1576 "profile. 
You may want to audit this.\n"; 1577 } 1578 1579 return std::error_code(); 1580 } 1581 1582 void DataAggregator::processBranchEvents() { 1583 outs() << "PERF2BOLT: processing branch events...\n"; 1584 NamedRegionTimer T("processBranch", "Processing branch events", 1585 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1586 1587 for (const auto &AggrLBR : FallthroughLBRs) { 1588 const Trace &Loc = AggrLBR.first; 1589 const FTInfo &Info = AggrLBR.second; 1590 LBREntry First{Loc.From, Loc.From, false}; 1591 LBREntry Second{Loc.To, Loc.To, false}; 1592 if (Info.InternCount) 1593 doTrace(First, Second, Info.InternCount); 1594 if (Info.ExternCount) { 1595 First.From = 0; 1596 doTrace(First, Second, Info.ExternCount); 1597 } 1598 } 1599 1600 for (const auto &AggrLBR : BranchLBRs) { 1601 const Trace &Loc = AggrLBR.first; 1602 const BranchInfo &Info = AggrLBR.second; 1603 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); 1604 } 1605 } 1606 1607 std::error_code DataAggregator::parseBasicEvents() { 1608 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; 1609 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, 1610 TimerGroupDesc, opts::TimeAggregator); 1611 while (hasData()) { 1612 ErrorOr<PerfBasicSample> Sample = parseBasicSample(); 1613 if (std::error_code EC = Sample.getError()) 1614 return EC; 1615 1616 if (!Sample->PC) 1617 continue; 1618 1619 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1620 BF->setHasProfileAvailable(); 1621 1622 ++BasicSamples[Sample->PC]; 1623 EventNames.insert(Sample->EventName); 1624 } 1625 1626 return std::error_code(); 1627 } 1628 1629 void DataAggregator::processBasicEvents() { 1630 outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; 1631 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, 1632 TimerGroupDesc, opts::TimeAggregator); 1633 uint64_t OutOfRangeSamples = 0; 1634 uint64_t NumSamples = 0; 1635 for (auto &Sample : 
BasicSamples) { 1636 const uint64_t PC = Sample.first; 1637 const uint64_t HitCount = Sample.second; 1638 NumSamples += HitCount; 1639 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1640 if (!Func) { 1641 OutOfRangeSamples += HitCount; 1642 continue; 1643 } 1644 1645 doSample(*Func, PC, HitCount); 1646 } 1647 outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; 1648 1649 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " 1650 << OutOfRangeSamples; 1651 float Perc = 0.0f; 1652 if (NumSamples > 0) { 1653 outs() << " ("; 1654 Perc = OutOfRangeSamples * 100.0f / NumSamples; 1655 if (outs().has_colors()) { 1656 if (Perc > 60.0f) 1657 outs().changeColor(raw_ostream::RED); 1658 else if (Perc > 40.0f) 1659 outs().changeColor(raw_ostream::YELLOW); 1660 else 1661 outs().changeColor(raw_ostream::GREEN); 1662 } 1663 outs() << format("%.1f%%", Perc); 1664 if (outs().has_colors()) 1665 outs().resetColor(); 1666 outs() << ")"; 1667 } 1668 outs() << "\n"; 1669 if (Perc > 80.0f) 1670 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1671 "binary is probably not the same binary used during profiling " 1672 "collection. 
The generated data may be ineffective for improving " 1673 "performance.\n\n"; 1674 } 1675 1676 std::error_code DataAggregator::parseMemEvents() { 1677 outs() << "PERF2BOLT: parsing memory events...\n"; 1678 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, 1679 TimerGroupDesc, opts::TimeAggregator); 1680 while (hasData()) { 1681 ErrorOr<PerfMemSample> Sample = parseMemSample(); 1682 if (std::error_code EC = Sample.getError()) 1683 return EC; 1684 1685 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1686 BF->setHasProfileAvailable(); 1687 1688 MemSamples.emplace_back(std::move(Sample.get())); 1689 } 1690 1691 return std::error_code(); 1692 } 1693 1694 void DataAggregator::processMemEvents() { 1695 NamedRegionTimer T("ProcessMemEvents", "Processing mem events", 1696 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1697 for (const PerfMemSample &Sample : MemSamples) { 1698 uint64_t PC = Sample.PC; 1699 uint64_t Addr = Sample.Addr; 1700 StringRef FuncName; 1701 StringRef MemName; 1702 1703 // Try to resolve symbol for PC 1704 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1705 if (!Func) { 1706 LLVM_DEBUG(if (PC != 0) { 1707 dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x" 1708 << Twine::utohexstr(Addr) << "\n"; 1709 }); 1710 continue; 1711 } 1712 1713 FuncName = Func->getOneName(); 1714 PC -= Func->getAddress(); 1715 1716 // Try to resolve symbol for memory load 1717 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { 1718 MemName = BD->getName(); 1719 Addr -= BD->getAddress(); 1720 } else if (opts::FilterMemProfile) { 1721 // Filter out heap/stack accesses 1722 continue; 1723 } 1724 1725 const Location FuncLoc(!FuncName.empty(), FuncName, PC); 1726 const Location AddrLoc(!MemName.empty(), MemName, Addr); 1727 1728 FuncMemData *MemData = &NamesToMemEvents[FuncName]; 1729 setMemData(*Func, MemData); 1730 MemData->update(FuncLoc, AddrLoc); 1731 LLVM_DEBUG(dbgs() << 
"Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); 1732 } 1733 } 1734 1735 std::error_code DataAggregator::parsePreAggregatedLBRSamples() { 1736 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; 1737 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", 1738 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1739 while (hasData()) { 1740 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); 1741 if (std::error_code EC = AggrEntry.getError()) 1742 return EC; 1743 1744 if (BinaryFunction *BF = 1745 getBinaryFunctionContainingAddress(AggrEntry->From.Offset)) 1746 BF->setHasProfileAvailable(); 1747 if (BinaryFunction *BF = 1748 getBinaryFunctionContainingAddress(AggrEntry->To.Offset)) 1749 BF->setHasProfileAvailable(); 1750 1751 AggregatedLBRs.emplace_back(std::move(AggrEntry.get())); 1752 } 1753 1754 return std::error_code(); 1755 } 1756 1757 void DataAggregator::processPreAggregated() { 1758 outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; 1759 NamedRegionTimer T("processAggregated", "Processing aggregated branch events", 1760 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1761 1762 uint64_t NumTraces = 0; 1763 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { 1764 switch (AggrEntry.EntryType) { 1765 case AggregatedLBREntry::BRANCH: 1766 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, 1767 AggrEntry.Mispreds); 1768 break; 1769 case AggregatedLBREntry::FT: 1770 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { 1771 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT 1772 ? 
AggrEntry.From.Offset 1773 : 0, 1774 AggrEntry.From.Offset, false}; 1775 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; 1776 doTrace(First, Second, AggrEntry.Count); 1777 NumTraces += AggrEntry.Count; 1778 break; 1779 } 1780 } 1781 } 1782 1783 outs() << "PERF2BOLT: read " << AggregatedLBRs.size() 1784 << " aggregated LBR entries\n"; 1785 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1786 << NumInvalidTraces; 1787 float Perc = 0.0f; 1788 if (NumTraces > 0) { 1789 outs() << " ("; 1790 Perc = NumInvalidTraces * 100.0f / NumTraces; 1791 if (outs().has_colors()) { 1792 if (Perc > 10.0f) 1793 outs().changeColor(raw_ostream::RED); 1794 else if (Perc > 5.0f) 1795 outs().changeColor(raw_ostream::YELLOW); 1796 else 1797 outs().changeColor(raw_ostream::GREEN); 1798 } 1799 outs() << format("%.1f%%", Perc); 1800 if (outs().has_colors()) 1801 outs().resetColor(); 1802 outs() << ")"; 1803 } 1804 outs() << "\n"; 1805 if (Perc > 10.0f) 1806 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1807 "binary is probably not the same binary used during profiling " 1808 "collection. 
The generated data may be ineffective for improving " 1809 "performance.\n\n"; 1810 1811 outs() << "PERF2BOLT: Out of range traces involving unknown regions: " 1812 << NumLongRangeTraces; 1813 if (NumTraces > 0) 1814 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1815 outs() << "\n"; 1816 } 1817 1818 Optional<int32_t> DataAggregator::parseCommExecEvent() { 1819 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1820 if (LineEnd == StringRef::npos) { 1821 reportError("expected rest of line"); 1822 Diag << "Found: " << ParsingBuf << "\n"; 1823 return NoneType(); 1824 } 1825 StringRef Line = ParsingBuf.substr(0, LineEnd); 1826 1827 size_t Pos = Line.find("PERF_RECORD_COMM exec"); 1828 if (Pos == StringRef::npos) 1829 return NoneType(); 1830 Line = Line.drop_front(Pos); 1831 1832 // Line: 1833 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" 1834 StringRef PIDStr = Line.rsplit(':').second.split('/').first; 1835 int32_t PID; 1836 if (PIDStr.getAsInteger(10, PID)) { 1837 reportError("expected PID"); 1838 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1839 return NoneType(); 1840 } 1841 1842 return PID; 1843 } 1844 1845 namespace { 1846 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) { 1847 const StringRef SecTimeStr = TimeStr.split('.').first; 1848 const StringRef USecTimeStr = TimeStr.split('.').second; 1849 uint64_t SecTime; 1850 uint64_t USecTime; 1851 if (SecTimeStr.getAsInteger(10, SecTime) || 1852 USecTimeStr.getAsInteger(10, USecTime)) 1853 return NoneType(); 1854 return SecTime * 1000000ULL + USecTime; 1855 } 1856 } 1857 1858 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { 1859 while (checkAndConsumeFS()) { 1860 } 1861 1862 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1863 if (LineEnd == StringRef::npos) { 1864 reportError("expected rest of line"); 1865 Diag << "Found: " << ParsingBuf << "\n"; 1866 return NoneType(); 1867 } 1868 StringRef Line = ParsingBuf.substr(0, LineEnd); 1869 1870 size_t Pos = 
Line.find("PERF_RECORD_FORK"); 1871 if (Pos == StringRef::npos) { 1872 consumeRestOfLine(); 1873 return NoneType(); 1874 } 1875 1876 ForkInfo FI; 1877 1878 const StringRef TimeStr = 1879 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1880 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { 1881 FI.Time = *TimeRes; 1882 } 1883 1884 Line = Line.drop_front(Pos); 1885 1886 // Line: 1887 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) 1888 const StringRef ChildPIDStr = Line.split('(').second.split(':').first; 1889 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { 1890 reportError("expected PID"); 1891 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; 1892 return NoneType(); 1893 } 1894 1895 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; 1896 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { 1897 reportError("expected PID"); 1898 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; 1899 return NoneType(); 1900 } 1901 1902 consumeRestOfLine(); 1903 1904 return FI; 1905 } 1906 1907 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> 1908 DataAggregator::parseMMapEvent() { 1909 while (checkAndConsumeFS()) { 1910 } 1911 1912 MMapInfo ParsedInfo; 1913 1914 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1915 if (LineEnd == StringRef::npos) { 1916 reportError("expected rest of line"); 1917 Diag << "Found: " << ParsingBuf << "\n"; 1918 return make_error_code(llvm::errc::io_error); 1919 } 1920 StringRef Line = ParsingBuf.substr(0, LineEnd); 1921 1922 size_t Pos = Line.find("PERF_RECORD_MMAP2"); 1923 if (Pos == StringRef::npos) { 1924 consumeRestOfLine(); 1925 return std::make_pair(StringRef(), ParsedInfo); 1926 } 1927 1928 // Line: 1929 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> 1930 1931 const StringRef TimeStr = 1932 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1933 if (Optional<uint64_t> TimeRes = 
parsePerfTime(TimeStr)) 1934 ParsedInfo.Time = *TimeRes; 1935 1936 Line = Line.drop_front(Pos); 1937 1938 // Line: 1939 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> 1940 1941 StringRef FileName = Line.rsplit(FieldSeparator).second; 1942 if (FileName.startswith("//") || FileName.startswith("[")) { 1943 consumeRestOfLine(); 1944 return std::make_pair(StringRef(), ParsedInfo); 1945 } 1946 FileName = sys::path::filename(FileName); 1947 1948 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; 1949 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { 1950 reportError("expected PID"); 1951 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1952 return make_error_code(llvm::errc::io_error); 1953 } 1954 1955 const StringRef BaseAddressStr = Line.split('[').second.split('(').first; 1956 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) { 1957 reportError("expected base address"); 1958 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 1959 return make_error_code(llvm::errc::io_error); 1960 } 1961 1962 const StringRef SizeStr = Line.split('(').second.split(')').first; 1963 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { 1964 reportError("expected mmaped size"); 1965 Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; 1966 return make_error_code(llvm::errc::io_error); 1967 } 1968 1969 const StringRef OffsetStr = 1970 Line.split('@').second.ltrim().split(FieldSeparator).first; 1971 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { 1972 reportError("expected mmaped page-aligned offset"); 1973 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; 1974 return make_error_code(llvm::errc::io_error); 1975 } 1976 1977 consumeRestOfLine(); 1978 1979 return std::make_pair(FileName, ParsedInfo); 1980 } 1981 1982 std::error_code DataAggregator::parseMMapEvents() { 1983 outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; 1984 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", 
TimerGroupName, 1985 TimerGroupDesc, opts::TimeAggregator); 1986 1987 std::multimap<StringRef, MMapInfo> GlobalMMapInfo; 1988 while (hasData()) { 1989 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); 1990 if (std::error_code EC = FileMMapInfoRes.getError()) 1991 return EC; 1992 1993 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); 1994 if (FileMMapInfo.second.PID == -1) 1995 continue; 1996 1997 // Consider only the first mapping of the file for any given PID 1998 bool PIDExists = false; 1999 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); 2000 for (auto MI = Range.first; MI != Range.second; ++MI) { 2001 if (MI->second.PID == FileMMapInfo.second.PID) { 2002 PIDExists = true; 2003 break; 2004 } 2005 } 2006 if (PIDExists) 2007 continue; 2008 2009 GlobalMMapInfo.insert(FileMMapInfo); 2010 } 2011 2012 LLVM_DEBUG({ 2013 dbgs() << "FileName -> mmap info:\n"; 2014 for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo) 2015 dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x" 2016 << Twine::utohexstr(Pair.second.MMapAddress) << ", " 2017 << Twine::utohexstr(Pair.second.Size) << " @ " 2018 << Twine::utohexstr(Pair.second.Offset) << "]\n"; 2019 }); 2020 2021 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename()); 2022 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) { 2023 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName 2024 << "\" for profile matching\n"; 2025 NameToUse = BuildIDBinaryName; 2026 } 2027 2028 auto Range = GlobalMMapInfo.equal_range(NameToUse); 2029 for (auto I = Range.first; I != Range.second; ++I) { 2030 MMapInfo &MMapInfo = I->second; 2031 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { 2032 // Check that the binary mapping matches one of the segments. 
2033 bool MatchFound = false; 2034 for (auto &KV : BC->SegmentMapInfo) { 2035 SegmentInfo &SegInfo = KV.second; 2036 // The mapping is page-aligned and hence the MMapAddress could be 2037 // different from the segment start address. We cannot know the page 2038 // size of the mapping, but we know it should not exceed the segment 2039 // alignment value. Hence we are performing an approximate check. 2040 if (SegInfo.Address >= MMapInfo.MMapAddress && 2041 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) { 2042 MatchFound = true; 2043 break; 2044 } 2045 } 2046 if (!MatchFound) { 2047 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse 2048 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n'; 2049 continue; 2050 } 2051 } 2052 2053 // Set base address for shared objects. 2054 if (!BC->HasFixedLoadAddress) { 2055 Optional<uint64_t> BaseAddress = 2056 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset); 2057 if (!BaseAddress) { 2058 errs() << "PERF2BOLT-WARNING: unable to find base address of the " 2059 "binary when memory mapped at 0x" 2060 << Twine::utohexstr(MMapInfo.MMapAddress) 2061 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) 2062 << ". 
Ignoring profile data for this mapping\n"; 2063 continue; 2064 } else { 2065 MMapInfo.BaseAddress = *BaseAddress; 2066 } 2067 } 2068 2069 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2070 } 2071 2072 if (BinaryMMapInfo.empty()) { 2073 if (errs().has_colors()) 2074 errs().changeColor(raw_ostream::RED); 2075 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" 2076 << BC->getFilename() << "\"."; 2077 if (!GlobalMMapInfo.empty()) { 2078 errs() << " Profile for the following binary name(s) is available:\n"; 2079 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; 2080 I = GlobalMMapInfo.upper_bound(I->first)) 2081 errs() << " " << I->first << '\n'; 2082 errs() << "Please rename the input binary.\n"; 2083 } else { 2084 errs() << " Failed to extract any binary name from a profile.\n"; 2085 } 2086 if (errs().has_colors()) 2087 errs().resetColor(); 2088 2089 exit(1); 2090 } 2091 2092 return std::error_code(); 2093 } 2094 2095 std::error_code DataAggregator::parseTaskEvents() { 2096 outs() << "PERF2BOLT: parsing perf-script task events output\n"; 2097 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, 2098 TimerGroupDesc, opts::TimeAggregator); 2099 2100 while (hasData()) { 2101 if (Optional<int32_t> CommInfo = parseCommExecEvent()) { 2102 // Remove forked child that ran execve 2103 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); 2104 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) 2105 BinaryMMapInfo.erase(MMapInfoIter); 2106 consumeRestOfLine(); 2107 continue; 2108 } 2109 2110 Optional<ForkInfo> ForkInfo = parseForkEvent(); 2111 if (!ForkInfo) 2112 continue; 2113 2114 if (ForkInfo->ParentPID == ForkInfo->ChildPID) 2115 continue; 2116 2117 if (ForkInfo->Time == 0) { 2118 // Process was forked and mmaped before perf ran. In this case the child 2119 // should have its own mmap entry unless it was execve'd. 
2120 continue; 2121 } 2122 2123 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); 2124 if (MMapInfoIter == BinaryMMapInfo.end()) 2125 continue; 2126 2127 MMapInfo MMapInfo = MMapInfoIter->second; 2128 MMapInfo.PID = ForkInfo->ChildPID; 2129 MMapInfo.Forked = true; 2130 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2131 } 2132 2133 outs() << "PERF2BOLT: input binary is associated with " 2134 << BinaryMMapInfo.size() << " PID(s)\n"; 2135 2136 LLVM_DEBUG({ 2137 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 2138 outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "") 2139 << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x" 2140 << Twine::utohexstr(MMI.second.Size) << ")\n"; 2141 }); 2142 2143 return std::error_code(); 2144 } 2145 2146 Optional<std::pair<StringRef, StringRef>> 2147 DataAggregator::parseNameBuildIDPair() { 2148 while (checkAndConsumeFS()) { 2149 } 2150 2151 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true); 2152 if (std::error_code EC = BuildIDStr.getError()) 2153 return NoneType(); 2154 2155 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true); 2156 if (std::error_code EC = NameStr.getError()) 2157 return NoneType(); 2158 2159 consumeRestOfLine(); 2160 return std::make_pair(NameStr.get(), BuildIDStr.get()); 2161 } 2162 2163 Optional<StringRef> 2164 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { 2165 while (hasData()) { 2166 Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair(); 2167 if (!IDPair) 2168 return NoneType(); 2169 2170 if (IDPair->second.startswith(FileBuildID)) 2171 return sys::path::filename(IDPair->first); 2172 } 2173 return NoneType(); 2174 } 2175 2176 std::error_code 2177 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { 2178 std::error_code EC; 2179 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2180 if (EC) 2181 return EC; 2182 2183 bool WriteMemLocs = 
false; 2184 2185 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { 2186 if (WriteMemLocs) 2187 OutFile << (Loc.IsSymbol ? "4 " : "3 "); 2188 else 2189 OutFile << (Loc.IsSymbol ? "1 " : "0 "); 2190 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) 2191 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; 2192 }; 2193 2194 uint64_t BranchValues = 0; 2195 uint64_t MemValues = 0; 2196 2197 if (BAT) 2198 OutFile << "boltedcollection\n"; 2199 if (opts::BasicAggregation) { 2200 OutFile << "no_lbr"; 2201 for (const StringMapEntry<NoneType> &Entry : EventNames) 2202 OutFile << " " << Entry.getKey(); 2203 OutFile << "\n"; 2204 2205 for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) { 2206 for (const SampleInfo &SI : Func.getValue().Data) { 2207 writeLocation(SI.Loc); 2208 OutFile << SI.Hits << "\n"; 2209 ++BranchValues; 2210 } 2211 } 2212 } else { 2213 for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 2214 for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) { 2215 writeLocation(BI.From); 2216 writeLocation(BI.To); 2217 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2218 ++BranchValues; 2219 } 2220 for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) { 2221 // Do not output if source is a known symbol, since this was already 2222 // accounted for in the source function 2223 if (BI.From.IsSymbol) 2224 continue; 2225 writeLocation(BI.From); 2226 writeLocation(BI.To); 2227 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2228 ++BranchValues; 2229 } 2230 } 2231 2232 WriteMemLocs = true; 2233 for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) { 2234 for (const MemInfo &MemEvent : Func.getValue().Data) { 2235 writeLocation(MemEvent.Offset); 2236 writeLocation(MemEvent.Addr); 2237 OutFile << MemEvent.Count << "\n"; 2238 ++MemValues; 2239 } 2240 } 2241 } 2242 2243 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues 2244 << " 
memory objects to " << OutputFilename << "\n"; 2245 2246 return std::error_code(); 2247 } 2248 2249 void DataAggregator::dump() const { DataReader::dump(); } 2250 2251 void DataAggregator::dump(const LBREntry &LBR) const { 2252 Diag << "From: " << Twine::utohexstr(LBR.From) 2253 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred 2254 << "\n"; 2255 } 2256 2257 void DataAggregator::dump(const PerfBranchSample &Sample) const { 2258 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n"; 2259 for (const LBREntry &LBR : Sample.LBR) 2260 dump(LBR); 2261 } 2262 2263 void DataAggregator::dump(const PerfMemSample &Sample) const { 2264 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n"; 2265 } 2266