1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions reads profile data written by perf record, 10 // aggregate it and then write it back to an output file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Profile/DataAggregator.h" 15 #include "bolt/Core/BinaryContext.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "bolt/Profile/BoltAddressTranslation.h" 18 #include "bolt/Profile/Heatmap.h" 19 #include "bolt/Utils/CommandLineOpts.h" 20 #include "bolt/Utils/Utils.h" 21 #include "llvm/ADT/ScopeExit.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/Process.h" 26 #include "llvm/Support/Program.h" 27 #include "llvm/Support/Regex.h" 28 #include "llvm/Support/Timer.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <map> 31 #include <unordered_map> 32 33 #define DEBUG_TYPE "aggregator" 34 35 using namespace llvm; 36 using namespace bolt; 37 38 namespace opts { 39 40 static cl::opt<bool> 41 BasicAggregation("nl", 42 cl::desc("aggregate basic samples (without LBR info)"), 43 cl::init(false), 44 cl::ZeroOrMore, 45 cl::cat(AggregatorCategory)); 46 47 static cl::opt<bool> 48 FilterMemProfile("filter-mem-profile", 49 cl::desc("if processing a memory profile, filter out stack or heap accesses " 50 "that won't be useful for BOLT to reduce profile file size"), 51 cl::init(true), 52 cl::cat(AggregatorCategory)); 53 54 static cl::opt<unsigned long long> 55 FilterPID("pid", 56 cl::desc("only use samples from process with specified PID"), 57 cl::init(0), 58 
cl::Optional, 59 cl::cat(AggregatorCategory)); 60 61 static cl::opt<bool> 62 IgnoreBuildID("ignore-build-id", 63 cl::desc("continue even if build-ids in input binary and perf.data mismatch"), 64 cl::init(false), 65 cl::cat(AggregatorCategory)); 66 67 static cl::opt<bool> 68 IgnoreInterruptLBR("ignore-interrupt-lbr", 69 cl::desc("ignore kernel interrupt LBR that happens asynchronously"), 70 cl::init(true), 71 cl::ZeroOrMore, 72 cl::cat(AggregatorCategory)); 73 74 static cl::opt<unsigned long long> 75 MaxSamples("max-samples", 76 cl::init(-1ULL), 77 cl::desc("maximum number of samples to read from LBR profile"), 78 cl::Optional, 79 cl::Hidden, 80 cl::cat(AggregatorCategory)); 81 82 static cl::opt<bool> 83 ReadPreAggregated("pa", 84 cl::desc("skip perf and read data from a pre-aggregated file format"), 85 cl::init(false), 86 cl::ZeroOrMore, 87 cl::cat(AggregatorCategory)); 88 89 static cl::opt<bool> 90 TimeAggregator("time-aggr", 91 cl::desc("time BOLT aggregator"), 92 cl::init(false), 93 cl::ZeroOrMore, 94 cl::cat(AggregatorCategory)); 95 96 static cl::opt<bool> 97 UseEventPC("use-event-pc", 98 cl::desc("use event PC in combination with LBR sampling"), 99 cl::init(false), 100 cl::ZeroOrMore, 101 cl::cat(AggregatorCategory)); 102 103 static cl::opt<bool> 104 WriteAutoFDOData("autofdo", 105 cl::desc("generate autofdo textual data instead of bolt data"), 106 cl::init(false), 107 cl::ZeroOrMore, 108 cl::cat(AggregatorCategory)); 109 110 } // namespace opts 111 112 namespace { 113 114 const char TimerGroupName[] = "aggregator"; 115 const char TimerGroupDesc[] = "Aggregator"; 116 117 } 118 119 constexpr uint64_t DataAggregator::KernelBaseAddr; 120 121 DataAggregator::~DataAggregator() { deleteTempFiles(); } 122 123 namespace { 124 void deleteTempFile(const std::string &FileName) { 125 if (std::error_code Errc = sys::fs::remove(FileName.c_str())) 126 errs() << "PERF2BOLT: failed to delete temporary file " << FileName 127 << " with error " << Errc.message() << "\n"; 128 } 
129 } 130 131 void DataAggregator::deleteTempFiles() { 132 for (std::string &FileName : TempFiles) 133 deleteTempFile(FileName); 134 TempFiles.clear(); 135 } 136 137 void DataAggregator::findPerfExecutable() { 138 Optional<std::string> PerfExecutable = 139 sys::Process::FindInEnvPath("PATH", "perf"); 140 if (!PerfExecutable) { 141 outs() << "PERF2BOLT: No perf executable found!\n"; 142 exit(1); 143 } 144 PerfPath = *PerfExecutable; 145 } 146 147 void DataAggregator::start() { 148 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; 149 150 // Don't launch perf for pre-aggregated files 151 if (opts::ReadPreAggregated) 152 return; 153 154 findPerfExecutable(); 155 156 if (opts::BasicAggregation) 157 launchPerfProcess("events without LBR", 158 MainEventsPPI, 159 "script -F pid,event,ip", 160 /*Wait = */false); 161 else 162 launchPerfProcess("branch events", 163 MainEventsPPI, 164 "script -F pid,ip,brstack", 165 /*Wait = */false); 166 167 // Note: we launch script for mem events regardless of the option, as the 168 // command fails fairly fast if mem events were not collected. 
169 launchPerfProcess("mem events", 170 MemEventsPPI, 171 "script -F pid,event,addr,ip", 172 /*Wait = */false); 173 174 launchPerfProcess("process events", 175 MMapEventsPPI, 176 "script --show-mmap-events", 177 /*Wait = */false); 178 179 launchPerfProcess("task events", 180 TaskEventsPPI, 181 "script --show-task-events", 182 /*Wait = */false); 183 } 184 185 void DataAggregator::abort() { 186 if (opts::ReadPreAggregated) 187 return; 188 189 std::string Error; 190 191 // Kill subprocesses in case they are not finished 192 sys::Wait(TaskEventsPPI.PI, 1, false, &Error); 193 sys::Wait(MMapEventsPPI.PI, 1, false, &Error); 194 sys::Wait(MainEventsPPI.PI, 1, false, &Error); 195 sys::Wait(MemEventsPPI.PI, 1, false, &Error); 196 197 deleteTempFiles(); 198 199 exit(1); 200 } 201 202 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, 203 const char *ArgsString, bool Wait) { 204 SmallVector<StringRef, 4> Argv; 205 206 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; 207 Argv.push_back(PerfPath.data()); 208 209 char *WritableArgsString = strdup(ArgsString); 210 char *Str = WritableArgsString; 211 do { 212 Argv.push_back(Str); 213 while (*Str && *Str != ' ') 214 ++Str; 215 if (!*Str) 216 break; 217 *Str++ = 0; 218 } while (true); 219 220 Argv.push_back("-f"); 221 Argv.push_back("-i"); 222 Argv.push_back(Filename.c_str()); 223 224 if (std::error_code Errc = 225 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) { 226 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath 227 << " with error " << Errc.message() << "\n"; 228 exit(1); 229 } 230 TempFiles.push_back(PPI.StdoutPath.data()); 231 232 if (std::error_code Errc = 233 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) { 234 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath 235 << " with error " << Errc.message() << "\n"; 236 exit(1); 237 } 238 TempFiles.push_back(PPI.StderrPath.data()); 239 240 
Optional<StringRef> Redirects[] = { 241 llvm::None, // Stdin 242 StringRef(PPI.StdoutPath.data()), // Stdout 243 StringRef(PPI.StderrPath.data())}; // Stderr 244 245 LLVM_DEBUG({ 246 dbgs() << "Launching perf: "; 247 for (StringRef Arg : Argv) 248 dbgs() << Arg << " "; 249 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() 250 << "\n"; 251 }); 252 253 if (Wait) 254 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, 255 /*envp*/ llvm::None, Redirects); 256 else 257 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None, 258 Redirects); 259 260 free(WritableArgsString); 261 } 262 263 void DataAggregator::processFileBuildID(StringRef FileBuildID) { 264 PerfProcessInfo BuildIDProcessInfo; 265 launchPerfProcess("buildid list", 266 BuildIDProcessInfo, 267 "buildid-list", 268 /*Wait = */true); 269 270 if (BuildIDProcessInfo.PI.ReturnCode != 0) { 271 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 272 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); 273 StringRef ErrBuf = (*MB)->getBuffer(); 274 275 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode 276 << '\n'; 277 errs() << ErrBuf; 278 return; 279 } 280 281 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 282 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); 283 if (std::error_code EC = MB.getError()) { 284 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " 285 << EC.message() << "\n"; 286 return; 287 } 288 289 FileBuf.reset(MB->release()); 290 ParsingBuf = FileBuf->getBuffer(); 291 if (ParsingBuf.empty()) { 292 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " 293 "data was recorded without it\n"; 294 return; 295 } 296 297 Col = 0; 298 Line = 1; 299 Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); 300 if (!FileName) { 301 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. 
" 302 "This indicates the input binary supplied for data aggregation " 303 "is not the same recorded by perf when collecting profiling " 304 "data, or there were no samples recorded for the binary. " 305 "Use -ignore-build-id option to override.\n"; 306 if (!opts::IgnoreBuildID) 307 abort(); 308 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { 309 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; 310 BuildIDBinaryName = std::string(*FileName); 311 } else { 312 outs() << "PERF2BOLT: matched build-id and file name\n"; 313 } 314 315 return; 316 } 317 318 bool DataAggregator::checkPerfDataMagic(StringRef FileName) { 319 if (opts::ReadPreAggregated) 320 return true; 321 322 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName); 323 if (!FD) 324 return false; 325 326 char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; 327 328 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); 329 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( 330 *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0); 331 if (!BytesRead || *BytesRead != 7) 332 return false; 333 334 if (strncmp(Buf, "PERFILE", 7) == 0) 335 return true; 336 return false; 337 } 338 339 void DataAggregator::parsePreAggregated() { 340 std::string Error; 341 342 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 343 MemoryBuffer::getFileOrSTDIN(Filename); 344 if (std::error_code EC = MB.getError()) { 345 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " 346 << EC.message() << "\n"; 347 exit(1); 348 } 349 350 FileBuf.reset(MB->release()); 351 ParsingBuf = FileBuf->getBuffer(); 352 Col = 0; 353 Line = 1; 354 if (parsePreAggregatedLBRSamples()) { 355 errs() << "PERF2BOLT: failed to parse samples\n"; 356 exit(1); 357 } 358 } 359 360 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { 361 outs() << "PERF2BOLT: writing data for autofdo tools...\n"; 362 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, 363 
TimerGroupDesc, opts::TimeAggregator); 364 365 std::error_code EC; 366 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 367 if (EC) 368 return EC; 369 370 // Format: 371 // number of unique traces 372 // from_1-to_1:count_1 373 // from_2-to_2:count_2 374 // ...... 375 // from_n-to_n:count_n 376 // number of unique sample addresses 377 // addr_1:count_1 378 // addr_2:count_2 379 // ...... 380 // addr_n:count_n 381 // number of unique LBR entries 382 // src_1->dst_1:count_1 383 // src_2->dst_2:count_2 384 // ...... 385 // src_n->dst_n:count_n 386 387 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress; 388 389 // AutoFDO addresses are relative to the first allocated loadable program 390 // segment 391 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t { 392 if (Address < FirstAllocAddress) 393 return 0; 394 return Address - FirstAllocAddress; 395 }; 396 397 OutFile << FallthroughLBRs.size() << "\n"; 398 for (const auto &AggrLBR : FallthroughLBRs) { 399 const Trace &Trace = AggrLBR.first; 400 const FTInfo &Info = AggrLBR.second; 401 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-" 402 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 403 << (Info.InternCount + Info.ExternCount) << "\n"; 404 } 405 406 OutFile << BasicSamples.size() << "\n"; 407 for (const auto &Sample : BasicSamples) { 408 uint64_t PC = Sample.first; 409 uint64_t HitCount = Sample.second; 410 OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n"; 411 } 412 413 OutFile << BranchLBRs.size() << "\n"; 414 for (const auto &AggrLBR : BranchLBRs) { 415 const Trace &Trace = AggrLBR.first; 416 const BranchInfo &Info = AggrLBR.second; 417 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->" 418 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 419 << Info.TakenCount << "\n"; 420 } 421 422 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, " 423 << BasicSamples.size() << " sample 
addresses and " << BranchLBRs.size() 424 << " unique branches to " << OutputFilename << "\n"; 425 426 return std::error_code(); 427 } 428 429 void DataAggregator::filterBinaryMMapInfo() { 430 if (opts::FilterPID) { 431 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); 432 if (MMapInfoIter != BinaryMMapInfo.end()) { 433 MMapInfo MMap = MMapInfoIter->second; 434 BinaryMMapInfo.clear(); 435 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); 436 } else { 437 if (errs().has_colors()) 438 errs().changeColor(raw_ostream::RED); 439 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" 440 << opts::FilterPID << "\"" 441 << " for binary \"" << BC->getFilename() << "\"."; 442 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); 443 errs() << " Profile for the following process is available:\n"; 444 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 445 outs() << " " << MMI.second.PID 446 << (MMI.second.Forked ? " (forked)\n" : "\n"); 447 448 if (errs().has_colors()) 449 errs().resetColor(); 450 451 exit(1); 452 } 453 } 454 } 455 456 Error DataAggregator::preprocessProfile(BinaryContext &BC) { 457 this->BC = &BC; 458 459 if (opts::ReadPreAggregated) { 460 parsePreAggregated(); 461 return Error::success(); 462 } 463 464 if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) { 465 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; 466 processFileBuildID(*FileBuildID); 467 } else { 468 errs() << "BOLT-WARNING: build-id will not be checked because we could " 469 "not read one from input binary\n"; 470 } 471 472 auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) { 473 std::string Error; 474 outs() << "PERF2BOLT: waiting for perf " << Name 475 << " collection to finish...\n"; 476 sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error); 477 478 if (!Error.empty()) { 479 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; 480 deleteTempFiles(); 481 exit(1); 482 } 483 484 
if (PI.ReturnCode != 0) { 485 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = 486 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); 487 StringRef ErrBuf = (*ErrorMB)->getBuffer(); 488 489 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 490 errs() << ErrBuf; 491 deleteTempFiles(); 492 exit(1); 493 } 494 495 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 496 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); 497 if (std::error_code EC = MB.getError()) { 498 errs() << "Cannot open " << Process.StdoutPath.data() << ": " 499 << EC.message() << "\n"; 500 deleteTempFiles(); 501 exit(1); 502 } 503 504 FileBuf.reset(MB->release()); 505 ParsingBuf = FileBuf->getBuffer(); 506 Col = 0; 507 Line = 1; 508 }; 509 510 if (opts::LinuxKernelMode) { 511 // Current MMap parsing logic does not work with linux kernel. 512 // MMap entries for linux kernel uses PERF_RECORD_MMAP 513 // format instead of typical PERF_RECORD_MMAP2 format. 514 // Since linux kernel address mapping is absolute (same as 515 // in the ELF file), we avoid parsing MMap in linux kernel mode. 516 // While generating optimized linux kernel binary, we may need 517 // to parse MMap entries. 518 519 // In linux kernel mode, we analyze and optimize 520 // all linux kernel binary instructions, irrespective 521 // of whether they are due to system calls or due to 522 // interrupts. Therefore, we cannot ignore interrupt 523 // in Linux kernel mode. 
524 opts::IgnoreInterruptLBR = false; 525 } else { 526 prepareToParse("mmap events", MMapEventsPPI); 527 if (parseMMapEvents()) 528 errs() << "PERF2BOLT: failed to parse mmap events\n"; 529 } 530 531 prepareToParse("task events", TaskEventsPPI); 532 if (parseTaskEvents()) 533 errs() << "PERF2BOLT: failed to parse task events\n"; 534 535 filterBinaryMMapInfo(); 536 prepareToParse("events", MainEventsPPI); 537 538 if (opts::HeatmapMode) { 539 if (std::error_code EC = printLBRHeatMap()) { 540 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; 541 exit(1); 542 } 543 exit(0); 544 } 545 546 if ((!opts::BasicAggregation && parseBranchEvents()) || 547 (opts::BasicAggregation && parseBasicEvents())) 548 errs() << "PERF2BOLT: failed to parse samples\n"; 549 550 // We can finish early if the goal is just to generate data for autofdo 551 if (opts::WriteAutoFDOData) { 552 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename)) 553 errs() << "Error writing autofdo data to file: " << EC.message() << "\n"; 554 555 deleteTempFiles(); 556 exit(0); 557 } 558 559 // Special handling for memory events 560 std::string Error; 561 sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error); 562 if (PI.ReturnCode != 0) { 563 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 564 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data()); 565 StringRef ErrBuf = (*MB)->getBuffer(); 566 567 deleteTempFiles(); 568 569 Regex NoData("Samples for '.*' event do not have ADDR attribute set. 
" 570 "Cannot print 'addr' field."); 571 if (!NoData.match(ErrBuf)) { 572 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 573 errs() << ErrBuf; 574 exit(1); 575 } 576 return Error::success(); 577 } 578 579 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 580 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data()); 581 if (std::error_code EC = MB.getError()) { 582 errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": " 583 << EC.message() << "\n"; 584 deleteTempFiles(); 585 exit(1); 586 } 587 588 FileBuf.reset(MB->release()); 589 ParsingBuf = FileBuf->getBuffer(); 590 Col = 0; 591 Line = 1; 592 if (const std::error_code EC = parseMemEvents()) 593 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() 594 << '\n'; 595 596 deleteTempFiles(); 597 598 return Error::success(); 599 } 600 601 Error DataAggregator::readProfile(BinaryContext &BC) { 602 processProfile(BC); 603 604 for (auto &BFI : BC.getBinaryFunctions()) { 605 BinaryFunction &Function = BFI.second; 606 convertBranchData(Function); 607 } 608 609 if (opts::AggregateOnly) { 610 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) 611 report_error("cannot create output data file", EC); 612 } 613 614 return Error::success(); 615 } 616 617 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { 618 return Function.hasProfileAvailable(); 619 } 620 621 void DataAggregator::processProfile(BinaryContext &BC) { 622 if (opts::ReadPreAggregated) 623 processPreAggregated(); 624 else if (opts::BasicAggregation) 625 processBasicEvents(); 626 else 627 processBranchEvents(); 628 629 processMemEvents(); 630 631 // Mark all functions with registered events as having a valid profile. 632 for (auto &BFI : BC.getBinaryFunctions()) { 633 BinaryFunction &BF = BFI.second; 634 if (getBranchData(BF)) { 635 const auto Flags = opts::BasicAggregation ? 
BinaryFunction::PF_SAMPLE 636 : BinaryFunction::PF_LBR; 637 BF.markProfiled(Flags); 638 } 639 } 640 641 // Release intermediate storage. 642 clear(BranchLBRs); 643 clear(FallthroughLBRs); 644 clear(AggregatedLBRs); 645 clear(BasicSamples); 646 clear(MemSamples); 647 } 648 649 BinaryFunction * 650 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { 651 if (!BC->containsAddress(Address)) 652 return nullptr; 653 654 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, 655 /*UseMaxSize=*/true); 656 } 657 658 StringRef DataAggregator::getLocationName(BinaryFunction &Func, 659 uint64_t Count) { 660 if (!BAT) 661 return Func.getOneName(); 662 663 const BinaryFunction *OrigFunc = &Func; 664 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) { 665 NumColdSamples += Count; 666 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr); 667 if (HotFunc) 668 OrigFunc = HotFunc; 669 } 670 // If it is a local function, prefer the name containing the file name where 671 // the local function was declared 672 for (StringRef AlternativeName : OrigFunc->getNames()) { 673 size_t FileNameIdx = AlternativeName.find('/'); 674 // Confirm the alternative name has the pattern Symbol/FileName/1 before 675 // using it 676 if (FileNameIdx == StringRef::npos || 677 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos) 678 continue; 679 return AlternativeName; 680 } 681 return OrigFunc->getOneName(); 682 } 683 684 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address, 685 uint64_t Count) { 686 auto I = NamesToSamples.find(Func.getOneName()); 687 if (I == NamesToSamples.end()) { 688 bool Success; 689 StringRef LocName = getLocationName(Func, Count); 690 std::tie(I, Success) = NamesToSamples.insert( 691 std::make_pair(Func.getOneName(), 692 FuncSampleData(LocName, FuncSampleData::ContainerTy()))); 693 } 694 695 Address -= Func.getAddress(); 696 if (BAT) 697 Address = BAT->translate(Func, 
Address, /*IsBranchSrc=*/false); 698 699 I->second.bumpCount(Address, Count); 700 return true; 701 } 702 703 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, 704 uint64_t To, uint64_t Count, 705 uint64_t Mispreds) { 706 FuncBranchData *AggrData = getBranchData(Func); 707 if (!AggrData) { 708 AggrData = &NamesToBranches[Func.getOneName()]; 709 AggrData->Name = getLocationName(Func, Count); 710 setBranchData(Func, AggrData); 711 } 712 713 From -= Func.getAddress(); 714 To -= Func.getAddress(); 715 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName() 716 << " @ " << Twine::utohexstr(From) << " -> " 717 << Func.getPrintName() << " @ " << Twine::utohexstr(To) 718 << '\n'); 719 if (BAT) { 720 From = BAT->translate(Func, From, /*IsBranchSrc=*/true); 721 To = BAT->translate(Func, To, /*IsBranchSrc=*/false); 722 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: " 723 << Func.getPrintName() << " @ " << Twine::utohexstr(From) 724 << " -> " << Func.getPrintName() << " @ " 725 << Twine::utohexstr(To) << '\n'); 726 } 727 728 AggrData->bumpBranchCount(From, To, Count, Mispreds); 729 return true; 730 } 731 732 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, 733 BinaryFunction *ToFunc, uint64_t From, 734 uint64_t To, uint64_t Count, 735 uint64_t Mispreds) { 736 FuncBranchData *FromAggrData = nullptr; 737 FuncBranchData *ToAggrData = nullptr; 738 StringRef SrcFunc; 739 StringRef DstFunc; 740 if (FromFunc) { 741 SrcFunc = getLocationName(*FromFunc, Count); 742 FromAggrData = getBranchData(*FromFunc); 743 if (!FromAggrData) { 744 FromAggrData = &NamesToBranches[FromFunc->getOneName()]; 745 FromAggrData->Name = SrcFunc; 746 setBranchData(*FromFunc, FromAggrData); 747 } 748 From -= FromFunc->getAddress(); 749 if (BAT) 750 From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true); 751 752 recordExit(*FromFunc, From, Mispreds, Count); 753 } 754 if (ToFunc) { 755 DstFunc = getLocationName(*ToFunc, 0); 756 
ToAggrData = getBranchData(*ToFunc); 757 if (!ToAggrData) { 758 ToAggrData = &NamesToBranches[ToFunc->getOneName()]; 759 ToAggrData->Name = DstFunc; 760 setBranchData(*ToFunc, ToAggrData); 761 } 762 To -= ToFunc->getAddress(); 763 if (BAT) 764 To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false); 765 766 recordEntry(*ToFunc, To, Mispreds, Count); 767 } 768 769 if (FromAggrData) 770 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To), 771 Count, Mispreds); 772 if (ToAggrData) 773 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To, 774 Count, Mispreds); 775 return true; 776 } 777 778 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, 779 uint64_t Mispreds) { 780 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); 781 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); 782 if (!FromFunc && !ToFunc) 783 return false; 784 785 if (FromFunc == ToFunc) { 786 recordBranch(*FromFunc, From - FromFunc->getAddress(), 787 To - FromFunc->getAddress(), Count, Mispreds); 788 return doIntraBranch(*FromFunc, From, To, Count, Mispreds); 789 } 790 791 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); 792 } 793 794 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, 795 uint64_t Count) { 796 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); 797 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); 798 if (!FromFunc || !ToFunc) { 799 LLVM_DEBUG( 800 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() 801 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 802 << " and ending in " << ToFunc->getPrintName() << " @ " 803 << ToFunc->getPrintName() << " @ " 804 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 805 NumLongRangeTraces += Count; 806 return false; 807 } 808 if (FromFunc != ToFunc) { 809 NumInvalidTraces += Count; 810 LLVM_DEBUG( 811 dbgs() << "Invalid 
trace starting in " << FromFunc->getPrintName() 812 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 813 << " and ending in " << ToFunc->getPrintName() << " @ " 814 << ToFunc->getPrintName() << " @ " 815 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 816 return false; 817 } 818 819 Optional<BoltAddressTranslation::FallthroughListTy> FTs = 820 BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From) 821 : getFallthroughsInTrace(*FromFunc, First, Second, Count); 822 if (!FTs) { 823 LLVM_DEBUG( 824 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 825 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 826 << " and ending in " << ToFunc->getPrintName() << " @ " 827 << ToFunc->getPrintName() << " @ " 828 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 829 NumInvalidTraces += Count; 830 return false; 831 } 832 833 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " 834 << FromFunc->getPrintName() << ":" 835 << Twine::utohexstr(First.To) << " to " 836 << Twine::utohexstr(Second.From) << ".\n"); 837 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs) 838 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(), 839 Pair.second + FromFunc->getAddress(), Count, false); 840 841 return true; 842 } 843 844 bool DataAggregator::recordTrace( 845 BinaryFunction &BF, 846 const LBREntry &FirstLBR, 847 const LBREntry &SecondLBR, 848 uint64_t Count, 849 SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const { 850 BinaryContext &BC = BF.getBinaryContext(); 851 852 if (!BF.isSimple()) 853 return false; 854 855 assert(BF.hasCFG() && "can only record traces in CFG state"); 856 857 // Offsets of the trace within this function. 
858 const uint64_t From = FirstLBR.To - BF.getAddress(); 859 const uint64_t To = SecondLBR.From - BF.getAddress(); 860 861 if (From > To) 862 return false; 863 864 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 865 BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 866 867 if (!FromBB || !ToBB) 868 return false; 869 870 // Adjust FromBB if the first LBR is a return from the last instruction in 871 // the previous block (that instruction should be a call). 872 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && 873 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { 874 BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1]; 875 if (PrevBB->getSuccessor(FromBB->getLabel())) { 876 const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); 877 if (Instr && BC.MIB->isCall(*Instr)) 878 FromBB = PrevBB; 879 else 880 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR 881 << '\n'); 882 } else { 883 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); 884 } 885 } 886 887 // Fill out information for fall-through edges. The From and To could be 888 // within the same basic block, e.g. when two call instructions are in the 889 // same block. In this case we skip the processing. 890 if (FromBB == ToBB) 891 return true; 892 893 // Process blocks in the original layout order. 894 BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()]; 895 assert(BB == FromBB && "index mismatch"); 896 while (BB != ToBB) { 897 BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1]; 898 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); 899 900 // Check for bad LBRs. 
901 if (!BB->getSuccessor(NextBB->getLabel())) { 902 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" 903 << " " << FirstLBR << '\n' 904 << " " << SecondLBR << '\n'); 905 return false; 906 } 907 908 // Record fall-through jumps 909 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB); 910 BI.Count += Count; 911 912 if (Branches) { 913 const MCInst *Instr = BB->getLastNonPseudoInstr(); 914 uint64_t Offset = 0; 915 if (Instr) 916 Offset = BC.MIB->getAnnotationWithDefault<uint32_t>(*Instr, "Offset"); 917 else 918 Offset = BB->getOffset(); 919 920 Branches->emplace_back(Offset, NextBB->getOffset()); 921 } 922 923 BB = NextBB; 924 } 925 926 return true; 927 } 928 929 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> 930 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, 931 const LBREntry &FirstLBR, 932 const LBREntry &SecondLBR, 933 uint64_t Count) const { 934 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res; 935 936 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res)) 937 return NoneType(); 938 939 return Res; 940 } 941 942 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, 943 uint64_t Count) const { 944 if (To > BF.getSize()) 945 return false; 946 947 if (!BF.hasProfile()) 948 BF.ExecutionCount = 0; 949 950 BinaryBasicBlock *EntryBB = nullptr; 951 if (To == 0) { 952 BF.ExecutionCount += Count; 953 if (!BF.empty()) 954 EntryBB = &BF.front(); 955 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) { 956 if (BB->isEntryPoint()) 957 EntryBB = BB; 958 } 959 960 if (EntryBB) 961 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); 962 963 return true; 964 } 965 966 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, 967 uint64_t Count) const { 968 if (!BF.isSimple() || From > BF.getSize()) 969 return false; 970 971 if (!BF.hasProfile()) 972 BF.ExecutionCount = 0; 973 974 return true; 975 } 976 977 ErrorOr<LBREntry> 
DataAggregator::parseLBREntry() { 978 LBREntry Res; 979 ErrorOr<StringRef> FromStrRes = parseString('/'); 980 if (std::error_code EC = FromStrRes.getError()) 981 return EC; 982 StringRef OffsetStr = FromStrRes.get(); 983 if (OffsetStr.getAsInteger(0, Res.From)) { 984 reportError("expected hexadecimal number with From address"); 985 Diag << "Found: " << OffsetStr << "\n"; 986 return make_error_code(llvm::errc::io_error); 987 } 988 989 ErrorOr<StringRef> ToStrRes = parseString('/'); 990 if (std::error_code EC = ToStrRes.getError()) 991 return EC; 992 OffsetStr = ToStrRes.get(); 993 if (OffsetStr.getAsInteger(0, Res.To)) { 994 reportError("expected hexadecimal number with To address"); 995 Diag << "Found: " << OffsetStr << "\n"; 996 return make_error_code(llvm::errc::io_error); 997 } 998 999 ErrorOr<StringRef> MispredStrRes = parseString('/'); 1000 if (std::error_code EC = MispredStrRes.getError()) 1001 return EC; 1002 StringRef MispredStr = MispredStrRes.get(); 1003 if (MispredStr.size() != 1 || 1004 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { 1005 reportError("expected single char for mispred bit"); 1006 Diag << "Found: " << MispredStr << "\n"; 1007 return make_error_code(llvm::errc::io_error); 1008 } 1009 Res.Mispred = MispredStr[0] == 'M'; 1010 1011 static bool MispredWarning = true; 1012 if (MispredStr[0] == '-' && MispredWarning) { 1013 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; 1014 MispredWarning = false; 1015 } 1016 1017 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true); 1018 if (std::error_code EC = Rest.getError()) 1019 return EC; 1020 if (Rest.get().size() < 5) { 1021 reportError("expected rest of LBR entry"); 1022 Diag << "Found: " << Rest.get() << "\n"; 1023 return make_error_code(llvm::errc::io_error); 1024 } 1025 return Res; 1026 } 1027 1028 bool DataAggregator::checkAndConsumeFS() { 1029 if (ParsingBuf[0] != FieldSeparator) 1030 return false; 1031 1032 ParsingBuf = 
ParsingBuf.drop_front(1); 1033 Col += 1; 1034 return true; 1035 } 1036 1037 void DataAggregator::consumeRestOfLine() { 1038 size_t LineEnd = ParsingBuf.find_first_of('\n'); 1039 if (LineEnd == StringRef::npos) { 1040 ParsingBuf = StringRef(); 1041 Col = 0; 1042 Line += 1; 1043 return; 1044 } 1045 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); 1046 Col = 0; 1047 Line += 1; 1048 } 1049 1050 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { 1051 PerfBranchSample Res; 1052 1053 while (checkAndConsumeFS()) { 1054 } 1055 1056 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1057 if (std::error_code EC = PIDRes.getError()) 1058 return EC; 1059 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1060 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) { 1061 consumeRestOfLine(); 1062 return make_error_code(errc::no_such_process); 1063 } 1064 1065 while (checkAndConsumeFS()) { 1066 } 1067 1068 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1069 if (std::error_code EC = PCRes.getError()) 1070 return EC; 1071 Res.PC = PCRes.get(); 1072 1073 if (checkAndConsumeNewLine()) 1074 return Res; 1075 1076 while (!checkAndConsumeNewLine()) { 1077 checkAndConsumeFS(); 1078 1079 ErrorOr<LBREntry> LBRRes = parseLBREntry(); 1080 if (std::error_code EC = LBRRes.getError()) 1081 return EC; 1082 LBREntry LBR = LBRRes.get(); 1083 if (ignoreKernelInterrupt(LBR)) 1084 continue; 1085 if (!BC->HasFixedLoadAddress) 1086 adjustLBR(LBR, MMapInfoIter->second); 1087 Res.LBR.push_back(LBR); 1088 } 1089 1090 return Res; 1091 } 1092 1093 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { 1094 while (checkAndConsumeFS()) { 1095 } 1096 1097 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1098 if (std::error_code EC = PIDRes.getError()) 1099 return EC; 1100 1101 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1102 if (MMapInfoIter == BinaryMMapInfo.end()) { 1103 consumeRestOfLine(); 
1104 return PerfBasicSample{StringRef(), 0}; 1105 } 1106 1107 while (checkAndConsumeFS()) { 1108 } 1109 1110 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1111 if (std::error_code EC = Event.getError()) 1112 return EC; 1113 1114 while (checkAndConsumeFS()) { 1115 } 1116 1117 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true); 1118 if (std::error_code EC = AddrRes.getError()) 1119 return EC; 1120 1121 if (!checkAndConsumeNewLine()) { 1122 reportError("expected end of line"); 1123 return make_error_code(llvm::errc::io_error); 1124 } 1125 1126 uint64_t Address = *AddrRes; 1127 if (!BC->HasFixedLoadAddress) 1128 adjustAddress(Address, MMapInfoIter->second); 1129 1130 return PerfBasicSample{Event.get(), Address}; 1131 } 1132 1133 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { 1134 PerfMemSample Res{0, 0}; 1135 1136 while (checkAndConsumeFS()) { 1137 } 1138 1139 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1140 if (std::error_code EC = PIDRes.getError()) 1141 return EC; 1142 1143 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1144 if (MMapInfoIter == BinaryMMapInfo.end()) { 1145 consumeRestOfLine(); 1146 return Res; 1147 } 1148 1149 while (checkAndConsumeFS()) { 1150 } 1151 1152 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1153 if (std::error_code EC = Event.getError()) 1154 return EC; 1155 if (Event.get().find("mem-loads") == StringRef::npos) { 1156 consumeRestOfLine(); 1157 return Res; 1158 } 1159 1160 while (checkAndConsumeFS()) { 1161 } 1162 1163 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator); 1164 if (std::error_code EC = AddrRes.getError()) 1165 return EC; 1166 1167 while (checkAndConsumeFS()) { 1168 } 1169 1170 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1171 if (std::error_code EC = PCRes.getError()) { 1172 consumeRestOfLine(); 1173 return EC; 1174 } 1175 1176 if (!checkAndConsumeNewLine()) { 1177 reportError("expected end of line"); 1178 return 
make_error_code(llvm::errc::io_error); 1179 } 1180 1181 uint64_t Address = *AddrRes; 1182 if (!BC->HasFixedLoadAddress) 1183 adjustAddress(Address, MMapInfoIter->second); 1184 1185 return PerfMemSample{PCRes.get(), Address}; 1186 } 1187 1188 ErrorOr<Location> DataAggregator::parseLocationOrOffset() { 1189 auto parseOffset = [this]() -> ErrorOr<Location> { 1190 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator); 1191 if (std::error_code EC = Res.getError()) 1192 return EC; 1193 return Location(Res.get()); 1194 }; 1195 1196 size_t Sep = ParsingBuf.find_first_of(" \n"); 1197 if (Sep == StringRef::npos) 1198 return parseOffset(); 1199 StringRef LookAhead = ParsingBuf.substr(0, Sep); 1200 if (LookAhead.find_first_of(":") == StringRef::npos) 1201 return parseOffset(); 1202 1203 ErrorOr<StringRef> BuildID = parseString(':'); 1204 if (std::error_code EC = BuildID.getError()) 1205 return EC; 1206 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator); 1207 if (std::error_code EC = Offset.getError()) 1208 return EC; 1209 return Location(true, BuildID.get(), Offset.get()); 1210 } 1211 1212 ErrorOr<DataAggregator::AggregatedLBREntry> 1213 DataAggregator::parseAggregatedLBREntry() { 1214 while (checkAndConsumeFS()) { 1215 } 1216 1217 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); 1218 if (std::error_code EC = TypeOrErr.getError()) 1219 return EC; 1220 auto Type = AggregatedLBREntry::BRANCH; 1221 if (TypeOrErr.get() == "B") { 1222 Type = AggregatedLBREntry::BRANCH; 1223 } else if (TypeOrErr.get() == "F") { 1224 Type = AggregatedLBREntry::FT; 1225 } else if (TypeOrErr.get() == "f") { 1226 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; 1227 } else { 1228 reportError("expected B, F or f"); 1229 return make_error_code(llvm::errc::io_error); 1230 } 1231 1232 while (checkAndConsumeFS()) { 1233 } 1234 ErrorOr<Location> From = parseLocationOrOffset(); 1235 if (std::error_code EC = From.getError()) 1236 return EC; 1237 1238 while (checkAndConsumeFS()) { 1239 } 1240 
ErrorOr<Location> To = parseLocationOrOffset(); 1241 if (std::error_code EC = To.getError()) 1242 return EC; 1243 1244 while (checkAndConsumeFS()) { 1245 } 1246 ErrorOr<int64_t> Frequency = 1247 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); 1248 if (std::error_code EC = Frequency.getError()) 1249 return EC; 1250 1251 uint64_t Mispreds = 0; 1252 if (Type == AggregatedLBREntry::BRANCH) { 1253 while (checkAndConsumeFS()) { 1254 } 1255 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); 1256 if (std::error_code EC = MispredsOrErr.getError()) 1257 return EC; 1258 Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); 1259 } 1260 1261 if (!checkAndConsumeNewLine()) { 1262 reportError("expected end of line"); 1263 return make_error_code(llvm::errc::io_error); 1264 } 1265 1266 return AggregatedLBREntry{From.get(), To.get(), 1267 static_cast<uint64_t>(Frequency.get()), Mispreds, 1268 Type}; 1269 } 1270 1271 bool DataAggregator::hasData() { 1272 if (ParsingBuf.size() == 0) 1273 return false; 1274 1275 return true; 1276 } 1277 1278 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { 1279 return opts::IgnoreInterruptLBR && 1280 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); 1281 } 1282 1283 std::error_code DataAggregator::printLBRHeatMap() { 1284 outs() << "PERF2BOLT: parse branch events...\n"; 1285 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1286 TimerGroupDesc, opts::TimeAggregator); 1287 1288 if (opts::LinuxKernelMode) { 1289 opts::HeatmapMaxAddress = 0xffffffffffffffff; 1290 opts::HeatmapMinAddress = KernelBaseAddr; 1291 } 1292 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, 1293 opts::HeatmapMaxAddress); 1294 uint64_t NumTotalSamples = 0; 1295 1296 while (hasData()) { 1297 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1298 if (std::error_code EC = SampleRes.getError()) { 1299 if (EC == errc::no_such_process) 1300 continue; 1301 return EC; 1302 } 1303 
PerfBranchSample &Sample = SampleRes.get();

    // LBRs are stored in reverse execution order. NextLBR refers to the next
    // executed branch record.
    const LBREntry *NextLBR = nullptr;
    for (const LBREntry &LBR : Sample.LBR) {
      if (NextLBR) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextLBR->From;
        ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
      }
      NextLBR = &LBR;
    }
    // Also register the two addresses at the ends of the LBR stack, which are
    // not covered by any fall-through trace recorded above.
    if (!Sample.LBR.empty()) {
      HM.registerAddress(Sample.LBR.front().To);
      HM.registerAddress(Sample.LBR.back().From);
    }
    // Note: this accumulates the number of LBR records, not of sample lines.
    NumTotalSamples += Sample.LBR.size();
  }

  if (!NumTotalSamples) {
    errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
              "Cannot build heatmap.\n";
    exit(1);
  }

  outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
  outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";

  outs() << "HEATMAP: building heat map...\n";

  // Feed every unique trace to the heat map, weighted by its hit count.
  for (const auto &LBR : FallthroughLBRs) {
    const Trace &Trace = LBR.first;
    const FTInfo &Info = LBR.second;
    HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
  }

  if (HM.getNumInvalidRanges())
    outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';

  if (!HM.size()) {
    errs() << "HEATMAP-ERROR: no valid traces registered\n";
    exit(1);
  }

  // The CDF goes to the same stream when printing to "-", otherwise to a
  // sibling file with a ".csv" suffix.
  HM.print(opts::HeatmapFile);
  if (opts::HeatmapFile == "-")
    HM.printCDF(opts::HeatmapFile);
  else
    HM.printCDF(opts::HeatmapFile + ".csv");

  return std::error_code();
}

/// Read branch samples (at most opts::MaxSamples of them) and aggregate their
/// LBR records into FallthroughLBRs and BranchLBRs, then mark every function
/// touched by a recorded branch as having profile available and print
/// statistics about the parsed data.
///
/// \returns the first parsing error other than errc::no_such_process, or a
/// default-constructed error code on success.
std::error_code DataAggregator::parseBranchEvents() {
  outs() << "PERF2BOLT: parse branch events...\n";
  NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // NumTotalSamples counts every sample line attempted, NumSamples only those
  // that were parsed successfully (i.e. not skipped for an unknown PID).
  uint64_t NumTotalSamples = 0;
  uint64_t NumEntries = 0;
  uint64_t NumSamples = 0;
  uint64_t NumSamplesNoLBR = 0;
  uint64_t NumTraces = 0;
  bool NeedsSkylakeFix = false;

  while (hasData() && NumTotalSamples < opts::MaxSamples) {
    ++NumTotalSamples;

    ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
    if (std::error_code EC = SampleRes.getError()) {
      // Samples from other processes are skipped, anything else is fatal.
      if (EC == errc::no_such_process)
        continue;
      return EC;
    }
    ++NumSamples;

    PerfBranchSample &Sample = SampleRes.get();
    if (opts::WriteAutoFDOData)
      ++BasicSamples[Sample.PC];

    if (Sample.LBR.empty()) {
      ++NumSamplesNoLBR;
      continue;
    }

    NumEntries += Sample.LBR.size();
    // Once enabled, the workaround stays on for all following samples.
    if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
      errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
      NeedsSkylakeFix = true;
    }

    // LBRs are stored in reverse execution order. NextPC refers to the next
    // recorded executed PC.
    uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
    uint32_t NumEntry = 0;
    for (const LBREntry &LBR : Sample.LBR) {
      ++NumEntry;
      // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
      // sometimes record entry 32 as an exact copy of entry 31. This will cause
      // us to likely record an invalid trace and generate a stale function for
      // BAT mode (non BAT disassembles the function and is able to ignore this
      // trace at aggregation time). Drop first 2 entries (last two, in
      // chronological order)
      if (NeedsSkylakeFix && NumEntry <= 2)
        continue;
      if (NextPC) {
        // Record fall-through trace.
        const uint64_t TraceFrom = LBR.To;
        const uint64_t TraceTo = NextPC;
        const BinaryFunction *TraceBF =
            getBinaryFunctionContainingAddress(TraceFrom);
        if (TraceBF && TraceBF->containsAddress(TraceTo)) {
          // The trace stays within one function. Classify it by whether the
          // branch leading into it originated in the same function.
          FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
          if (TraceBF->containsAddress(LBR.From))
            ++Info.InternCount;
          else
            ++Info.ExternCount;
        } else {
          // The trace does not stay within one known function: it is counted
          // as invalid when both ends map to (different) functions, and as
          // long-range otherwise.
          if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
            LLVM_DEBUG(dbgs()
                       << "Invalid trace starting in "
                       << TraceBF->getPrintName() << " @ "
                       << Twine::utohexstr(TraceFrom - TraceBF->getAddress())
                       << " and ending @ " << Twine::utohexstr(TraceTo)
                       << '\n');
            ++NumInvalidTraces;
          } else {
            LLVM_DEBUG(dbgs()
                       << "Out of range trace starting in "
                       << (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
                       << Twine::utohexstr(
                              TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
                       << " and ending in "
                       << (getBinaryFunctionContainingAddress(TraceTo)
                               ? getBinaryFunctionContainingAddress(TraceTo)
                                     ->getPrintName()
                               : "None")
                       << " @ "
                       << Twine::utohexstr(
                              TraceTo -
                              (getBinaryFunctionContainingAddress(TraceTo)
                                   ? getBinaryFunctionContainingAddress(TraceTo)
                                         ->getAddress()
                                   : 0))
                       << '\n');
            ++NumLongRangeTraces;
          }
        }
        ++NumTraces;
      }
      NextPC = LBR.From;

      // Record the branch itself. An end that does not map to a known function
      // is replaced by 0, and branches with both ends unknown are dropped.
      uint64_t From = LBR.From;
      if (!getBinaryFunctionContainingAddress(From))
        From = 0;
      uint64_t To = LBR.To;
      if (!getBinaryFunctionContainingAddress(To))
        To = 0;
      if (!From && !To)
        continue;
      BranchInfo &Info = BranchLBRs[Trace(From, To)];
      ++Info.TakenCount;
      Info.MispredCount += LBR.Mispred;
    }
  }

  // Every function at either end of a recorded branch has profile data.
  for (const auto &LBR : BranchLBRs) {
    const Trace &Trace = LBR.first;
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From))
      BF->setHasProfileAvailable();
    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To))
      BF->setHasProfileAvailable();
  }

  // Print " (<Percent>%)", colored (when the stream supports colors) red above
  // T2, yellow above T1 and green otherwise.
  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
    OS << " (";
    if (OS.has_colors()) {
      if (Percent > T2)
        OS.changeColor(raw_ostream::RED);
      else if (Percent > T1)
        OS.changeColor(raw_ostream::YELLOW);
      else
        OS.changeColor(raw_ostream::GREEN);
    }
    OS << format("%.1f%%", Percent);
    if (OS.has_colors())
      OS.resetColor();
    OS << ")";
  };

  outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
         << " LBR entries\n";
  if (NumTotalSamples) {
    if (NumSamples && NumSamplesNoLBR == NumSamples) {
      // Note: we don't know if perf2bolt is being used to parse memory samples
      // at this point. In this case, it is OK to parse zero LBRs.
      errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
                "LBR. Record profile with perf record -j any or run perf2bolt "
                "in no-LBR mode with -nl (the performance improvement in -nl "
                "mode may be limited)\n";
    } else {
      // Ignored samples are the ones skipped for an unknown PID.
      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
      printColored(outs(), PercentIgnored, 20, 50);
      outs() << " were ignored\n";
      if (PercentIgnored > 50.0f)
        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
                  "were attributed to the input binary\n";
    }
  }
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;
  float Perc = 0.0f;
  if (NumTraces > 0) {
    Perc = NumInvalidTraces * 100.0f / NumTraces;
    printColored(outs(), Perc, 5, 10);
  }
  outs() << "\n";
  if (Perc > 10.0f)
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance.\n\n";

  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;
  if (NumTraces > 0)
    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
  outs() << "\n";

  // NOTE(review): NumColdSamples is not updated anywhere in this function and
  // the division below is not guarded against NumTotalSamples being zero -
  // confirm where the counter is maintained.
  if (NumColdSamples > 0) {
    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << "PERF2BOLT: " << NumColdSamples
           << format(" (%.1f%%)", ColdSamples)
           << " samples recorded in cold regions of split functions.\n";
    if (ColdSamples > 5.0f)
      outs()
          << "WARNING: The BOLT-processed binary where samples were collected "
             "likely used bad data or your service observed a large shift in "
             "profile. You may want to audit this.\n";
  }

  return std::error_code();
}

/// Replay the aggregated LBR data: every fall-through trace goes through
/// doTrace() and every branch through doBranch().
void DataAggregator::processBranchEvents() {
  outs() << "PERF2BOLT: processing branch events...\n";
  NamedRegionTimer T("processBranch", "Processing branch events",
                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

  for (const auto &AggrLBR : FallthroughLBRs) {
    const Trace &Loc = AggrLBR.first;
    const FTInfo &Info = AggrLBR.second;
    // Synthesize the pair of LBR entries that delimit the trace.
    LBREntry First{Loc.From, Loc.From, false};
    LBREntry Second{Loc.To, Loc.To, false};
    if (Info.InternCount)
      doTrace(First, Second, Info.InternCount);
    if (Info.ExternCount) {
      // Traces entered from another function are replayed with a zero From
      // address in the first entry.
      First.From = 0;
      doTrace(First, Second, Info.ExternCount);
    }
  }

  for (const auto &AggrLBR : BranchLBRs) {
    const Trace &Loc = AggrLBR.first;
    const BranchInfo &Info = AggrLBR.second;
    doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
  }
}

/// Read all basic (non-LBR) samples, counting hits per PC in BasicSamples,
/// collecting the event names and marking the containing functions as having
/// profile available.
///
/// \returns the first parsing error, or a default-constructed error code on
/// success.
std::error_code DataAggregator::parseBasicEvents() {
  outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
  NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);
  while (hasData()) {
    ErrorOr<PerfBasicSample> Sample = parseBasicSample();
    if (std::error_code EC = Sample.getError())
      return EC;

    // A zero PC is what parseBasicSample() reports for samples it skipped.
    if (!Sample->PC)
      continue;

    if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
      BF->setHasProfileAvailable();

    ++BasicSamples[Sample->PC];
    EventNames.insert(Sample->EventName);
  }

  return std::error_code();
}

/// Attribute the per-PC hit counts gathered in BasicSamples to their
/// functions via doSample() and report how many samples fell outside of any
/// known function.
void DataAggregator::processBasicEvents() {
  outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
  NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);
  uint64_t OutOfRangeSamples = 0;
  uint64_t NumSamples = 0;
  for (auto &Sample :
BasicSamples) { 1606 const uint64_t PC = Sample.first; 1607 const uint64_t HitCount = Sample.second; 1608 NumSamples += HitCount; 1609 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1610 if (!Func) { 1611 OutOfRangeSamples += HitCount; 1612 continue; 1613 } 1614 1615 doSample(*Func, PC, HitCount); 1616 } 1617 outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; 1618 1619 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " 1620 << OutOfRangeSamples; 1621 float Perc = 0.0f; 1622 if (NumSamples > 0) { 1623 outs() << " ("; 1624 Perc = OutOfRangeSamples * 100.0f / NumSamples; 1625 if (outs().has_colors()) { 1626 if (Perc > 60.0f) 1627 outs().changeColor(raw_ostream::RED); 1628 else if (Perc > 40.0f) 1629 outs().changeColor(raw_ostream::YELLOW); 1630 else 1631 outs().changeColor(raw_ostream::GREEN); 1632 } 1633 outs() << format("%.1f%%", Perc); 1634 if (outs().has_colors()) 1635 outs().resetColor(); 1636 outs() << ")"; 1637 } 1638 outs() << "\n"; 1639 if (Perc > 80.0f) 1640 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1641 "binary is probably not the same binary used during profiling " 1642 "collection. 
The generated data may be ineffective for improving " 1643 "performance.\n\n"; 1644 } 1645 1646 std::error_code DataAggregator::parseMemEvents() { 1647 outs() << "PERF2BOLT: parsing memory events...\n"; 1648 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, 1649 TimerGroupDesc, opts::TimeAggregator); 1650 while (hasData()) { 1651 ErrorOr<PerfMemSample> Sample = parseMemSample(); 1652 if (std::error_code EC = Sample.getError()) 1653 return EC; 1654 1655 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1656 BF->setHasProfileAvailable(); 1657 1658 MemSamples.emplace_back(std::move(Sample.get())); 1659 } 1660 1661 return std::error_code(); 1662 } 1663 1664 void DataAggregator::processMemEvents() { 1665 NamedRegionTimer T("ProcessMemEvents", "Processing mem events", 1666 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1667 for (const PerfMemSample &Sample : MemSamples) { 1668 uint64_t PC = Sample.PC; 1669 uint64_t Addr = Sample.Addr; 1670 StringRef FuncName; 1671 StringRef MemName; 1672 1673 // Try to resolve symbol for PC 1674 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1675 if (!Func) { 1676 LLVM_DEBUG(if (PC != 0) { 1677 dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x" 1678 << Twine::utohexstr(Addr) << "\n"; 1679 }); 1680 continue; 1681 } 1682 1683 FuncName = Func->getOneName(); 1684 PC -= Func->getAddress(); 1685 1686 // Try to resolve symbol for memory load 1687 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { 1688 MemName = BD->getName(); 1689 Addr -= BD->getAddress(); 1690 } else if (opts::FilterMemProfile) { 1691 // Filter out heap/stack accesses 1692 continue; 1693 } 1694 1695 const Location FuncLoc(!FuncName.empty(), FuncName, PC); 1696 const Location AddrLoc(!MemName.empty(), MemName, Addr); 1697 1698 FuncMemData *MemData = &NamesToMemEvents[FuncName]; 1699 setMemData(*Func, MemData); 1700 MemData->update(FuncLoc, AddrLoc); 1701 LLVM_DEBUG(dbgs() << 
"Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); 1702 } 1703 } 1704 1705 std::error_code DataAggregator::parsePreAggregatedLBRSamples() { 1706 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; 1707 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", 1708 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1709 while (hasData()) { 1710 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); 1711 if (std::error_code EC = AggrEntry.getError()) 1712 return EC; 1713 1714 if (BinaryFunction *BF = 1715 getBinaryFunctionContainingAddress(AggrEntry->From.Offset)) 1716 BF->setHasProfileAvailable(); 1717 if (BinaryFunction *BF = 1718 getBinaryFunctionContainingAddress(AggrEntry->To.Offset)) 1719 BF->setHasProfileAvailable(); 1720 1721 AggregatedLBRs.emplace_back(std::move(AggrEntry.get())); 1722 } 1723 1724 return std::error_code(); 1725 } 1726 1727 void DataAggregator::processPreAggregated() { 1728 outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; 1729 NamedRegionTimer T("processAggregated", "Processing aggregated branch events", 1730 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1731 1732 uint64_t NumTraces = 0; 1733 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { 1734 switch (AggrEntry.EntryType) { 1735 case AggregatedLBREntry::BRANCH: 1736 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, 1737 AggrEntry.Mispreds); 1738 break; 1739 case AggregatedLBREntry::FT: 1740 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { 1741 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT 1742 ? 
AggrEntry.From.Offset 1743 : 0, 1744 AggrEntry.From.Offset, false}; 1745 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; 1746 doTrace(First, Second, AggrEntry.Count); 1747 NumTraces += AggrEntry.Count; 1748 break; 1749 } 1750 } 1751 } 1752 1753 outs() << "PERF2BOLT: read " << AggregatedLBRs.size() 1754 << " aggregated LBR entries\n"; 1755 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1756 << NumInvalidTraces; 1757 float Perc = 0.0f; 1758 if (NumTraces > 0) { 1759 outs() << " ("; 1760 Perc = NumInvalidTraces * 100.0f / NumTraces; 1761 if (outs().has_colors()) { 1762 if (Perc > 10.0f) 1763 outs().changeColor(raw_ostream::RED); 1764 else if (Perc > 5.0f) 1765 outs().changeColor(raw_ostream::YELLOW); 1766 else 1767 outs().changeColor(raw_ostream::GREEN); 1768 } 1769 outs() << format("%.1f%%", Perc); 1770 if (outs().has_colors()) 1771 outs().resetColor(); 1772 outs() << ")"; 1773 } 1774 outs() << "\n"; 1775 if (Perc > 10.0f) 1776 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1777 "binary is probably not the same binary used during profiling " 1778 "collection. 
The generated data may be ineffective for improving " 1779 "performance.\n\n"; 1780 1781 outs() << "PERF2BOLT: Out of range traces involving unknown regions: " 1782 << NumLongRangeTraces; 1783 if (NumTraces > 0) 1784 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1785 outs() << "\n"; 1786 } 1787 1788 Optional<int32_t> DataAggregator::parseCommExecEvent() { 1789 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1790 if (LineEnd == StringRef::npos) { 1791 reportError("expected rest of line"); 1792 Diag << "Found: " << ParsingBuf << "\n"; 1793 return NoneType(); 1794 } 1795 StringRef Line = ParsingBuf.substr(0, LineEnd); 1796 1797 size_t Pos = Line.find("PERF_RECORD_COMM exec"); 1798 if (Pos == StringRef::npos) 1799 return NoneType(); 1800 Line = Line.drop_front(Pos); 1801 1802 // Line: 1803 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" 1804 StringRef PIDStr = Line.rsplit(':').second.split('/').first; 1805 int32_t PID; 1806 if (PIDStr.getAsInteger(10, PID)) { 1807 reportError("expected PID"); 1808 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1809 return NoneType(); 1810 } 1811 1812 return PID; 1813 } 1814 1815 namespace { 1816 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) { 1817 const StringRef SecTimeStr = TimeStr.split('.').first; 1818 const StringRef USecTimeStr = TimeStr.split('.').second; 1819 uint64_t SecTime; 1820 uint64_t USecTime; 1821 if (SecTimeStr.getAsInteger(10, SecTime) || 1822 USecTimeStr.getAsInteger(10, USecTime)) 1823 return NoneType(); 1824 return SecTime * 1000000ULL + USecTime; 1825 } 1826 } 1827 1828 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { 1829 while (checkAndConsumeFS()) { 1830 } 1831 1832 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1833 if (LineEnd == StringRef::npos) { 1834 reportError("expected rest of line"); 1835 Diag << "Found: " << ParsingBuf << "\n"; 1836 return NoneType(); 1837 } 1838 StringRef Line = ParsingBuf.substr(0, LineEnd); 1839 1840 size_t Pos = 
Line.find("PERF_RECORD_FORK"); 1841 if (Pos == StringRef::npos) { 1842 consumeRestOfLine(); 1843 return NoneType(); 1844 } 1845 1846 ForkInfo FI; 1847 1848 const StringRef TimeStr = 1849 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1850 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { 1851 FI.Time = *TimeRes; 1852 } 1853 1854 Line = Line.drop_front(Pos); 1855 1856 // Line: 1857 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) 1858 const StringRef ChildPIDStr = Line.split('(').second.split(':').first; 1859 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { 1860 reportError("expected PID"); 1861 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; 1862 return NoneType(); 1863 } 1864 1865 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; 1866 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { 1867 reportError("expected PID"); 1868 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; 1869 return NoneType(); 1870 } 1871 1872 consumeRestOfLine(); 1873 1874 return FI; 1875 } 1876 1877 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> 1878 DataAggregator::parseMMapEvent() { 1879 while (checkAndConsumeFS()) { 1880 } 1881 1882 MMapInfo ParsedInfo; 1883 1884 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1885 if (LineEnd == StringRef::npos) { 1886 reportError("expected rest of line"); 1887 Diag << "Found: " << ParsingBuf << "\n"; 1888 return make_error_code(llvm::errc::io_error); 1889 } 1890 StringRef Line = ParsingBuf.substr(0, LineEnd); 1891 1892 size_t Pos = Line.find("PERF_RECORD_MMAP2"); 1893 if (Pos == StringRef::npos) { 1894 consumeRestOfLine(); 1895 return std::make_pair(StringRef(), ParsedInfo); 1896 } 1897 1898 // Line: 1899 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> 1900 1901 const StringRef TimeStr = 1902 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1903 if (Optional<uint64_t> TimeRes = 
parsePerfTime(TimeStr)) 1904 ParsedInfo.Time = *TimeRes; 1905 1906 Line = Line.drop_front(Pos); 1907 1908 // Line: 1909 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> 1910 1911 StringRef FileName = Line.rsplit(FieldSeparator).second; 1912 if (FileName.startswith("//") || FileName.startswith("[")) { 1913 consumeRestOfLine(); 1914 return std::make_pair(StringRef(), ParsedInfo); 1915 } 1916 FileName = sys::path::filename(FileName); 1917 1918 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; 1919 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { 1920 reportError("expected PID"); 1921 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1922 return make_error_code(llvm::errc::io_error); 1923 } 1924 1925 const StringRef BaseAddressStr = Line.split('[').second.split('(').first; 1926 if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) { 1927 reportError("expected base address"); 1928 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 1929 return make_error_code(llvm::errc::io_error); 1930 } 1931 1932 const StringRef SizeStr = Line.split('(').second.split(')').first; 1933 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { 1934 reportError("expected mmaped size"); 1935 Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; 1936 return make_error_code(llvm::errc::io_error); 1937 } 1938 1939 const StringRef OffsetStr = 1940 Line.split('@').second.ltrim().split(FieldSeparator).first; 1941 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { 1942 reportError("expected mmaped page-aligned offset"); 1943 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; 1944 return make_error_code(llvm::errc::io_error); 1945 } 1946 1947 consumeRestOfLine(); 1948 1949 return std::make_pair(FileName, ParsedInfo); 1950 } 1951 1952 std::error_code DataAggregator::parseMMapEvents() { 1953 outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; 1954 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", 
TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  // All mappings found in the profile, keyed by mapped file name.
  std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
  while (hasData()) {
    ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
    if (std::error_code EC = FileMMapInfoRes.getError())
      return EC;

    std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
    // Skip results that carry no PID.
    // NOTE(review): presumably -1 is the default PID of MMapInfo, i.e. what
    // parseMMapEvent() returns for lines it ignored - confirm in the header.
    if (FileMMapInfo.second.PID == -1)
      continue;

    // Consider only the first mapping of the file for any given PID
    bool PIDExists = false;
    auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
    for (auto MI = Range.first; MI != Range.second; ++MI) {
      if (MI->second.PID == FileMMapInfo.second.PID) {
        PIDExists = true;
        break;
      }
    }
    if (PIDExists)
      continue;

    GlobalMMapInfo.insert(FileMMapInfo);
  }

  LLVM_DEBUG({
    dbgs() << "FileName -> mmap info:\n";
    for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo)
      dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x"
             << Twine::utohexstr(Pair.second.BaseAddress) << ", "
             << Twine::utohexstr(Pair.second.Size) << " @ "
             << Twine::utohexstr(Pair.second.Offset) << "]\n";
  });

  // Match mappings by the file name of the input binary, falling back to
  // BuildIDBinaryName (when set) if the profile has no mapping with that name.
  StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
  if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
    errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
           << "\" for profile matching\n";
    NameToUse = BuildIDBinaryName;
  }

  auto Range = GlobalMMapInfo.equal_range(NameToUse);
  for (auto I = Range.first; I != Range.second; ++I) {
    const MMapInfo &MMapInfo = I->second;
    if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) {
      // Check that the binary mapping matches one of the segments.
      bool MatchFound = false;
      for (auto &KV : BC->SegmentMapInfo) {
        SegmentInfo &SegInfo = KV.second;
        // The mapping is page-aligned and hence the BaseAddress could be
        // different from the segment start address. We cannot know the page
        // size of the mapping, but we know it should not exceed the segment
        // alignment value. Hence we are performing an approximate check.
        if (SegInfo.Address >= MMapInfo.BaseAddress &&
            SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) {
          MatchFound = true;
          break;
        }
      }
      if (!MatchFound) {
        errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
               << " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n';
        continue;
      }
    }

    BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
  }

  // Without any mapping of the input binary the profile cannot be used: list
  // the binary names that are present in the profile and bail out.
  if (BinaryMMapInfo.empty()) {
    if (errs().has_colors())
      errs().changeColor(raw_ostream::RED);
    errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
           << BC->getFilename() << "\".";
    if (!GlobalMMapInfo.empty()) {
      errs() << " Profile for the following binary name(s) is available:\n";
      // upper_bound() jumps past all entries sharing the current name, so each
      // name is printed once.
      for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
           I = GlobalMMapInfo.upper_bound(I->first))
        errs() << " " << I->first << '\n';
      errs() << "Please rename the input binary.\n";
    } else {
      errs() << " Failed to extract any binary name from a profile.\n";
    }
    if (errs().has_colors())
      errs().resetColor();

    exit(1);
  }

  return std::error_code();
}

std::error_code DataAggregator::parseTaskEvents() {
  outs() << "PERF2BOLT: parsing perf-script task events output\n";
  NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
                     TimerGroupDesc, opts::TimeAggregator);

  while (hasData()) {
    if (Optional<int32_t> CommInfo = parseCommExecEvent()) {
      // Remove forked child that
ran execve 2057 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); 2058 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) 2059 BinaryMMapInfo.erase(MMapInfoIter); 2060 consumeRestOfLine(); 2061 continue; 2062 } 2063 2064 Optional<ForkInfo> ForkInfo = parseForkEvent(); 2065 if (!ForkInfo) 2066 continue; 2067 2068 if (ForkInfo->ParentPID == ForkInfo->ChildPID) 2069 continue; 2070 2071 if (ForkInfo->Time == 0) { 2072 // Process was forked and mmaped before perf ran. In this case the child 2073 // should have its own mmap entry unless it was execve'd. 2074 continue; 2075 } 2076 2077 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); 2078 if (MMapInfoIter == BinaryMMapInfo.end()) 2079 continue; 2080 2081 MMapInfo MMapInfo = MMapInfoIter->second; 2082 MMapInfo.PID = ForkInfo->ChildPID; 2083 MMapInfo.Forked = true; 2084 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2085 } 2086 2087 outs() << "PERF2BOLT: input binary is associated with " 2088 << BinaryMMapInfo.size() << " PID(s)\n"; 2089 2090 LLVM_DEBUG({ 2091 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 2092 outs() << " " << MMI.second.PID << (MMI.second.Forked ? 
" (forked)" : "") 2093 << ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x" 2094 << Twine::utohexstr(MMI.second.Size) << ")\n"; 2095 }); 2096 2097 return std::error_code(); 2098 } 2099 2100 Optional<std::pair<StringRef, StringRef>> 2101 DataAggregator::parseNameBuildIDPair() { 2102 while (checkAndConsumeFS()) { 2103 } 2104 2105 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true); 2106 if (std::error_code EC = BuildIDStr.getError()) 2107 return NoneType(); 2108 2109 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true); 2110 if (std::error_code EC = NameStr.getError()) 2111 return NoneType(); 2112 2113 consumeRestOfLine(); 2114 return std::make_pair(NameStr.get(), BuildIDStr.get()); 2115 } 2116 2117 Optional<StringRef> 2118 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { 2119 while (hasData()) { 2120 Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair(); 2121 if (!IDPair) 2122 return NoneType(); 2123 2124 if (IDPair->second.startswith(FileBuildID)) 2125 return sys::path::filename(IDPair->first); 2126 } 2127 return NoneType(); 2128 } 2129 2130 std::error_code 2131 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { 2132 std::error_code EC; 2133 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2134 if (EC) 2135 return EC; 2136 2137 bool WriteMemLocs = false; 2138 2139 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { 2140 if (WriteMemLocs) 2141 OutFile << (Loc.IsSymbol ? "4 " : "3 "); 2142 else 2143 OutFile << (Loc.IsSymbol ? "1 " : "0 "); 2144 OutFile << (Loc.Name.empty() ? 
"[unknown]" : getEscapedName(Loc.Name)) 2145 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; 2146 }; 2147 2148 uint64_t BranchValues = 0; 2149 uint64_t MemValues = 0; 2150 2151 if (BAT) 2152 OutFile << "boltedcollection\n"; 2153 if (opts::BasicAggregation) { 2154 OutFile << "no_lbr"; 2155 for (const StringMapEntry<NoneType> &Entry : EventNames) 2156 OutFile << " " << Entry.getKey(); 2157 OutFile << "\n"; 2158 2159 for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) { 2160 for (const SampleInfo &SI : Func.getValue().Data) { 2161 writeLocation(SI.Loc); 2162 OutFile << SI.Hits << "\n"; 2163 ++BranchValues; 2164 } 2165 } 2166 } else { 2167 for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 2168 for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) { 2169 writeLocation(BI.From); 2170 writeLocation(BI.To); 2171 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2172 ++BranchValues; 2173 } 2174 for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) { 2175 // Do not output if source is a known symbol, since this was already 2176 // accounted for in the source function 2177 if (BI.From.IsSymbol) 2178 continue; 2179 writeLocation(BI.From); 2180 writeLocation(BI.To); 2181 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2182 ++BranchValues; 2183 } 2184 } 2185 2186 WriteMemLocs = true; 2187 for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) { 2188 for (const MemInfo &MemEvent : Func.getValue().Data) { 2189 writeLocation(MemEvent.Offset); 2190 writeLocation(MemEvent.Addr); 2191 OutFile << MemEvent.Count << "\n"; 2192 ++MemValues; 2193 } 2194 } 2195 } 2196 2197 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues 2198 << " memory objects to " << OutputFilename << "\n"; 2199 2200 return std::error_code(); 2201 } 2202 2203 void DataAggregator::dump() const { DataReader::dump(); } 2204 2205 void DataAggregator::dump(const LBREntry &LBR) const { 2206 Diag << "From: " << 
Twine::utohexstr(LBR.From) 2207 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred 2208 << "\n"; 2209 } 2210 2211 void DataAggregator::dump(const PerfBranchSample &Sample) const { 2212 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n"; 2213 for (const LBREntry &LBR : Sample.LBR) 2214 dump(LBR); 2215 } 2216 2217 void DataAggregator::dump(const PerfMemSample &Sample) const { 2218 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n"; 2219 } 2220