1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions reads profile data written by perf record, 10 // aggregate it and then write it back to an output file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Profile/DataAggregator.h" 15 #include "bolt/Core/BinaryContext.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "bolt/Profile/BoltAddressTranslation.h" 18 #include "bolt/Profile/Heatmap.h" 19 #include "bolt/Utils/CommandLineOpts.h" 20 #include "bolt/Utils/Utils.h" 21 #include "llvm/ADT/ScopeExit.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/Process.h" 26 #include "llvm/Support/Program.h" 27 #include "llvm/Support/Regex.h" 28 #include "llvm/Support/Timer.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <map> 31 #include <unordered_map> 32 #include <utility> 33 34 #define DEBUG_TYPE "aggregator" 35 36 using namespace llvm; 37 using namespace bolt; 38 39 namespace opts { 40 41 static cl::opt<bool> 42 BasicAggregation("nl", 43 cl::desc("aggregate basic samples (without LBR info)"), 44 cl::init(false), 45 cl::ZeroOrMore, 46 cl::cat(AggregatorCategory)); 47 48 static cl::opt<bool> 49 FilterMemProfile("filter-mem-profile", 50 cl::desc("if processing a memory profile, filter out stack or heap accesses " 51 "that won't be useful for BOLT to reduce profile file size"), 52 cl::init(true), 53 cl::cat(AggregatorCategory)); 54 55 static cl::opt<unsigned long long> 56 FilterPID("pid", 57 cl::desc("only use samples from process with specified PID"), 58 cl::init(0), 59 cl::Optional, 60 cl::cat(AggregatorCategory)); 61 62 static cl::opt<bool> 63 IgnoreBuildID("ignore-build-id", 64 cl::desc("continue even if build-ids in input binary and perf.data mismatch"), 65 cl::init(false), 66 cl::cat(AggregatorCategory)); 67 68 static cl::opt<bool> 69 IgnoreInterruptLBR("ignore-interrupt-lbr", 70 cl::desc("ignore kernel interrupt LBR that happens asynchronously"), 71 cl::init(true), 72 cl::ZeroOrMore, 73 cl::cat(AggregatorCategory)); 74 75 static cl::opt<unsigned long long> 76 MaxSamples("max-samples", 77 cl::init(-1ULL), 78 cl::desc("maximum number of samples to read from LBR profile"), 79 cl::Optional, 80 cl::Hidden, 81 cl::cat(AggregatorCategory)); 82 83 static cl::opt<bool> 84 ReadPreAggregated("pa", 85 cl::desc("skip perf and read data from a pre-aggregated file format"), 86 cl::init(false), 87 cl::ZeroOrMore, 88 cl::cat(AggregatorCategory)); 89 90 static cl::opt<bool> 91 TimeAggregator("time-aggr", 92 cl::desc("time BOLT aggregator"), 93 cl::init(false), 94 cl::ZeroOrMore, 95 cl::cat(AggregatorCategory)); 96 97 static cl::opt<bool> 98 UseEventPC("use-event-pc", 99 cl::desc("use event PC in combination with LBR sampling"), 100 cl::init(false), 101 cl::ZeroOrMore, 102 cl::cat(AggregatorCategory)); 103 104 static cl::opt<bool> 105 WriteAutoFDOData("autofdo", 106 cl::desc("generate autofdo textual data instead of bolt data"), 107 cl::init(false), 108 cl::ZeroOrMore, 109 cl::cat(AggregatorCategory)); 110 111 } // namespace opts 112 113 namespace { 114 115 const char TimerGroupName[] = "aggregator"; 116 const char TimerGroupDesc[] = "Aggregator"; 117 118 } 119 120 constexpr uint64_t DataAggregator::KernelBaseAddr; 121 122 DataAggregator::~DataAggregator() { deleteTempFiles(); } 123 124 namespace { 125 void deleteTempFile(const std::string &FileName) { 126 if (std::error_code Errc = sys::fs::remove(FileName.c_str())) 127 errs() << "PERF2BOLT: failed to delete temporary file " << FileName 128 << " with error " << Errc.message() << "\n"; 129 } 130 } 131 132 void DataAggregator::deleteTempFiles() { 133 for (std::string &FileName : TempFiles) 134 deleteTempFile(FileName); 135 TempFiles.clear(); 136 } 137 138 void DataAggregator::findPerfExecutable() { 139 Optional<std::string> PerfExecutable = 140 sys::Process::FindInEnvPath("PATH", "perf"); 141 if (!PerfExecutable) { 142 outs() << "PERF2BOLT: No perf executable found!\n"; 143 exit(1); 144 } 145 PerfPath = *PerfExecutable; 146 } 147 148 void DataAggregator::start() { 149 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; 150 151 // Don't launch perf for pre-aggregated files 152 if (opts::ReadPreAggregated) 153 return; 154 155 findPerfExecutable(); 156 157 if (opts::BasicAggregation) 158 launchPerfProcess("events without LBR", 159 MainEventsPPI, 160 "script -F pid,event,ip", 161 /*Wait = */false); 162 else 163 launchPerfProcess("branch events", 164 MainEventsPPI, 165 "script -F pid,ip,brstack", 166 /*Wait = */false); 167 168 // Note: we launch script for mem events regardless of the option, as the 169 // command fails fairly fast if mem events were not collected. 170 launchPerfProcess("mem events", 171 MemEventsPPI, 172 "script -F pid,event,addr,ip", 173 /*Wait = */false); 174 175 launchPerfProcess("process events", 176 MMapEventsPPI, 177 "script --show-mmap-events", 178 /*Wait = */false); 179 180 launchPerfProcess("task events", 181 TaskEventsPPI, 182 "script --show-task-events", 183 /*Wait = */false); 184 } 185 186 void DataAggregator::abort() { 187 if (opts::ReadPreAggregated) 188 return; 189 190 std::string Error; 191 192 // Kill subprocesses in case they are not finished 193 sys::Wait(TaskEventsPPI.PI, 1, false, &Error); 194 sys::Wait(MMapEventsPPI.PI, 1, false, &Error); 195 sys::Wait(MainEventsPPI.PI, 1, false, &Error); 196 sys::Wait(MemEventsPPI.PI, 1, false, &Error); 197 198 deleteTempFiles(); 199 200 exit(1); 201 } 202 203 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, 204 const char *ArgsString, bool Wait) { 205 SmallVector<StringRef, 4> Argv; 206 207 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; 208 Argv.push_back(PerfPath.data()); 209 210 char *WritableArgsString = strdup(ArgsString); 211 char *Str = WritableArgsString; 212 do { 213 Argv.push_back(Str); 214 while (*Str && *Str != ' ') 215 ++Str; 216 if (!*Str) 217 break; 218 *Str++ = 0; 219 } while (true); 220 221 Argv.push_back("-f"); 222 Argv.push_back("-i"); 223 Argv.push_back(Filename.c_str()); 224 225 if (std::error_code Errc = 226 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) { 227 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath 228 << " with error " << Errc.message() << "\n"; 229 exit(1); 230 } 231 TempFiles.push_back(PPI.StdoutPath.data()); 232 233 if (std::error_code Errc = 234 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) { 235 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath 236 << " with error " << Errc.message() << "\n"; 237 exit(1); 238 } 239 TempFiles.push_back(PPI.StderrPath.data()); 240 241 Optional<StringRef> Redirects[] = { 242 llvm::None, // Stdin 243 StringRef(PPI.StdoutPath.data()), // Stdout 244 StringRef(PPI.StderrPath.data())}; // Stderr 245 246 LLVM_DEBUG({ 247 dbgs() << "Launching perf: "; 248 for (StringRef Arg : Argv) 249 dbgs() << Arg << " "; 250 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() 251 << "\n"; 252 }); 253 254 if (Wait) 255 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, 256 /*envp*/ llvm::None, Redirects); 257 else 258 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None, 259 Redirects); 260 261 free(WritableArgsString); 262 } 263 264 void DataAggregator::processFileBuildID(StringRef FileBuildID) { 265 PerfProcessInfo BuildIDProcessInfo; 266 launchPerfProcess("buildid list", 267 BuildIDProcessInfo, 268 "buildid-list", 269 /*Wait = */true); 270 271 if (BuildIDProcessInfo.PI.ReturnCode != 0) { 272 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 273 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); 274 StringRef ErrBuf = (*MB)->getBuffer(); 275 276 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode 277 << '\n'; 278 errs() << ErrBuf; 279 return; 280 } 281 282 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 283 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); 284 if (std::error_code EC = MB.getError()) { 285 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " 286 << EC.message() << "\n"; 287 return; 288 } 289 290 FileBuf = std::move(*MB); 291 ParsingBuf = FileBuf->getBuffer(); 292 if (ParsingBuf.empty()) { 293 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " 294 "data was recorded without it\n"; 295 return; 296 } 297 298 Col = 0; 299 Line = 1; 300 Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); 301 if (!FileName) { 302 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " 303 "This indicates the input binary supplied for data aggregation " 304 "is not the same recorded by perf when collecting profiling " 305 "data, or there were no samples recorded for the binary. " 306 "Use -ignore-build-id option to override.\n"; 307 if (!opts::IgnoreBuildID) 308 abort(); 309 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { 310 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; 311 BuildIDBinaryName = std::string(*FileName); 312 } else { 313 outs() << "PERF2BOLT: matched build-id and file name\n"; 314 } 315 316 return; 317 } 318 319 bool DataAggregator::checkPerfDataMagic(StringRef FileName) { 320 if (opts::ReadPreAggregated) 321 return true; 322 323 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName); 324 if (!FD) 325 return false; 326 327 char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; 328 329 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); 330 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( 331 *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0); 332 if (!BytesRead || *BytesRead != 7) 333 return false; 334 335 if (strncmp(Buf, "PERFILE", 7) == 0) 336 return true; 337 return false; 338 } 339 340 void DataAggregator::parsePreAggregated() { 341 std::string Error; 342 343 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 344 MemoryBuffer::getFileOrSTDIN(Filename); 345 if (std::error_code EC = MB.getError()) { 346 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " 347 << EC.message() << "\n"; 348 exit(1); 349 } 350 351 FileBuf = std::move(*MB); 352 ParsingBuf = FileBuf->getBuffer(); 353 Col = 0; 354 Line = 1; 355 if (parsePreAggregatedLBRSamples()) { 356 errs() << "PERF2BOLT: failed to parse samples\n"; 357 exit(1); 358 } 359 } 360 361 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { 362 outs() << "PERF2BOLT: writing data for autofdo tools...\n"; 363 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, 364 TimerGroupDesc, opts::TimeAggregator); 365 366 std::error_code EC; 367 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 368 if (EC) 369 return EC; 370 371 // Format: 372 // number of unique traces 373 // from_1-to_1:count_1 374 // from_2-to_2:count_2 375 // ...... 376 // from_n-to_n:count_n 377 // number of unique sample addresses 378 // addr_1:count_1 379 // addr_2:count_2 380 // ...... 381 // addr_n:count_n 382 // number of unique LBR entries 383 // src_1->dst_1:count_1 384 // src_2->dst_2:count_2 385 // ...... 386 // src_n->dst_n:count_n 387 388 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress; 389 390 // AutoFDO addresses are relative to the first allocated loadable program 391 // segment 392 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t { 393 if (Address < FirstAllocAddress) 394 return 0; 395 return Address - FirstAllocAddress; 396 }; 397 398 OutFile << FallthroughLBRs.size() << "\n"; 399 for (const auto &AggrLBR : FallthroughLBRs) { 400 const Trace &Trace = AggrLBR.first; 401 const FTInfo &Info = AggrLBR.second; 402 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-" 403 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 404 << (Info.InternCount + Info.ExternCount) << "\n"; 405 } 406 407 OutFile << BasicSamples.size() << "\n"; 408 for (const auto &Sample : BasicSamples) { 409 uint64_t PC = Sample.first; 410 uint64_t HitCount = Sample.second; 411 OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n"; 412 } 413 414 OutFile << BranchLBRs.size() << "\n"; 415 for (const auto &AggrLBR : BranchLBRs) { 416 const Trace &Trace = AggrLBR.first; 417 const BranchInfo &Info = AggrLBR.second; 418 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->" 419 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 420 << Info.TakenCount << "\n"; 421 } 422 423 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, " 424 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size() 425 << " unique branches to " << OutputFilename << "\n"; 426 427 return std::error_code(); 428 } 429 430 void DataAggregator::filterBinaryMMapInfo() { 431 if (opts::FilterPID) { 432 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); 433 if (MMapInfoIter != BinaryMMapInfo.end()) { 434 MMapInfo MMap = MMapInfoIter->second; 435 BinaryMMapInfo.clear(); 436 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); 437 } else { 438 if (errs().has_colors()) 439 errs().changeColor(raw_ostream::RED); 440 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" 441 << opts::FilterPID << "\"" 442 << " for binary \"" << BC->getFilename() << "\"."; 443 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); 444 errs() << " Profile for the following process is available:\n"; 445 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 446 outs() << " " << MMI.second.PID 447 << (MMI.second.Forked ? " (forked)\n" : "\n"); 448 449 if (errs().has_colors()) 450 errs().resetColor(); 451 452 exit(1); 453 } 454 } 455 } 456 457 Error DataAggregator::preprocessProfile(BinaryContext &BC) { 458 this->BC = &BC; 459 460 if (opts::ReadPreAggregated) { 461 parsePreAggregated(); 462 return Error::success(); 463 } 464 465 if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) { 466 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; 467 processFileBuildID(*FileBuildID); 468 } else { 469 errs() << "BOLT-WARNING: build-id will not be checked because we could " 470 "not read one from input binary\n"; 471 } 472 473 auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) { 474 std::string Error; 475 outs() << "PERF2BOLT: waiting for perf " << Name 476 << " collection to finish...\n"; 477 sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error); 478 479 if (!Error.empty()) { 480 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; 481 deleteTempFiles(); 482 exit(1); 483 } 484 485 if (PI.ReturnCode != 0) { 486 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = 487 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); 488 StringRef ErrBuf = (*ErrorMB)->getBuffer(); 489 490 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 491 errs() << ErrBuf; 492 deleteTempFiles(); 493 exit(1); 494 } 495 496 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 497 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); 498 if (std::error_code EC = MB.getError()) { 499 errs() << "Cannot open " << Process.StdoutPath.data() << ": " 500 << EC.message() << "\n"; 501 deleteTempFiles(); 502 exit(1); 503 } 504 505 FileBuf = std::move(*MB); 506 ParsingBuf = FileBuf->getBuffer(); 507 Col = 0; 508 Line = 1; 509 }; 510 511 if (opts::LinuxKernelMode) { 512 // Current MMap parsing logic does not work with linux kernel. 513 // MMap entries for linux kernel uses PERF_RECORD_MMAP 514 // format instead of typical PERF_RECORD_MMAP2 format. 515 // Since linux kernel address mapping is absolute (same as 516 // in the ELF file), we avoid parsing MMap in linux kernel mode. 517 // While generating optimized linux kernel binary, we may need 518 // to parse MMap entries. 519 520 // In linux kernel mode, we analyze and optimize 521 // all linux kernel binary instructions, irrespective 522 // of whether they are due to system calls or due to 523 // interrupts. Therefore, we cannot ignore interrupt 524 // in Linux kernel mode. 525 opts::IgnoreInterruptLBR = false; 526 } else { 527 prepareToParse("mmap events", MMapEventsPPI); 528 if (parseMMapEvents()) 529 errs() << "PERF2BOLT: failed to parse mmap events\n"; 530 } 531 532 prepareToParse("task events", TaskEventsPPI); 533 if (parseTaskEvents()) 534 errs() << "PERF2BOLT: failed to parse task events\n"; 535 536 filterBinaryMMapInfo(); 537 prepareToParse("events", MainEventsPPI); 538 539 if (opts::HeatmapMode) { 540 if (std::error_code EC = printLBRHeatMap()) { 541 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; 542 exit(1); 543 } 544 exit(0); 545 } 546 547 if ((!opts::BasicAggregation && parseBranchEvents()) || 548 (opts::BasicAggregation && parseBasicEvents())) 549 errs() << "PERF2BOLT: failed to parse samples\n"; 550 551 // We can finish early if the goal is just to generate data for autofdo 552 if (opts::WriteAutoFDOData) { 553 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename)) 554 errs() << "Error writing autofdo data to file: " << EC.message() << "\n"; 555 556 deleteTempFiles(); 557 exit(0); 558 } 559 560 // Special handling for memory events 561 std::string Error; 562 sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error); 563 if (PI.ReturnCode != 0) { 564 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 565 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data()); 566 StringRef ErrBuf = (*MB)->getBuffer(); 567 568 deleteTempFiles(); 569 570 Regex NoData("Samples for '.*' event do not have ADDR attribute set. " 571 "Cannot print 'addr' field."); 572 if (!NoData.match(ErrBuf)) { 573 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 574 errs() << ErrBuf; 575 exit(1); 576 } 577 return Error::success(); 578 } 579 580 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 581 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data()); 582 if (std::error_code EC = MB.getError()) { 583 errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": " 584 << EC.message() << "\n"; 585 deleteTempFiles(); 586 exit(1); 587 } 588 589 FileBuf = std::move(*MB); 590 ParsingBuf = FileBuf->getBuffer(); 591 Col = 0; 592 Line = 1; 593 if (const std::error_code EC = parseMemEvents()) 594 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() 595 << '\n'; 596 597 deleteTempFiles(); 598 599 return Error::success(); 600 } 601 602 Error DataAggregator::readProfile(BinaryContext &BC) { 603 processProfile(BC); 604 605 for (auto &BFI : BC.getBinaryFunctions()) { 606 BinaryFunction &Function = BFI.second; 607 convertBranchData(Function); 608 } 609 610 if (opts::AggregateOnly) { 611 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) 612 report_error("cannot create output data file", EC); 613 } 614 615 return Error::success(); 616 } 617 618 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { 619 return Function.hasProfileAvailable(); 620 } 621 622 void DataAggregator::processProfile(BinaryContext &BC) { 623 if (opts::ReadPreAggregated) 624 processPreAggregated(); 625 else if (opts::BasicAggregation) 626 processBasicEvents(); 627 else 628 processBranchEvents(); 629 630 processMemEvents(); 631 632 // Mark all functions with registered events as having a valid profile. 633 for (auto &BFI : BC.getBinaryFunctions()) { 634 BinaryFunction &BF = BFI.second; 635 if (getBranchData(BF)) { 636 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE 637 : BinaryFunction::PF_LBR; 638 BF.markProfiled(Flags); 639 } 640 } 641 642 // Release intermediate storage. 643 clear(BranchLBRs); 644 clear(FallthroughLBRs); 645 clear(AggregatedLBRs); 646 clear(BasicSamples); 647 clear(MemSamples); 648 } 649 650 BinaryFunction * 651 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { 652 if (!BC->containsAddress(Address)) 653 return nullptr; 654 655 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, 656 /*UseMaxSize=*/true); 657 } 658 659 StringRef DataAggregator::getLocationName(BinaryFunction &Func, 660 uint64_t Count) { 661 if (!BAT) 662 return Func.getOneName(); 663 664 const BinaryFunction *OrigFunc = &Func; 665 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) { 666 NumColdSamples += Count; 667 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr); 668 if (HotFunc) 669 OrigFunc = HotFunc; 670 } 671 // If it is a local function, prefer the name containing the file name where 672 // the local function was declared 673 for (StringRef AlternativeName : OrigFunc->getNames()) { 674 size_t FileNameIdx = AlternativeName.find('/'); 675 // Confirm the alternative name has the pattern Symbol/FileName/1 before 676 // using it 677 if (FileNameIdx == StringRef::npos || 678 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos) 679 continue; 680 return AlternativeName; 681 } 682 return OrigFunc->getOneName(); 683 } 684 685 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address, 686 uint64_t Count) { 687 auto I = NamesToSamples.find(Func.getOneName()); 688 if (I == NamesToSamples.end()) { 689 bool Success; 690 StringRef LocName = getLocationName(Func, Count); 691 std::tie(I, Success) = NamesToSamples.insert( 692 std::make_pair(Func.getOneName(), 693 FuncSampleData(LocName, FuncSampleData::ContainerTy()))); 694 } 695 696 Address -= Func.getAddress(); 697 if (BAT) 698 Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false); 699 700 I->second.bumpCount(Address, Count); 701 return true; 702 } 703 704 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, 705 uint64_t To, uint64_t Count, 706 uint64_t Mispreds) { 707 FuncBranchData *AggrData = getBranchData(Func); 708 if (!AggrData) { 709 AggrData = &NamesToBranches[Func.getOneName()]; 710 AggrData->Name = getLocationName(Func, Count); 711 setBranchData(Func, AggrData); 712 } 713 714 From -= Func.getAddress(); 715 To -= Func.getAddress(); 716 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName() 717 << " @ " << Twine::utohexstr(From) << " -> " 718 << Func.getPrintName() << " @ " << Twine::utohexstr(To) 719 << '\n'); 720 if (BAT) { 721 From = BAT->translate(Func, From, /*IsBranchSrc=*/true); 722 To = BAT->translate(Func, To, /*IsBranchSrc=*/false); 723 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: " 724 << Func.getPrintName() << " @ " << Twine::utohexstr(From) 725 << " -> " << Func.getPrintName() << " @ " 726 << Twine::utohexstr(To) << '\n'); 727 } 728 729 AggrData->bumpBranchCount(From, To, Count, Mispreds); 730 return true; 731 } 732 733 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, 734 BinaryFunction *ToFunc, uint64_t From, 735 uint64_t To, uint64_t Count, 736 uint64_t Mispreds) { 737 FuncBranchData *FromAggrData = nullptr; 738 FuncBranchData *ToAggrData = nullptr; 739 StringRef SrcFunc; 740 StringRef DstFunc; 741 if (FromFunc) { 742 SrcFunc = getLocationName(*FromFunc, Count); 743 FromAggrData = getBranchData(*FromFunc); 744 if (!FromAggrData) { 745 FromAggrData = &NamesToBranches[FromFunc->getOneName()]; 746 FromAggrData->Name = SrcFunc; 747 setBranchData(*FromFunc, FromAggrData); 748 } 749 From -= FromFunc->getAddress(); 750 if (BAT) 751 From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true); 752 753 recordExit(*FromFunc, From, Mispreds, Count); 754 } 755 if (ToFunc) { 756 DstFunc = getLocationName(*ToFunc, 0); 757 ToAggrData = getBranchData(*ToFunc); 758 if (!ToAggrData) { 759 ToAggrData = &NamesToBranches[ToFunc->getOneName()]; 760 ToAggrData->Name = DstFunc; 761 setBranchData(*ToFunc, ToAggrData); 762 } 763 To -= ToFunc->getAddress(); 764 if (BAT) 765 To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false); 766 767 recordEntry(*ToFunc, To, Mispreds, Count); 768 } 769 770 if (FromAggrData) 771 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To), 772 Count, Mispreds); 773 if (ToAggrData) 774 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To, 775 Count, Mispreds); 776 return true; 777 } 778 779 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, 780 uint64_t Mispreds) { 781 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); 782 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); 783 if (!FromFunc && !ToFunc) 784 return false; 785 786 if (FromFunc == ToFunc) { 787 recordBranch(*FromFunc, From - FromFunc->getAddress(), 788 To - FromFunc->getAddress(), Count, Mispreds); 789 return doIntraBranch(*FromFunc, From, To, Count, Mispreds); 790 } 791 792 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); 793 } 794 795 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, 796 uint64_t Count) { 797 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); 798 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); 799 if (!FromFunc || !ToFunc) { 800 LLVM_DEBUG( 801 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() 802 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 803 << " and ending in " << ToFunc->getPrintName() << " @ " 804 << ToFunc->getPrintName() << " @ " 805 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 806 NumLongRangeTraces += Count; 807 return false; 808 } 809 if (FromFunc != ToFunc) { 810 NumInvalidTraces += Count; 811 LLVM_DEBUG( 812 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 813 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 814 << " and ending in " << ToFunc->getPrintName() << " @ " 815 << ToFunc->getPrintName() << " @ " 816 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 817 return false; 818 } 819 820 Optional<BoltAddressTranslation::FallthroughListTy> FTs = 821 BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From) 822 : getFallthroughsInTrace(*FromFunc, First, Second, Count); 823 if (!FTs) { 824 LLVM_DEBUG( 825 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 826 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 827 << " and ending in " << ToFunc->getPrintName() << " @ " 828 << ToFunc->getPrintName() << " @ " 829 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 830 NumInvalidTraces += Count; 831 return false; 832 } 833 834 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " 835 << FromFunc->getPrintName() << ":" 836 << Twine::utohexstr(First.To) << " to " 837 << Twine::utohexstr(Second.From) << ".\n"); 838 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs) 839 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(), 840 Pair.second + FromFunc->getAddress(), Count, false); 841 842 return true; 843 } 844 845 bool DataAggregator::recordTrace( 846 BinaryFunction &BF, 847 const LBREntry &FirstLBR, 848 const LBREntry &SecondLBR, 849 uint64_t Count, 850 SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const { 851 BinaryContext &BC = BF.getBinaryContext(); 852 853 if (!BF.isSimple()) 854 return false; 855 856 assert(BF.hasCFG() && "can only record traces in CFG state"); 857 858 // Offsets of the trace within this function. 859 const uint64_t From = FirstLBR.To - BF.getAddress(); 860 const uint64_t To = SecondLBR.From - BF.getAddress(); 861 862 if (From > To) 863 return false; 864 865 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 866 BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 867 868 if (!FromBB || !ToBB) 869 return false; 870 871 // Adjust FromBB if the first LBR is a return from the last instruction in 872 // the previous block (that instruction should be a call). 873 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && 874 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { 875 BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1]; 876 if (PrevBB->getSuccessor(FromBB->getLabel())) { 877 const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); 878 if (Instr && BC.MIB->isCall(*Instr)) 879 FromBB = PrevBB; 880 else 881 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR 882 << '\n'); 883 } else { 884 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); 885 } 886 } 887 888 // Fill out information for fall-through edges. The From and To could be 889 // within the same basic block, e.g. when two call instructions are in the 890 // same block. In this case we skip the processing. 891 if (FromBB == ToBB) 892 return true; 893 894 // Process blocks in the original layout order. 895 BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()]; 896 assert(BB == FromBB && "index mismatch"); 897 while (BB != ToBB) { 898 BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1]; 899 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); 900 901 // Check for bad LBRs. 902 if (!BB->getSuccessor(NextBB->getLabel())) { 903 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" 904 << " " << FirstLBR << '\n' 905 << " " << SecondLBR << '\n'); 906 return false; 907 } 908 909 // Record fall-through jumps 910 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB); 911 BI.Count += Count; 912 913 if (Branches) { 914 const MCInst *Instr = BB->getLastNonPseudoInstr(); 915 uint64_t Offset = 0; 916 if (Instr) 917 Offset = BC.MIB->getAnnotationWithDefault<uint32_t>(*Instr, "Offset"); 918 else 919 Offset = BB->getOffset(); 920 921 Branches->emplace_back(Offset, NextBB->getOffset()); 922 } 923 924 BB = NextBB; 925 } 926 927 return true; 928 } 929 930 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> 931 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, 932 const LBREntry &FirstLBR, 933 const LBREntry &SecondLBR, 934 uint64_t Count) const { 935 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res; 936 937 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res)) 938 return NoneType(); 939 940 return Res; 941 } 942 943 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, 944 uint64_t Count) const { 945 if (To > BF.getSize()) 946 return false; 947 948 if (!BF.hasProfile()) 949 BF.ExecutionCount = 0; 950 951 BinaryBasicBlock *EntryBB = nullptr; 952 if (To == 0) { 953 BF.ExecutionCount += Count; 954 if (!BF.empty()) 955 EntryBB = &BF.front(); 956 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) { 957 if (BB->isEntryPoint()) 958 EntryBB = BB; 959 } 960 961 if (EntryBB) 962 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); 963 964 return true; 965 } 966 967 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, 968 uint64_t Count) const { 969 if (!BF.isSimple() || From > BF.getSize()) 970 return false; 971 972 if (!BF.hasProfile()) 973 BF.ExecutionCount = 0; 974 975 return true; 976 } 977 978 ErrorOr<LBREntry> DataAggregator::parseLBREntry() { 979 LBREntry Res; 980 ErrorOr<StringRef> FromStrRes = parseString('/'); 981 if (std::error_code EC = FromStrRes.getError()) 982 return EC; 983 StringRef OffsetStr = FromStrRes.get(); 984 if (OffsetStr.getAsInteger(0, Res.From)) { 985 reportError("expected hexadecimal number with From address"); 986 Diag << "Found: " << OffsetStr << "\n"; 987 return make_error_code(llvm::errc::io_error); 988 } 989 990 ErrorOr<StringRef> ToStrRes = parseString('/'); 991 if (std::error_code EC = ToStrRes.getError()) 992 return EC; 993 OffsetStr = ToStrRes.get(); 994 if (OffsetStr.getAsInteger(0, Res.To)) { 995 reportError("expected hexadecimal number with To address"); 996 Diag << "Found: " << OffsetStr << "\n"; 997 return make_error_code(llvm::errc::io_error); 998 } 999 1000 ErrorOr<StringRef> MispredStrRes = parseString('/'); 1001 if (std::error_code EC = MispredStrRes.getError()) 1002 return EC; 1003 StringRef MispredStr = MispredStrRes.get(); 1004 if (MispredStr.size() != 1 || 1005 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { 1006 reportError("expected single char for mispred bit"); 1007 Diag << "Found: " << MispredStr << "\n"; 1008 return make_error_code(llvm::errc::io_error); 1009 } 1010 Res.Mispred = MispredStr[0] == 'M'; 1011 1012 static bool MispredWarning = true; 1013 if (MispredStr[0] == '-' && MispredWarning) { 1014 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; 1015 MispredWarning = false; 1016 } 1017 1018 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true); 1019 if (std::error_code EC = Rest.getError()) 1020 return EC; 1021 if (Rest.get().size() < 5) { 1022 reportError("expected rest of LBR entry"); 1023 Diag << "Found: " << Rest.get() << "\n"; 1024 return make_error_code(llvm::errc::io_error); 1025 } 1026 return Res; 1027 } 1028 1029 bool DataAggregator::checkAndConsumeFS() { 1030 if (ParsingBuf[0] != FieldSeparator) 1031 return false; 1032 1033 ParsingBuf = ParsingBuf.drop_front(1); 1034 Col += 1; 1035 return true; 1036 } 1037 1038 void DataAggregator::consumeRestOfLine() { 1039 size_t LineEnd = ParsingBuf.find_first_of('\n'); 1040 if (LineEnd == StringRef::npos) { 1041 ParsingBuf = StringRef(); 1042 Col = 0; 1043 Line += 1; 1044 return; 1045 } 1046 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); 1047 Col = 0; 1048 Line += 1; 1049 } 1050 1051 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { 1052 PerfBranchSample Res; 1053 1054 while (checkAndConsumeFS()) { 1055 } 1056 1057 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1058 if (std::error_code EC = PIDRes.getError()) 1059 return EC; 1060 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1061 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) { 1062 consumeRestOfLine(); 1063 return make_error_code(errc::no_such_process); 1064 } 1065 1066 while (checkAndConsumeFS()) { 1067 } 1068 1069 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1070 if (std::error_code EC = PCRes.getError()) 1071 return EC; 1072 Res.PC = PCRes.get(); 1073 1074 if (checkAndConsumeNewLine()) 1075 return Res; 1076 1077 while (!checkAndConsumeNewLine()) { 1078 checkAndConsumeFS(); 1079 1080 ErrorOr<LBREntry> LBRRes = parseLBREntry(); 1081 if (std::error_code EC = LBRRes.getError()) 1082 return EC; 1083 LBREntry LBR = LBRRes.get(); 1084 if (ignoreKernelInterrupt(LBR)) 1085 continue; 1086 if (!BC->HasFixedLoadAddress) 1087 adjustLBR(LBR, MMapInfoIter->second); 1088 Res.LBR.push_back(LBR); 1089 } 1090 1091 return Res; 1092 } 1093 1094 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { 1095 while (checkAndConsumeFS()) { 1096 } 1097 1098 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1099 if (std::error_code EC = PIDRes.getError()) 1100 return EC; 1101 1102 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1103 if (MMapInfoIter == BinaryMMapInfo.end()) { 1104 consumeRestOfLine(); 1105 return PerfBasicSample{StringRef(), 0}; 1106 } 1107 1108 while (checkAndConsumeFS()) { 1109 } 1110 1111 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1112 if (std::error_code EC = Event.getError()) 1113 return EC; 1114 1115 while (checkAndConsumeFS()) { 1116 } 1117 1118 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true); 1119 if (std::error_code EC = AddrRes.getError()) 1120 return EC; 1121 1122 if (!checkAndConsumeNewLine()) { 1123 reportError("expected end of line"); 1124 return make_error_code(llvm::errc::io_error); 1125 } 1126 1127 uint64_t Address = *AddrRes; 1128 if (!BC->HasFixedLoadAddress) 1129 adjustAddress(Address, MMapInfoIter->second); 1130 1131 return PerfBasicSample{Event.get(), Address}; 1132 } 1133 1134 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { 1135 PerfMemSample Res{0, 0}; 1136 1137 while (checkAndConsumeFS()) { 1138 } 1139 1140 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1141 if (std::error_code EC = PIDRes.getError()) 1142 return EC; 1143 1144 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1145 if (MMapInfoIter == BinaryMMapInfo.end()) { 1146 consumeRestOfLine(); 1147 return Res; 1148 } 1149 1150 while (checkAndConsumeFS()) { 1151 } 1152 1153 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1154 if (std::error_code EC = Event.getError()) 1155 return EC; 1156 if (Event.get().find("mem-loads") == StringRef::npos) { 1157 consumeRestOfLine(); 1158 return Res; 1159 } 1160 1161 while (checkAndConsumeFS()) { 1162 } 1163 1164 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator); 1165 if (std::error_code EC = AddrRes.getError()) 1166 return EC; 1167 1168 while (checkAndConsumeFS()) { 1169 } 1170 1171 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1172 if (std::error_code EC = PCRes.getError()) { 1173 consumeRestOfLine(); 1174 return EC; 1175 } 1176 1177 if (!checkAndConsumeNewLine()) { 1178 reportError("expected end of line"); 1179 return make_error_code(llvm::errc::io_error); 1180 } 1181 1182 uint64_t Address = *AddrRes; 1183 if (!BC->HasFixedLoadAddress) 1184 adjustAddress(Address, MMapInfoIter->second); 1185 1186 return PerfMemSample{PCRes.get(), Address}; 1187 } 1188 1189 ErrorOr<Location> DataAggregator::parseLocationOrOffset() { 1190 auto parseOffset = [this]() -> ErrorOr<Location> { 1191 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator); 1192 if (std::error_code EC = Res.getError()) 1193 return EC; 1194 return Location(Res.get()); 1195 }; 1196 1197 size_t Sep = ParsingBuf.find_first_of(" \n"); 1198 if (Sep == StringRef::npos) 1199 return parseOffset(); 1200 StringRef LookAhead = ParsingBuf.substr(0, Sep); 1201 if (LookAhead.find_first_of(":") == StringRef::npos) 1202 return parseOffset(); 1203 1204 ErrorOr<StringRef> BuildID = parseString(':'); 1205 if (std::error_code EC = BuildID.getError()) 1206 return EC; 1207 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator); 1208 if (std::error_code EC = Offset.getError()) 1209 return EC; 1210 return Location(true, BuildID.get(), Offset.get()); 1211 } 1212 1213 ErrorOr<DataAggregator::AggregatedLBREntry> 1214 DataAggregator::parseAggregatedLBREntry() { 1215 while (checkAndConsumeFS()) { 1216 } 1217 1218 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); 1219 if (std::error_code EC = TypeOrErr.getError()) 1220 return EC; 1221 auto Type = AggregatedLBREntry::BRANCH; 1222 if (TypeOrErr.get() == "B") { 1223 Type = AggregatedLBREntry::BRANCH; 1224 } else if (TypeOrErr.get() == "F") { 1225 Type = AggregatedLBREntry::FT; 1226 } else if (TypeOrErr.get() == "f") { 1227 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; 1228 } else { 1229 reportError("expected B, F or f"); 1230 return make_error_code(llvm::errc::io_error); 1231 } 1232 1233 while (checkAndConsumeFS()) { 1234 } 1235 ErrorOr<Location> From = parseLocationOrOffset(); 1236 if (std::error_code EC = From.getError()) 1237 return EC; 1238 1239 while (checkAndConsumeFS()) { 1240 } 1241 ErrorOr<Location> To = parseLocationOrOffset(); 1242 if (std::error_code EC = To.getError()) 1243 return EC; 1244 1245 while (checkAndConsumeFS()) { 1246 } 1247 ErrorOr<int64_t> Frequency = 1248 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); 1249 if (std::error_code EC = Frequency.getError()) 1250 return EC; 1251 1252 uint64_t Mispreds = 0; 1253 if (Type == AggregatedLBREntry::BRANCH) { 1254 while (checkAndConsumeFS()) { 1255 } 1256 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); 1257 if (std::error_code EC = MispredsOrErr.getError()) 1258 return EC; 1259 Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); 1260 } 1261 1262 if (!checkAndConsumeNewLine()) { 1263 reportError("expected end of line"); 1264 return make_error_code(llvm::errc::io_error); 1265 } 1266 1267 return AggregatedLBREntry{From.get(), To.get(), 1268 static_cast<uint64_t>(Frequency.get()), Mispreds, 1269 Type}; 1270 } 1271 1272 bool DataAggregator::hasData() { 1273 if (ParsingBuf.size() == 0) 1274 return false; 1275 1276 return true; 1277 } 1278 1279 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { 1280 return opts::IgnoreInterruptLBR && 1281 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); 1282 } 1283 1284 std::error_code DataAggregator::printLBRHeatMap() { 1285 outs() << "PERF2BOLT: parse branch events...\n"; 1286 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1287 TimerGroupDesc, opts::TimeAggregator); 1288 1289 if (opts::LinuxKernelMode) { 1290 opts::HeatmapMaxAddress = 0xffffffffffffffff; 1291 opts::HeatmapMinAddress = KernelBaseAddr; 1292 } 1293 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, 1294 opts::HeatmapMaxAddress); 1295 uint64_t NumTotalSamples = 0; 1296 1297 while (hasData()) { 1298 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1299 if (std::error_code EC = SampleRes.getError()) { 1300 if (EC == errc::no_such_process) 1301 continue; 1302 return EC; 1303 } 1304 1305 PerfBranchSample &Sample = SampleRes.get(); 1306 1307 // LBRs are stored in reverse execution order. NextLBR refers to the next 1308 // executed branch record. 1309 const LBREntry *NextLBR = nullptr; 1310 for (const LBREntry &LBR : Sample.LBR) { 1311 if (NextLBR) { 1312 // Record fall-through trace. 1313 const uint64_t TraceFrom = LBR.To; 1314 const uint64_t TraceTo = NextLBR->From; 1315 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; 1316 } 1317 NextLBR = &LBR; 1318 } 1319 if (!Sample.LBR.empty()) { 1320 HM.registerAddress(Sample.LBR.front().To); 1321 HM.registerAddress(Sample.LBR.back().From); 1322 } 1323 NumTotalSamples += Sample.LBR.size(); 1324 } 1325 1326 if (!NumTotalSamples) { 1327 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " 1328 "Cannot build heatmap.\n"; 1329 exit(1); 1330 } 1331 1332 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; 1333 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; 1334 1335 outs() << "HEATMAP: building heat map...\n"; 1336 1337 for (const auto &LBR : FallthroughLBRs) { 1338 const Trace &Trace = LBR.first; 1339 const FTInfo &Info = LBR.second; 1340 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); 1341 } 1342 1343 if (HM.getNumInvalidRanges()) 1344 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; 1345 1346 if (!HM.size()) { 1347 errs() << "HEATMAP-ERROR: no valid traces registered\n"; 1348 exit(1); 1349 } 1350 1351 HM.print(opts::HeatmapFile); 1352 if (opts::HeatmapFile == "-") 1353 HM.printCDF(opts::HeatmapFile); 1354 else 1355 HM.printCDF(opts::HeatmapFile + ".csv"); 1356 1357 return std::error_code(); 1358 } 1359 1360 std::error_code DataAggregator::parseBranchEvents() { 1361 outs() << "PERF2BOLT: parse branch events...\n"; 1362 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1363 TimerGroupDesc, opts::TimeAggregator); 1364 1365 uint64_t NumTotalSamples = 0; 1366 uint64_t NumEntries = 0; 1367 uint64_t NumSamples = 0; 1368 uint64_t NumSamplesNoLBR = 0; 1369 uint64_t NumTraces = 0; 1370 bool NeedsSkylakeFix = false; 1371 1372 while (hasData() && NumTotalSamples < opts::MaxSamples) { 1373 ++NumTotalSamples; 1374 1375 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1376 if (std::error_code EC = SampleRes.getError()) { 1377 if (EC == errc::no_such_process) 1378 continue; 1379 return EC; 1380 } 1381 ++NumSamples; 1382 1383 PerfBranchSample &Sample = SampleRes.get(); 1384 if (opts::WriteAutoFDOData) 1385 ++BasicSamples[Sample.PC]; 1386 1387 if (Sample.LBR.empty()) { 1388 ++NumSamplesNoLBR; 1389 continue; 1390 } 1391 1392 NumEntries += Sample.LBR.size(); 1393 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { 1394 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; 1395 NeedsSkylakeFix = true; 1396 } 1397 1398 // LBRs are stored in reverse execution order. NextPC refers to the next 1399 // recorded executed PC. 1400 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0; 1401 uint32_t NumEntry = 0; 1402 for (const LBREntry &LBR : Sample.LBR) { 1403 ++NumEntry; 1404 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) 1405 // sometimes record entry 32 as an exact copy of entry 31. This will cause 1406 // us to likely record an invalid trace and generate a stale function for 1407 // BAT mode (non BAT disassembles the function and is able to ignore this 1408 // trace at aggregation time). Drop first 2 entries (last two, in 1409 // chronological order) 1410 if (NeedsSkylakeFix && NumEntry <= 2) 1411 continue; 1412 if (NextPC) { 1413 // Record fall-through trace. 1414 const uint64_t TraceFrom = LBR.To; 1415 const uint64_t TraceTo = NextPC; 1416 const BinaryFunction *TraceBF = 1417 getBinaryFunctionContainingAddress(TraceFrom); 1418 if (TraceBF && TraceBF->containsAddress(TraceTo)) { 1419 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; 1420 if (TraceBF->containsAddress(LBR.From)) 1421 ++Info.InternCount; 1422 else 1423 ++Info.ExternCount; 1424 } else { 1425 if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) { 1426 LLVM_DEBUG(dbgs() 1427 << "Invalid trace starting in " 1428 << TraceBF->getPrintName() << " @ " 1429 << Twine::utohexstr(TraceFrom - TraceBF->getAddress()) 1430 << " and ending @ " << Twine::utohexstr(TraceTo) 1431 << '\n'); 1432 ++NumInvalidTraces; 1433 } else { 1434 LLVM_DEBUG(dbgs() 1435 << "Out of range trace starting in " 1436 << (TraceBF ? TraceBF->getPrintName() : "None") << " @ " 1437 << Twine::utohexstr( 1438 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0)) 1439 << " and ending in " 1440 << (getBinaryFunctionContainingAddress(TraceTo) 1441 ? getBinaryFunctionContainingAddress(TraceTo) 1442 ->getPrintName() 1443 : "None") 1444 << " @ " 1445 << Twine::utohexstr( 1446 TraceTo - 1447 (getBinaryFunctionContainingAddress(TraceTo) 1448 ? getBinaryFunctionContainingAddress(TraceTo) 1449 ->getAddress() 1450 : 0)) 1451 << '\n'); 1452 ++NumLongRangeTraces; 1453 } 1454 } 1455 ++NumTraces; 1456 } 1457 NextPC = LBR.From; 1458 1459 uint64_t From = LBR.From; 1460 if (!getBinaryFunctionContainingAddress(From)) 1461 From = 0; 1462 uint64_t To = LBR.To; 1463 if (!getBinaryFunctionContainingAddress(To)) 1464 To = 0; 1465 if (!From && !To) 1466 continue; 1467 BranchInfo &Info = BranchLBRs[Trace(From, To)]; 1468 ++Info.TakenCount; 1469 Info.MispredCount += LBR.Mispred; 1470 } 1471 } 1472 1473 for (const auto &LBR : BranchLBRs) { 1474 const Trace &Trace = LBR.first; 1475 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From)) 1476 BF->setHasProfileAvailable(); 1477 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To)) 1478 BF->setHasProfileAvailable(); 1479 } 1480 1481 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { 1482 OS << " ("; 1483 if (OS.has_colors()) { 1484 if (Percent > T2) 1485 OS.changeColor(raw_ostream::RED); 1486 else if (Percent > T1) 1487 OS.changeColor(raw_ostream::YELLOW); 1488 else 1489 OS.changeColor(raw_ostream::GREEN); 1490 } 1491 OS << format("%.1f%%", Percent); 1492 if (OS.has_colors()) 1493 OS.resetColor(); 1494 OS << ")"; 1495 }; 1496 1497 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries 1498 << " LBR entries\n"; 1499 if (NumTotalSamples) { 1500 if (NumSamples && NumSamplesNoLBR == NumSamples) { 1501 // Note: we don't know if perf2bolt is being used to parse memory samples 1502 // at this point. In this case, it is OK to parse zero LBRs. 1503 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " 1504 "LBR. Record profile with perf record -j any or run perf2bolt " 1505 "in no-LBR mode with -nl (the performance improvement in -nl " 1506 "mode may be limited)\n"; 1507 } else { 1508 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; 1509 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; 1510 outs() << "PERF2BOLT: " << IgnoredSamples << " samples"; 1511 printColored(outs(), PercentIgnored, 20, 50); 1512 outs() << " were ignored\n"; 1513 if (PercentIgnored > 50.0f) 1514 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " 1515 "were attributed to the input binary\n"; 1516 } 1517 } 1518 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1519 << NumInvalidTraces; 1520 float Perc = 0.0f; 1521 if (NumTraces > 0) { 1522 Perc = NumInvalidTraces * 100.0f / NumTraces; 1523 printColored(outs(), Perc, 5, 10); 1524 } 1525 outs() << "\n"; 1526 if (Perc > 10.0f) 1527 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1528 "binary is probably not the same binary used during profiling " 1529 "collection. The generated data may be ineffective for improving " 1530 "performance.\n\n"; 1531 1532 outs() << "PERF2BOLT: out of range traces involving unknown regions: " 1533 << NumLongRangeTraces; 1534 if (NumTraces > 0) 1535 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1536 outs() << "\n"; 1537 1538 if (NumColdSamples > 0) { 1539 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; 1540 outs() << "PERF2BOLT: " << NumColdSamples 1541 << format(" (%.1f%%)", ColdSamples) 1542 << " samples recorded in cold regions of split functions.\n"; 1543 if (ColdSamples > 5.0f) 1544 outs() 1545 << "WARNING: The BOLT-processed binary where samples were collected " 1546 "likely used bad data or your service observed a large shift in " 1547 "profile. You may want to audit this.\n"; 1548 } 1549 1550 return std::error_code(); 1551 } 1552 1553 void DataAggregator::processBranchEvents() { 1554 outs() << "PERF2BOLT: processing branch events...\n"; 1555 NamedRegionTimer T("processBranch", "Processing branch events", 1556 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1557 1558 for (const auto &AggrLBR : FallthroughLBRs) { 1559 const Trace &Loc = AggrLBR.first; 1560 const FTInfo &Info = AggrLBR.second; 1561 LBREntry First{Loc.From, Loc.From, false}; 1562 LBREntry Second{Loc.To, Loc.To, false}; 1563 if (Info.InternCount) 1564 doTrace(First, Second, Info.InternCount); 1565 if (Info.ExternCount) { 1566 First.From = 0; 1567 doTrace(First, Second, Info.ExternCount); 1568 } 1569 } 1570 1571 for (const auto &AggrLBR : BranchLBRs) { 1572 const Trace &Loc = AggrLBR.first; 1573 const BranchInfo &Info = AggrLBR.second; 1574 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); 1575 } 1576 } 1577 1578 std::error_code DataAggregator::parseBasicEvents() { 1579 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; 1580 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, 1581 TimerGroupDesc, opts::TimeAggregator); 1582 while (hasData()) { 1583 ErrorOr<PerfBasicSample> Sample = parseBasicSample(); 1584 if (std::error_code EC = Sample.getError()) 1585 return EC; 1586 1587 if (!Sample->PC) 1588 continue; 1589 1590 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1591 BF->setHasProfileAvailable(); 1592 1593 ++BasicSamples[Sample->PC]; 1594 EventNames.insert(Sample->EventName); 1595 } 1596 1597 return std::error_code(); 1598 } 1599 1600 void DataAggregator::processBasicEvents() { 1601 outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; 1602 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, 1603 TimerGroupDesc, opts::TimeAggregator); 1604 uint64_t OutOfRangeSamples = 0; 1605 uint64_t NumSamples = 0; 1606 for (auto &Sample : BasicSamples) { 1607 const uint64_t PC = Sample.first; 1608 const uint64_t HitCount = Sample.second; 1609 NumSamples += HitCount; 1610 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1611 if (!Func) { 1612 OutOfRangeSamples += HitCount; 1613 continue; 1614 } 1615 1616 doSample(*Func, PC, HitCount); 1617 } 1618 outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; 1619 1620 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " 1621 << OutOfRangeSamples; 1622 float Perc = 0.0f; 1623 if (NumSamples > 0) { 1624 outs() << " ("; 1625 Perc = OutOfRangeSamples * 100.0f / NumSamples; 1626 if (outs().has_colors()) { 1627 if (Perc > 60.0f) 1628 outs().changeColor(raw_ostream::RED); 1629 else if (Perc > 40.0f) 1630 outs().changeColor(raw_ostream::YELLOW); 1631 else 1632 outs().changeColor(raw_ostream::GREEN); 1633 } 1634 outs() << format("%.1f%%", Perc); 1635 if (outs().has_colors()) 1636 outs().resetColor(); 1637 outs() << ")"; 1638 } 1639 outs() << "\n"; 1640 if (Perc > 80.0f) 1641 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1642 "binary is probably not the same binary used during profiling " 1643 "collection. The generated data may be ineffective for improving " 1644 "performance.\n\n"; 1645 } 1646 1647 std::error_code DataAggregator::parseMemEvents() { 1648 outs() << "PERF2BOLT: parsing memory events...\n"; 1649 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, 1650 TimerGroupDesc, opts::TimeAggregator); 1651 while (hasData()) { 1652 ErrorOr<PerfMemSample> Sample = parseMemSample(); 1653 if (std::error_code EC = Sample.getError()) 1654 return EC; 1655 1656 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1657 BF->setHasProfileAvailable(); 1658 1659 MemSamples.emplace_back(std::move(Sample.get())); 1660 } 1661 1662 return std::error_code(); 1663 } 1664 1665 void DataAggregator::processMemEvents() { 1666 NamedRegionTimer T("ProcessMemEvents", "Processing mem events", 1667 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1668 for (const PerfMemSample &Sample : MemSamples) { 1669 uint64_t PC = Sample.PC; 1670 uint64_t Addr = Sample.Addr; 1671 StringRef FuncName; 1672 StringRef MemName; 1673 1674 // Try to resolve symbol for PC 1675 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1676 if (!Func) { 1677 LLVM_DEBUG(if (PC != 0) { 1678 dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x" 1679 << Twine::utohexstr(Addr) << "\n"; 1680 }); 1681 continue; 1682 } 1683 1684 FuncName = Func->getOneName(); 1685 PC -= Func->getAddress(); 1686 1687 // Try to resolve symbol for memory load 1688 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { 1689 MemName = BD->getName(); 1690 Addr -= BD->getAddress(); 1691 } else if (opts::FilterMemProfile) { 1692 // Filter out heap/stack accesses 1693 continue; 1694 } 1695 1696 const Location FuncLoc(!FuncName.empty(), FuncName, PC); 1697 const Location AddrLoc(!MemName.empty(), MemName, Addr); 1698 1699 FuncMemData *MemData = &NamesToMemEvents[FuncName]; 1700 setMemData(*Func, MemData); 1701 MemData->update(FuncLoc, AddrLoc); 1702 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); 1703 } 1704 } 1705 1706 std::error_code DataAggregator::parsePreAggregatedLBRSamples() { 1707 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; 1708 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", 1709 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1710 while (hasData()) { 1711 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); 1712 if (std::error_code EC = AggrEntry.getError()) 1713 return EC; 1714 1715 if (BinaryFunction *BF = 1716 getBinaryFunctionContainingAddress(AggrEntry->From.Offset)) 1717 BF->setHasProfileAvailable(); 1718 if (BinaryFunction *BF = 1719 getBinaryFunctionContainingAddress(AggrEntry->To.Offset)) 1720 BF->setHasProfileAvailable(); 1721 1722 AggregatedLBRs.emplace_back(std::move(AggrEntry.get())); 1723 } 1724 1725 return std::error_code(); 1726 } 1727 1728 void DataAggregator::processPreAggregated() { 1729 outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; 1730 NamedRegionTimer T("processAggregated", "Processing aggregated branch events", 1731 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1732 1733 uint64_t NumTraces = 0; 1734 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { 1735 switch (AggrEntry.EntryType) { 1736 case AggregatedLBREntry::BRANCH: 1737 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, 1738 AggrEntry.Mispreds); 1739 break; 1740 case AggregatedLBREntry::FT: 1741 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { 1742 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT 1743 ? AggrEntry.From.Offset 1744 : 0, 1745 AggrEntry.From.Offset, false}; 1746 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; 1747 doTrace(First, Second, AggrEntry.Count); 1748 NumTraces += AggrEntry.Count; 1749 break; 1750 } 1751 } 1752 } 1753 1754 outs() << "PERF2BOLT: read " << AggregatedLBRs.size() 1755 << " aggregated LBR entries\n"; 1756 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1757 << NumInvalidTraces; 1758 float Perc = 0.0f; 1759 if (NumTraces > 0) { 1760 outs() << " ("; 1761 Perc = NumInvalidTraces * 100.0f / NumTraces; 1762 if (outs().has_colors()) { 1763 if (Perc > 10.0f) 1764 outs().changeColor(raw_ostream::RED); 1765 else if (Perc > 5.0f) 1766 outs().changeColor(raw_ostream::YELLOW); 1767 else 1768 outs().changeColor(raw_ostream::GREEN); 1769 } 1770 outs() << format("%.1f%%", Perc); 1771 if (outs().has_colors()) 1772 outs().resetColor(); 1773 outs() << ")"; 1774 } 1775 outs() << "\n"; 1776 if (Perc > 10.0f) 1777 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1778 "binary is probably not the same binary used during profiling " 1779 "collection. The generated data may be ineffective for improving " 1780 "performance.\n\n"; 1781 1782 outs() << "PERF2BOLT: Out of range traces involving unknown regions: " 1783 << NumLongRangeTraces; 1784 if (NumTraces > 0) 1785 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1786 outs() << "\n"; 1787 } 1788 1789 Optional<int32_t> DataAggregator::parseCommExecEvent() { 1790 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1791 if (LineEnd == StringRef::npos) { 1792 reportError("expected rest of line"); 1793 Diag << "Found: " << ParsingBuf << "\n"; 1794 return NoneType(); 1795 } 1796 StringRef Line = ParsingBuf.substr(0, LineEnd); 1797 1798 size_t Pos = Line.find("PERF_RECORD_COMM exec"); 1799 if (Pos == StringRef::npos) 1800 return NoneType(); 1801 Line = Line.drop_front(Pos); 1802 1803 // Line: 1804 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" 1805 StringRef PIDStr = Line.rsplit(':').second.split('/').first; 1806 int32_t PID; 1807 if (PIDStr.getAsInteger(10, PID)) { 1808 reportError("expected PID"); 1809 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1810 return NoneType(); 1811 } 1812 1813 return PID; 1814 } 1815 1816 namespace { 1817 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) { 1818 const StringRef SecTimeStr = TimeStr.split('.').first; 1819 const StringRef USecTimeStr = TimeStr.split('.').second; 1820 uint64_t SecTime; 1821 uint64_t USecTime; 1822 if (SecTimeStr.getAsInteger(10, SecTime) || 1823 USecTimeStr.getAsInteger(10, USecTime)) 1824 return NoneType(); 1825 return SecTime * 1000000ULL + USecTime; 1826 } 1827 } 1828 1829 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { 1830 while (checkAndConsumeFS()) { 1831 } 1832 1833 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1834 if (LineEnd == StringRef::npos) { 1835 reportError("expected rest of line"); 1836 Diag << "Found: " << ParsingBuf << "\n"; 1837 return NoneType(); 1838 } 1839 StringRef Line = ParsingBuf.substr(0, LineEnd); 1840 1841 size_t Pos = Line.find("PERF_RECORD_FORK"); 1842 if (Pos == StringRef::npos) { 1843 consumeRestOfLine(); 1844 return NoneType(); 1845 } 1846 1847 ForkInfo FI; 1848 1849 const StringRef TimeStr = 1850 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1851 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { 1852 FI.Time = *TimeRes; 1853 } 1854 1855 Line = Line.drop_front(Pos); 1856 1857 // Line: 1858 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) 1859 const StringRef ChildPIDStr = Line.split('(').second.split(':').first; 1860 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { 1861 reportError("expected PID"); 1862 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; 1863 return NoneType(); 1864 } 1865 1866 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; 1867 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { 1868 reportError("expected PID"); 1869 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; 1870 return NoneType(); 1871 } 1872 1873 consumeRestOfLine(); 1874 1875 return FI; 1876 } 1877 1878 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> 1879 DataAggregator::parseMMapEvent() { 1880 while (checkAndConsumeFS()) { 1881 } 1882 1883 MMapInfo ParsedInfo; 1884 1885 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1886 if (LineEnd == StringRef::npos) { 1887 reportError("expected rest of line"); 1888 Diag << "Found: " << ParsingBuf << "\n"; 1889 return make_error_code(llvm::errc::io_error); 1890 } 1891 StringRef Line = ParsingBuf.substr(0, LineEnd); 1892 1893 size_t Pos = Line.find("PERF_RECORD_MMAP2"); 1894 if (Pos == StringRef::npos) { 1895 consumeRestOfLine(); 1896 return std::make_pair(StringRef(), ParsedInfo); 1897 } 1898 1899 // Line: 1900 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> 1901 1902 const StringRef TimeStr = 1903 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1904 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) 1905 ParsedInfo.Time = *TimeRes; 1906 1907 Line = Line.drop_front(Pos); 1908 1909 // Line: 1910 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> 1911 1912 StringRef FileName = Line.rsplit(FieldSeparator).second; 1913 if (FileName.startswith("//") || FileName.startswith("[")) { 1914 consumeRestOfLine(); 1915 return std::make_pair(StringRef(), ParsedInfo); 1916 } 1917 FileName = sys::path::filename(FileName); 1918 1919 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; 1920 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { 1921 reportError("expected PID"); 1922 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1923 return make_error_code(llvm::errc::io_error); 1924 } 1925 1926 const StringRef BaseAddressStr = Line.split('[').second.split('(').first; 1927 if (BaseAddressStr.getAsInteger(0, ParsedInfo.BaseAddress)) { 1928 reportError("expected base address"); 1929 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 1930 return make_error_code(llvm::errc::io_error); 1931 } 1932 1933 const StringRef SizeStr = Line.split('(').second.split(')').first; 1934 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { 1935 reportError("expected mmaped size"); 1936 Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; 1937 return make_error_code(llvm::errc::io_error); 1938 } 1939 1940 const StringRef OffsetStr = 1941 Line.split('@').second.ltrim().split(FieldSeparator).first; 1942 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { 1943 reportError("expected mmaped page-aligned offset"); 1944 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; 1945 return make_error_code(llvm::errc::io_error); 1946 } 1947 1948 consumeRestOfLine(); 1949 1950 return std::make_pair(FileName, ParsedInfo); 1951 } 1952 1953 std::error_code DataAggregator::parseMMapEvents() { 1954 outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; 1955 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName, 1956 TimerGroupDesc, opts::TimeAggregator); 1957 1958 std::multimap<StringRef, MMapInfo> GlobalMMapInfo; 1959 while (hasData()) { 1960 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); 1961 if (std::error_code EC = FileMMapInfoRes.getError()) 1962 return EC; 1963 1964 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); 1965 if (FileMMapInfo.second.PID == -1) 1966 continue; 1967 1968 // Consider only the first mapping of the file for any given PID 1969 bool PIDExists = false; 1970 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); 1971 for (auto MI = Range.first; MI != Range.second; ++MI) { 1972 if (MI->second.PID == FileMMapInfo.second.PID) { 1973 PIDExists = true; 1974 break; 1975 } 1976 } 1977 if (PIDExists) 1978 continue; 1979 1980 GlobalMMapInfo.insert(FileMMapInfo); 1981 } 1982 1983 LLVM_DEBUG({ 1984 dbgs() << "FileName -> mmap info:\n"; 1985 for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo) 1986 dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x" 1987 << Twine::utohexstr(Pair.second.BaseAddress) << ", " 1988 << Twine::utohexstr(Pair.second.Size) << " @ " 1989 << Twine::utohexstr(Pair.second.Offset) << "]\n"; 1990 }); 1991 1992 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename()); 1993 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) { 1994 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName 1995 << "\" for profile matching\n"; 1996 NameToUse = BuildIDBinaryName; 1997 } 1998 1999 auto Range = GlobalMMapInfo.equal_range(NameToUse); 2000 for (auto I = Range.first; I != Range.second; ++I) { 2001 const MMapInfo &MMapInfo = I->second; 2002 if (BC->HasFixedLoadAddress && MMapInfo.BaseAddress) { 2003 // Check that the binary mapping matches one of the segments. 2004 bool MatchFound = false; 2005 for (auto &KV : BC->SegmentMapInfo) { 2006 SegmentInfo &SegInfo = KV.second; 2007 // The mapping is page-aligned and hence the BaseAddress could be 2008 // different from the segment start address. We cannot know the page 2009 // size of the mapping, but we know it should not exceed the segment 2010 // alignment value. Hence we are performing an approximate check. 2011 if (SegInfo.Address >= MMapInfo.BaseAddress && 2012 SegInfo.Address - MMapInfo.BaseAddress < SegInfo.Alignment) { 2013 MatchFound = true; 2014 break; 2015 } 2016 } 2017 if (!MatchFound) { 2018 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse 2019 << " at 0x" << Twine::utohexstr(MMapInfo.BaseAddress) << '\n'; 2020 continue; 2021 } 2022 } 2023 2024 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2025 } 2026 2027 if (BinaryMMapInfo.empty()) { 2028 if (errs().has_colors()) 2029 errs().changeColor(raw_ostream::RED); 2030 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" 2031 << BC->getFilename() << "\"."; 2032 if (!GlobalMMapInfo.empty()) { 2033 errs() << " Profile for the following binary name(s) is available:\n"; 2034 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; 2035 I = GlobalMMapInfo.upper_bound(I->first)) 2036 errs() << " " << I->first << '\n'; 2037 errs() << "Please rename the input binary.\n"; 2038 } else { 2039 errs() << " Failed to extract any binary name from a profile.\n"; 2040 } 2041 if (errs().has_colors()) 2042 errs().resetColor(); 2043 2044 exit(1); 2045 } 2046 2047 return std::error_code(); 2048 } 2049 2050 std::error_code DataAggregator::parseTaskEvents() { 2051 outs() << "PERF2BOLT: parsing perf-script task events output\n"; 2052 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, 2053 TimerGroupDesc, opts::TimeAggregator); 2054 2055 while (hasData()) { 2056 if (Optional<int32_t> CommInfo = parseCommExecEvent()) { 2057 // Remove forked child that ran execve 2058 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); 2059 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) 2060 BinaryMMapInfo.erase(MMapInfoIter); 2061 consumeRestOfLine(); 2062 continue; 2063 } 2064 2065 Optional<ForkInfo> ForkInfo = parseForkEvent(); 2066 if (!ForkInfo) 2067 continue; 2068 2069 if (ForkInfo->ParentPID == ForkInfo->ChildPID) 2070 continue; 2071 2072 if (ForkInfo->Time == 0) { 2073 // Process was forked and mmaped before perf ran. In this case the child 2074 // should have its own mmap entry unless it was execve'd. 2075 continue; 2076 } 2077 2078 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); 2079 if (MMapInfoIter == BinaryMMapInfo.end()) 2080 continue; 2081 2082 MMapInfo MMapInfo = MMapInfoIter->second; 2083 MMapInfo.PID = ForkInfo->ChildPID; 2084 MMapInfo.Forked = true; 2085 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2086 } 2087 2088 outs() << "PERF2BOLT: input binary is associated with " 2089 << BinaryMMapInfo.size() << " PID(s)\n"; 2090 2091 LLVM_DEBUG({ 2092 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 2093 outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "") 2094 << ": (0x" << Twine::utohexstr(MMI.second.BaseAddress) << ": 0x" 2095 << Twine::utohexstr(MMI.second.Size) << ")\n"; 2096 }); 2097 2098 return std::error_code(); 2099 } 2100 2101 Optional<std::pair<StringRef, StringRef>> 2102 DataAggregator::parseNameBuildIDPair() { 2103 while (checkAndConsumeFS()) { 2104 } 2105 2106 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true); 2107 if (std::error_code EC = BuildIDStr.getError()) 2108 return NoneType(); 2109 2110 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true); 2111 if (std::error_code EC = NameStr.getError()) 2112 return NoneType(); 2113 2114 consumeRestOfLine(); 2115 return std::make_pair(NameStr.get(), BuildIDStr.get()); 2116 } 2117 2118 Optional<StringRef> 2119 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { 2120 while (hasData()) { 2121 Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair(); 2122 if (!IDPair) 2123 return NoneType(); 2124 2125 if (IDPair->second.startswith(FileBuildID)) 2126 return sys::path::filename(IDPair->first); 2127 } 2128 return NoneType(); 2129 } 2130 2131 std::error_code 2132 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { 2133 std::error_code EC; 2134 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2135 if (EC) 2136 return EC; 2137 2138 bool WriteMemLocs = false; 2139 2140 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { 2141 if (WriteMemLocs) 2142 OutFile << (Loc.IsSymbol ? "4 " : "3 "); 2143 else 2144 OutFile << (Loc.IsSymbol ? "1 " : "0 "); 2145 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) 2146 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; 2147 }; 2148 2149 uint64_t BranchValues = 0; 2150 uint64_t MemValues = 0; 2151 2152 if (BAT) 2153 OutFile << "boltedcollection\n"; 2154 if (opts::BasicAggregation) { 2155 OutFile << "no_lbr"; 2156 for (const StringMapEntry<NoneType> &Entry : EventNames) 2157 OutFile << " " << Entry.getKey(); 2158 OutFile << "\n"; 2159 2160 for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) { 2161 for (const SampleInfo &SI : Func.getValue().Data) { 2162 writeLocation(SI.Loc); 2163 OutFile << SI.Hits << "\n"; 2164 ++BranchValues; 2165 } 2166 } 2167 } else { 2168 for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 2169 for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) { 2170 writeLocation(BI.From); 2171 writeLocation(BI.To); 2172 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2173 ++BranchValues; 2174 } 2175 for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) { 2176 // Do not output if source is a known symbol, since this was already 2177 // accounted for in the source function 2178 if (BI.From.IsSymbol) 2179 continue; 2180 writeLocation(BI.From); 2181 writeLocation(BI.To); 2182 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2183 ++BranchValues; 2184 } 2185 } 2186 2187 WriteMemLocs = true; 2188 for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) { 2189 for (const MemInfo &MemEvent : Func.getValue().Data) { 2190 writeLocation(MemEvent.Offset); 2191 writeLocation(MemEvent.Addr); 2192 OutFile << MemEvent.Count << "\n"; 2193 ++MemValues; 2194 } 2195 } 2196 } 2197 2198 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues 2199 << " memory objects to " << OutputFilename << "\n"; 2200 2201 return std::error_code(); 2202 } 2203 2204 void DataAggregator::dump() const { DataReader::dump(); } 2205 2206 void DataAggregator::dump(const LBREntry &LBR) const { 2207 Diag << "From: " << Twine::utohexstr(LBR.From) 2208 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred 2209 << "\n"; 2210 } 2211 2212 void DataAggregator::dump(const PerfBranchSample &Sample) const { 2213 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n"; 2214 for (const LBREntry &LBR : Sample.LBR) 2215 dump(LBR); 2216 } 2217 2218 void DataAggregator::dump(const PerfMemSample &Sample) const { 2219 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n"; 2220 } 2221