1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions reads profile data written by perf record, 10 // aggregate it and then write it back to an output file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Profile/DataAggregator.h" 15 #include "bolt/Core/BinaryContext.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "bolt/Profile/BoltAddressTranslation.h" 18 #include "bolt/Profile/Heatmap.h" 19 #include "bolt/Utils/CommandLineOpts.h" 20 #include "bolt/Utils/Utils.h" 21 #include "llvm/ADT/ScopeExit.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/Errc.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Process.h" 27 #include "llvm/Support/Program.h" 28 #include "llvm/Support/Regex.h" 29 #include "llvm/Support/Timer.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include <map> 32 #include <unordered_map> 33 #include <utility> 34 35 #define DEBUG_TYPE "aggregator" 36 37 using namespace llvm; 38 using namespace bolt; 39 40 namespace opts { 41 42 static cl::opt<bool> 43 BasicAggregation("nl", 44 cl::desc("aggregate basic samples (without LBR info)"), 45 cl::init(false), 46 cl::ZeroOrMore, 47 cl::cat(AggregatorCategory)); 48 49 static cl::opt<bool> 50 FilterMemProfile("filter-mem-profile", 51 cl::desc("if processing a memory profile, filter out stack or heap accesses " 52 "that won't be useful for BOLT to reduce profile file size"), 53 cl::init(true), 54 cl::cat(AggregatorCategory)); 55 56 static cl::opt<unsigned long long> 57 FilterPID("pid", 58 cl::desc("only use samples from process with specified PID"), 59 cl::init(0), 60 cl::Optional, 61 cl::cat(AggregatorCategory)); 62 63 static cl::opt<bool> 64 IgnoreBuildID("ignore-build-id", 65 cl::desc("continue even if build-ids in input binary and perf.data mismatch"), 66 cl::init(false), 67 cl::cat(AggregatorCategory)); 68 69 static cl::opt<bool> 70 IgnoreInterruptLBR("ignore-interrupt-lbr", 71 cl::desc("ignore kernel interrupt LBR that happens asynchronously"), 72 cl::init(true), 73 cl::ZeroOrMore, 74 cl::cat(AggregatorCategory)); 75 76 static cl::opt<unsigned long long> 77 MaxSamples("max-samples", 78 cl::init(-1ULL), 79 cl::desc("maximum number of samples to read from LBR profile"), 80 cl::Optional, 81 cl::Hidden, 82 cl::cat(AggregatorCategory)); 83 84 static cl::opt<bool> 85 ReadPreAggregated("pa", 86 cl::desc("skip perf and read data from a pre-aggregated file format"), 87 cl::init(false), 88 cl::ZeroOrMore, 89 cl::cat(AggregatorCategory)); 90 91 static cl::opt<bool> 92 TimeAggregator("time-aggr", 93 cl::desc("time BOLT aggregator"), 94 cl::init(false), 95 cl::ZeroOrMore, 96 cl::cat(AggregatorCategory)); 97 98 static cl::opt<bool> 99 UseEventPC("use-event-pc", 100 cl::desc("use event PC in combination with LBR sampling"), 101 cl::init(false), 102 cl::ZeroOrMore, 103 cl::cat(AggregatorCategory)); 104 105 static cl::opt<bool> 106 WriteAutoFDOData("autofdo", 107 cl::desc("generate autofdo textual data instead of bolt data"), 108 cl::init(false), 109 cl::ZeroOrMore, 110 cl::cat(AggregatorCategory)); 111 112 } // namespace opts 113 114 namespace { 115 116 const char TimerGroupName[] = "aggregator"; 117 const char TimerGroupDesc[] = "Aggregator"; 118 119 } 120 121 constexpr uint64_t DataAggregator::KernelBaseAddr; 122 123 DataAggregator::~DataAggregator() { deleteTempFiles(); } 124 125 namespace { 126 void deleteTempFile(const std::string &FileName) { 127 if (std::error_code Errc = sys::fs::remove(FileName.c_str())) 128 errs() << "PERF2BOLT: failed to delete temporary file " << FileName 129 << " with error " << Errc.message() << "\n"; 130 } 131 } 132 133 void DataAggregator::deleteTempFiles() { 134 for (std::string &FileName : TempFiles) 135 deleteTempFile(FileName); 136 TempFiles.clear(); 137 } 138 139 void DataAggregator::findPerfExecutable() { 140 Optional<std::string> PerfExecutable = 141 sys::Process::FindInEnvPath("PATH", "perf"); 142 if (!PerfExecutable) { 143 outs() << "PERF2BOLT: No perf executable found!\n"; 144 exit(1); 145 } 146 PerfPath = *PerfExecutable; 147 } 148 149 void DataAggregator::start() { 150 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; 151 152 // Don't launch perf for pre-aggregated files 153 if (opts::ReadPreAggregated) 154 return; 155 156 findPerfExecutable(); 157 158 if (opts::BasicAggregation) 159 launchPerfProcess("events without LBR", 160 MainEventsPPI, 161 "script -F pid,event,ip", 162 /*Wait = */false); 163 else 164 launchPerfProcess("branch events", 165 MainEventsPPI, 166 "script -F pid,ip,brstack", 167 /*Wait = */false); 168 169 // Note: we launch script for mem events regardless of the option, as the 170 // command fails fairly fast if mem events were not collected. 171 launchPerfProcess("mem events", 172 MemEventsPPI, 173 "script -F pid,event,addr,ip", 174 /*Wait = */false); 175 176 launchPerfProcess("process events", 177 MMapEventsPPI, 178 "script --show-mmap-events", 179 /*Wait = */false); 180 181 launchPerfProcess("task events", 182 TaskEventsPPI, 183 "script --show-task-events", 184 /*Wait = */false); 185 } 186 187 void DataAggregator::abort() { 188 if (opts::ReadPreAggregated) 189 return; 190 191 std::string Error; 192 193 // Kill subprocesses in case they are not finished 194 sys::Wait(TaskEventsPPI.PI, 1, false, &Error); 195 sys::Wait(MMapEventsPPI.PI, 1, false, &Error); 196 sys::Wait(MainEventsPPI.PI, 1, false, &Error); 197 sys::Wait(MemEventsPPI.PI, 1, false, &Error); 198 199 deleteTempFiles(); 200 201 exit(1); 202 } 203 204 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, 205 const char *ArgsString, bool Wait) { 206 SmallVector<StringRef, 4> Argv; 207 208 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; 209 Argv.push_back(PerfPath.data()); 210 211 char *WritableArgsString = strdup(ArgsString); 212 char *Str = WritableArgsString; 213 do { 214 Argv.push_back(Str); 215 while (*Str && *Str != ' ') 216 ++Str; 217 if (!*Str) 218 break; 219 *Str++ = 0; 220 } while (true); 221 222 Argv.push_back("-f"); 223 Argv.push_back("-i"); 224 Argv.push_back(Filename.c_str()); 225 226 if (std::error_code Errc = 227 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) { 228 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath 229 << " with error " << Errc.message() << "\n"; 230 exit(1); 231 } 232 TempFiles.push_back(PPI.StdoutPath.data()); 233 234 if (std::error_code Errc = 235 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) { 236 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath 237 << " with error " << Errc.message() << "\n"; 238 exit(1); 239 } 240 TempFiles.push_back(PPI.StderrPath.data()); 241 242 Optional<StringRef> Redirects[] = { 243 llvm::None, // Stdin 244 StringRef(PPI.StdoutPath.data()), // Stdout 245 StringRef(PPI.StderrPath.data())}; // Stderr 246 247 LLVM_DEBUG({ 248 dbgs() << "Launching perf: "; 249 for (StringRef Arg : Argv) 250 dbgs() << Arg << " "; 251 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() 252 << "\n"; 253 }); 254 255 if (Wait) 256 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, 257 /*envp*/ llvm::None, Redirects); 258 else 259 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None, 260 Redirects); 261 262 free(WritableArgsString); 263 } 264 265 void DataAggregator::processFileBuildID(StringRef FileBuildID) { 266 PerfProcessInfo BuildIDProcessInfo; 267 launchPerfProcess("buildid list", 268 BuildIDProcessInfo, 269 "buildid-list", 270 /*Wait = */true); 271 272 if (BuildIDProcessInfo.PI.ReturnCode != 0) { 273 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 274 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); 275 StringRef ErrBuf = (*MB)->getBuffer(); 276 277 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode 278 << '\n'; 279 errs() << ErrBuf; 280 return; 281 } 282 283 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 284 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); 285 if (std::error_code EC = MB.getError()) { 286 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " 287 << EC.message() << "\n"; 288 return; 289 } 290 291 FileBuf = std::move(*MB); 292 ParsingBuf = FileBuf->getBuffer(); 293 if (ParsingBuf.empty()) { 294 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " 295 "data was recorded without it\n"; 296 return; 297 } 298 299 Col = 0; 300 Line = 1; 301 Optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); 302 if (!FileName) { 303 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " 304 "This indicates the input binary supplied for data aggregation " 305 "is not the same recorded by perf when collecting profiling " 306 "data, or there were no samples recorded for the binary. " 307 "Use -ignore-build-id option to override.\n"; 308 if (!opts::IgnoreBuildID) 309 abort(); 310 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { 311 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; 312 BuildIDBinaryName = std::string(*FileName); 313 } else { 314 outs() << "PERF2BOLT: matched build-id and file name\n"; 315 } 316 317 return; 318 } 319 320 bool DataAggregator::checkPerfDataMagic(StringRef FileName) { 321 if (opts::ReadPreAggregated) 322 return true; 323 324 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName); 325 if (!FD) 326 return false; 327 328 char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; 329 330 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); 331 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( 332 *FD, makeMutableArrayRef(Buf, sizeof(Buf)), 0); 333 if (!BytesRead || *BytesRead != 7) 334 return false; 335 336 if (strncmp(Buf, "PERFILE", 7) == 0) 337 return true; 338 return false; 339 } 340 341 void DataAggregator::parsePreAggregated() { 342 std::string Error; 343 344 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 345 MemoryBuffer::getFileOrSTDIN(Filename); 346 if (std::error_code EC = MB.getError()) { 347 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " 348 << EC.message() << "\n"; 349 exit(1); 350 } 351 352 FileBuf = std::move(*MB); 353 ParsingBuf = FileBuf->getBuffer(); 354 Col = 0; 355 Line = 1; 356 if (parsePreAggregatedLBRSamples()) { 357 errs() << "PERF2BOLT: failed to parse samples\n"; 358 exit(1); 359 } 360 } 361 362 std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { 363 outs() << "PERF2BOLT: writing data for autofdo tools...\n"; 364 NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, 365 TimerGroupDesc, opts::TimeAggregator); 366 367 std::error_code EC; 368 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 369 if (EC) 370 return EC; 371 372 // Format: 373 // number of unique traces 374 // from_1-to_1:count_1 375 // from_2-to_2:count_2 376 // ...... 377 // from_n-to_n:count_n 378 // number of unique sample addresses 379 // addr_1:count_1 380 // addr_2:count_2 381 // ...... 382 // addr_n:count_n 383 // number of unique LBR entries 384 // src_1->dst_1:count_1 385 // src_2->dst_2:count_2 386 // ...... 387 // src_n->dst_n:count_n 388 389 const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress; 390 391 // AutoFDO addresses are relative to the first allocated loadable program 392 // segment 393 auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t { 394 if (Address < FirstAllocAddress) 395 return 0; 396 return Address - FirstAllocAddress; 397 }; 398 399 OutFile << FallthroughLBRs.size() << "\n"; 400 for (const auto &AggrLBR : FallthroughLBRs) { 401 const Trace &Trace = AggrLBR.first; 402 const FTInfo &Info = AggrLBR.second; 403 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "-" 404 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 405 << (Info.InternCount + Info.ExternCount) << "\n"; 406 } 407 408 OutFile << BasicSamples.size() << "\n"; 409 for (const auto &Sample : BasicSamples) { 410 uint64_t PC = Sample.first; 411 uint64_t HitCount = Sample.second; 412 OutFile << Twine::utohexstr(filterAddress(PC)) << ":" << HitCount << "\n"; 413 } 414 415 OutFile << BranchLBRs.size() << "\n"; 416 for (const auto &AggrLBR : BranchLBRs) { 417 const Trace &Trace = AggrLBR.first; 418 const BranchInfo &Info = AggrLBR.second; 419 OutFile << Twine::utohexstr(filterAddress(Trace.From)) << "->" 420 << Twine::utohexstr(filterAddress(Trace.To)) << ":" 421 << Info.TakenCount << "\n"; 422 } 423 424 outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, " 425 << BasicSamples.size() << " sample addresses and " << BranchLBRs.size() 426 << " unique branches to " << OutputFilename << "\n"; 427 428 return std::error_code(); 429 } 430 431 void DataAggregator::filterBinaryMMapInfo() { 432 if (opts::FilterPID) { 433 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); 434 if (MMapInfoIter != BinaryMMapInfo.end()) { 435 MMapInfo MMap = MMapInfoIter->second; 436 BinaryMMapInfo.clear(); 437 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); 438 } else { 439 if (errs().has_colors()) 440 errs().changeColor(raw_ostream::RED); 441 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" 442 << opts::FilterPID << "\"" 443 << " for binary \"" << BC->getFilename() << "\"."; 444 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); 445 errs() << " Profile for the following process is available:\n"; 446 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 447 outs() << " " << MMI.second.PID 448 << (MMI.second.Forked ? " (forked)\n" : "\n"); 449 450 if (errs().has_colors()) 451 errs().resetColor(); 452 453 exit(1); 454 } 455 } 456 } 457 458 Error DataAggregator::preprocessProfile(BinaryContext &BC) { 459 this->BC = &BC; 460 461 if (opts::ReadPreAggregated) { 462 parsePreAggregated(); 463 return Error::success(); 464 } 465 466 if (Optional<StringRef> FileBuildID = BC.getFileBuildID()) { 467 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; 468 processFileBuildID(*FileBuildID); 469 } else { 470 errs() << "BOLT-WARNING: build-id will not be checked because we could " 471 "not read one from input binary\n"; 472 } 473 474 auto prepareToParse = [&](StringRef Name, PerfProcessInfo &Process) { 475 std::string Error; 476 outs() << "PERF2BOLT: waiting for perf " << Name 477 << " collection to finish...\n"; 478 sys::ProcessInfo PI = sys::Wait(Process.PI, 0, true, &Error); 479 480 if (!Error.empty()) { 481 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; 482 deleteTempFiles(); 483 exit(1); 484 } 485 486 if (PI.ReturnCode != 0) { 487 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = 488 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); 489 StringRef ErrBuf = (*ErrorMB)->getBuffer(); 490 491 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 492 errs() << ErrBuf; 493 deleteTempFiles(); 494 exit(1); 495 } 496 497 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 498 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); 499 if (std::error_code EC = MB.getError()) { 500 errs() << "Cannot open " << Process.StdoutPath.data() << ": " 501 << EC.message() << "\n"; 502 deleteTempFiles(); 503 exit(1); 504 } 505 506 FileBuf = std::move(*MB); 507 ParsingBuf = FileBuf->getBuffer(); 508 Col = 0; 509 Line = 1; 510 }; 511 512 if (opts::LinuxKernelMode) { 513 // Current MMap parsing logic does not work with linux kernel. 514 // MMap entries for linux kernel uses PERF_RECORD_MMAP 515 // format instead of typical PERF_RECORD_MMAP2 format. 516 // Since linux kernel address mapping is absolute (same as 517 // in the ELF file), we avoid parsing MMap in linux kernel mode. 518 // While generating optimized linux kernel binary, we may need 519 // to parse MMap entries. 520 521 // In linux kernel mode, we analyze and optimize 522 // all linux kernel binary instructions, irrespective 523 // of whether they are due to system calls or due to 524 // interrupts. Therefore, we cannot ignore interrupt 525 // in Linux kernel mode. 526 opts::IgnoreInterruptLBR = false; 527 } else { 528 prepareToParse("mmap events", MMapEventsPPI); 529 if (parseMMapEvents()) 530 errs() << "PERF2BOLT: failed to parse mmap events\n"; 531 } 532 533 prepareToParse("task events", TaskEventsPPI); 534 if (parseTaskEvents()) 535 errs() << "PERF2BOLT: failed to parse task events\n"; 536 537 filterBinaryMMapInfo(); 538 prepareToParse("events", MainEventsPPI); 539 540 if (opts::HeatmapMode) { 541 if (std::error_code EC = printLBRHeatMap()) { 542 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; 543 exit(1); 544 } 545 exit(0); 546 } 547 548 if ((!opts::BasicAggregation && parseBranchEvents()) || 549 (opts::BasicAggregation && parseBasicEvents())) 550 errs() << "PERF2BOLT: failed to parse samples\n"; 551 552 // We can finish early if the goal is just to generate data for autofdo 553 if (opts::WriteAutoFDOData) { 554 if (std::error_code EC = writeAutoFDOData(opts::OutputFilename)) 555 errs() << "Error writing autofdo data to file: " << EC.message() << "\n"; 556 557 deleteTempFiles(); 558 exit(0); 559 } 560 561 // Special handling for memory events 562 std::string Error; 563 sys::ProcessInfo PI = sys::Wait(MemEventsPPI.PI, 0, true, &Error); 564 if (PI.ReturnCode != 0) { 565 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 566 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StderrPath.data()); 567 StringRef ErrBuf = (*MB)->getBuffer(); 568 569 deleteTempFiles(); 570 571 Regex NoData("Samples for '.*' event do not have ADDR attribute set. " 572 "Cannot print 'addr' field."); 573 if (!NoData.match(ErrBuf)) { 574 errs() << "PERF-ERROR: return code " << PI.ReturnCode << "\n"; 575 errs() << ErrBuf; 576 exit(1); 577 } 578 return Error::success(); 579 } 580 581 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 582 MemoryBuffer::getFileOrSTDIN(MemEventsPPI.StdoutPath.data()); 583 if (std::error_code EC = MB.getError()) { 584 errs() << "Cannot open " << MemEventsPPI.StdoutPath.data() << ": " 585 << EC.message() << "\n"; 586 deleteTempFiles(); 587 exit(1); 588 } 589 590 FileBuf = std::move(*MB); 591 ParsingBuf = FileBuf->getBuffer(); 592 Col = 0; 593 Line = 1; 594 if (const std::error_code EC = parseMemEvents()) 595 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() 596 << '\n'; 597 598 deleteTempFiles(); 599 600 return Error::success(); 601 } 602 603 Error DataAggregator::readProfile(BinaryContext &BC) { 604 processProfile(BC); 605 606 for (auto &BFI : BC.getBinaryFunctions()) { 607 BinaryFunction &Function = BFI.second; 608 convertBranchData(Function); 609 } 610 611 if (opts::AggregateOnly) { 612 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) 613 report_error("cannot create output data file", EC); 614 } 615 616 return Error::success(); 617 } 618 619 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { 620 return Function.hasProfileAvailable(); 621 } 622 623 void DataAggregator::processProfile(BinaryContext &BC) { 624 if (opts::ReadPreAggregated) 625 processPreAggregated(); 626 else if (opts::BasicAggregation) 627 processBasicEvents(); 628 else 629 processBranchEvents(); 630 631 processMemEvents(); 632 633 // Mark all functions with registered events as having a valid profile. 634 for (auto &BFI : BC.getBinaryFunctions()) { 635 BinaryFunction &BF = BFI.second; 636 if (getBranchData(BF)) { 637 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE 638 : BinaryFunction::PF_LBR; 639 BF.markProfiled(Flags); 640 } 641 } 642 643 // Release intermediate storage. 644 clear(BranchLBRs); 645 clear(FallthroughLBRs); 646 clear(AggregatedLBRs); 647 clear(BasicSamples); 648 clear(MemSamples); 649 } 650 651 BinaryFunction * 652 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { 653 if (!BC->containsAddress(Address)) 654 return nullptr; 655 656 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, 657 /*UseMaxSize=*/true); 658 } 659 660 StringRef DataAggregator::getLocationName(BinaryFunction &Func, 661 uint64_t Count) { 662 if (!BAT) 663 return Func.getOneName(); 664 665 const BinaryFunction *OrigFunc = &Func; 666 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) { 667 NumColdSamples += Count; 668 BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr); 669 if (HotFunc) 670 OrigFunc = HotFunc; 671 } 672 // If it is a local function, prefer the name containing the file name where 673 // the local function was declared 674 for (StringRef AlternativeName : OrigFunc->getNames()) { 675 size_t FileNameIdx = AlternativeName.find('/'); 676 // Confirm the alternative name has the pattern Symbol/FileName/1 before 677 // using it 678 if (FileNameIdx == StringRef::npos || 679 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos) 680 continue; 681 return AlternativeName; 682 } 683 return OrigFunc->getOneName(); 684 } 685 686 bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address, 687 uint64_t Count) { 688 auto I = NamesToSamples.find(Func.getOneName()); 689 if (I == NamesToSamples.end()) { 690 bool Success; 691 StringRef LocName = getLocationName(Func, Count); 692 std::tie(I, Success) = NamesToSamples.insert( 693 std::make_pair(Func.getOneName(), 694 FuncSampleData(LocName, FuncSampleData::ContainerTy()))); 695 } 696 697 Address -= Func.getAddress(); 698 if (BAT) 699 Address = BAT->translate(Func, Address, /*IsBranchSrc=*/false); 700 701 I->second.bumpCount(Address, Count); 702 return true; 703 } 704 705 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, 706 uint64_t To, uint64_t Count, 707 uint64_t Mispreds) { 708 FuncBranchData *AggrData = getBranchData(Func); 709 if (!AggrData) { 710 AggrData = &NamesToBranches[Func.getOneName()]; 711 AggrData->Name = getLocationName(Func, Count); 712 setBranchData(Func, AggrData); 713 } 714 715 From -= Func.getAddress(); 716 To -= Func.getAddress(); 717 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " << Func.getPrintName() 718 << " @ " << Twine::utohexstr(From) << " -> " 719 << Func.getPrintName() << " @ " << Twine::utohexstr(To) 720 << '\n'); 721 if (BAT) { 722 From = BAT->translate(Func, From, /*IsBranchSrc=*/true); 723 To = BAT->translate(Func, To, /*IsBranchSrc=*/false); 724 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: " 725 << Func.getPrintName() << " @ " << Twine::utohexstr(From) 726 << " -> " << Func.getPrintName() << " @ " 727 << Twine::utohexstr(To) << '\n'); 728 } 729 730 AggrData->bumpBranchCount(From, To, Count, Mispreds); 731 return true; 732 } 733 734 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, 735 BinaryFunction *ToFunc, uint64_t From, 736 uint64_t To, uint64_t Count, 737 uint64_t Mispreds) { 738 FuncBranchData *FromAggrData = nullptr; 739 FuncBranchData *ToAggrData = nullptr; 740 StringRef SrcFunc; 741 StringRef DstFunc; 742 if (FromFunc) { 743 SrcFunc = getLocationName(*FromFunc, Count); 744 FromAggrData = getBranchData(*FromFunc); 745 if (!FromAggrData) { 746 FromAggrData = &NamesToBranches[FromFunc->getOneName()]; 747 FromAggrData->Name = SrcFunc; 748 setBranchData(*FromFunc, FromAggrData); 749 } 750 From -= FromFunc->getAddress(); 751 if (BAT) 752 From = BAT->translate(*FromFunc, From, /*IsBranchSrc=*/true); 753 754 recordExit(*FromFunc, From, Mispreds, Count); 755 } 756 if (ToFunc) { 757 DstFunc = getLocationName(*ToFunc, 0); 758 ToAggrData = getBranchData(*ToFunc); 759 if (!ToAggrData) { 760 ToAggrData = &NamesToBranches[ToFunc->getOneName()]; 761 ToAggrData->Name = DstFunc; 762 setBranchData(*ToFunc, ToAggrData); 763 } 764 To -= ToFunc->getAddress(); 765 if (BAT) 766 To = BAT->translate(*ToFunc, To, /*IsBranchSrc=*/false); 767 768 recordEntry(*ToFunc, To, Mispreds, Count); 769 } 770 771 if (FromAggrData) 772 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To), 773 Count, Mispreds); 774 if (ToAggrData) 775 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To, 776 Count, Mispreds); 777 return true; 778 } 779 780 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, 781 uint64_t Mispreds) { 782 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); 783 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); 784 if (!FromFunc && !ToFunc) 785 return false; 786 787 if (FromFunc == ToFunc) { 788 recordBranch(*FromFunc, From - FromFunc->getAddress(), 789 To - FromFunc->getAddress(), Count, Mispreds); 790 return doIntraBranch(*FromFunc, From, To, Count, Mispreds); 791 } 792 793 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); 794 } 795 796 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, 797 uint64_t Count) { 798 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); 799 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); 800 if (!FromFunc || !ToFunc) { 801 LLVM_DEBUG( 802 dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() 803 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 804 << " and ending in " << ToFunc->getPrintName() << " @ " 805 << ToFunc->getPrintName() << " @ " 806 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 807 NumLongRangeTraces += Count; 808 return false; 809 } 810 if (FromFunc != ToFunc) { 811 NumInvalidTraces += Count; 812 LLVM_DEBUG( 813 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 814 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 815 << " and ending in " << ToFunc->getPrintName() << " @ " 816 << ToFunc->getPrintName() << " @ " 817 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 818 return false; 819 } 820 821 Optional<BoltAddressTranslation::FallthroughListTy> FTs = 822 BAT ? BAT->getFallthroughsInTrace(*FromFunc, First.To, Second.From) 823 : getFallthroughsInTrace(*FromFunc, First, Second, Count); 824 if (!FTs) { 825 LLVM_DEBUG( 826 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 827 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 828 << " and ending in " << ToFunc->getPrintName() << " @ " 829 << ToFunc->getPrintName() << " @ " 830 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 831 NumInvalidTraces += Count; 832 return false; 833 } 834 835 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " 836 << FromFunc->getPrintName() << ":" 837 << Twine::utohexstr(First.To) << " to " 838 << Twine::utohexstr(Second.From) << ".\n"); 839 for (const std::pair<uint64_t, uint64_t> &Pair : *FTs) 840 doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(), 841 Pair.second + FromFunc->getAddress(), Count, false); 842 843 return true; 844 } 845 846 bool DataAggregator::recordTrace( 847 BinaryFunction &BF, 848 const LBREntry &FirstLBR, 849 const LBREntry &SecondLBR, 850 uint64_t Count, 851 SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) const { 852 BinaryContext &BC = BF.getBinaryContext(); 853 854 if (!BF.isSimple()) 855 return false; 856 857 assert(BF.hasCFG() && "can only record traces in CFG state"); 858 859 // Offsets of the trace within this function. 860 const uint64_t From = FirstLBR.To - BF.getAddress(); 861 const uint64_t To = SecondLBR.From - BF.getAddress(); 862 863 if (From > To) 864 return false; 865 866 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 867 BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 868 869 if (!FromBB || !ToBB) 870 return false; 871 872 // Adjust FromBB if the first LBR is a return from the last instruction in 873 // the previous block (that instruction should be a call). 874 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && 875 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { 876 BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1]; 877 if (PrevBB->getSuccessor(FromBB->getLabel())) { 878 const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); 879 if (Instr && BC.MIB->isCall(*Instr)) 880 FromBB = PrevBB; 881 else 882 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR 883 << '\n'); 884 } else { 885 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); 886 } 887 } 888 889 // Fill out information for fall-through edges. The From and To could be 890 // within the same basic block, e.g. when two call instructions are in the 891 // same block. In this case we skip the processing. 892 if (FromBB == ToBB) 893 return true; 894 895 // Process blocks in the original layout order. 896 BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()]; 897 assert(BB == FromBB && "index mismatch"); 898 while (BB != ToBB) { 899 BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1]; 900 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); 901 902 // Check for bad LBRs. 903 if (!BB->getSuccessor(NextBB->getLabel())) { 904 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" 905 << " " << FirstLBR << '\n' 906 << " " << SecondLBR << '\n'); 907 return false; 908 } 909 910 // Record fall-through jumps 911 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*NextBB); 912 BI.Count += Count; 913 914 if (Branches) { 915 const MCInst *Instr = BB->getLastNonPseudoInstr(); 916 uint64_t Offset = 0; 917 if (Instr) 918 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0); 919 else 920 Offset = BB->getOffset(); 921 922 Branches->emplace_back(Offset, NextBB->getOffset()); 923 } 924 925 BB = NextBB; 926 } 927 928 return true; 929 } 930 931 Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> 932 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, 933 const LBREntry &FirstLBR, 934 const LBREntry &SecondLBR, 935 uint64_t Count) const { 936 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res; 937 938 if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res)) 939 return NoneType(); 940 941 return Res; 942 } 943 944 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, 945 uint64_t Count) const { 946 if (To > BF.getSize()) 947 return false; 948 949 if (!BF.hasProfile()) 950 BF.ExecutionCount = 0; 951 952 BinaryBasicBlock *EntryBB = nullptr; 953 if (To == 0) { 954 BF.ExecutionCount += Count; 955 if (!BF.empty()) 956 EntryBB = &BF.front(); 957 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) { 958 if (BB->isEntryPoint()) 959 EntryBB = BB; 960 } 961 962 if (EntryBB) 963 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); 964 965 return true; 966 } 967 968 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, 969 uint64_t Count) const { 970 if (!BF.isSimple() || From > BF.getSize()) 971 return false; 972 973 if (!BF.hasProfile()) 974 BF.ExecutionCount = 0; 975 976 return true; 977 } 978 979 ErrorOr<LBREntry> DataAggregator::parseLBREntry() { 980 LBREntry Res; 981 ErrorOr<StringRef> FromStrRes = parseString('/'); 982 if (std::error_code EC = FromStrRes.getError()) 983 return EC; 984 StringRef OffsetStr = FromStrRes.get(); 985 if (OffsetStr.getAsInteger(0, Res.From)) { 986 reportError("expected hexadecimal number with From address"); 987 Diag << "Found: " << OffsetStr << "\n"; 988 return make_error_code(llvm::errc::io_error); 989 } 990 991 ErrorOr<StringRef> ToStrRes = parseString('/'); 992 if (std::error_code EC = ToStrRes.getError()) 993 return EC; 994 OffsetStr = ToStrRes.get(); 995 if (OffsetStr.getAsInteger(0, Res.To)) { 996 reportError("expected hexadecimal number with To address"); 997 Diag << "Found: " << OffsetStr << "\n"; 998 return make_error_code(llvm::errc::io_error); 999 } 1000 1001 ErrorOr<StringRef> MispredStrRes = parseString('/'); 1002 if (std::error_code EC = MispredStrRes.getError()) 1003 return EC; 1004 StringRef MispredStr = MispredStrRes.get(); 1005 if (MispredStr.size() != 1 || 1006 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { 1007 reportError("expected single char for mispred bit"); 1008 Diag << "Found: " << MispredStr << "\n"; 1009 return make_error_code(llvm::errc::io_error); 1010 } 1011 Res.Mispred = MispredStr[0] == 'M'; 1012 1013 static bool MispredWarning = true; 1014 if (MispredStr[0] == '-' && MispredWarning) { 1015 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; 1016 MispredWarning = false; 1017 } 1018 1019 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true); 1020 if (std::error_code EC = Rest.getError()) 1021 return EC; 1022 if (Rest.get().size() < 5) { 1023 reportError("expected rest of LBR entry"); 1024 Diag << "Found: " << Rest.get() << "\n"; 1025 return make_error_code(llvm::errc::io_error); 1026 } 1027 return Res; 1028 } 1029 1030 bool DataAggregator::checkAndConsumeFS() { 1031 if (ParsingBuf[0] != FieldSeparator) 1032 return false; 1033 1034 ParsingBuf = ParsingBuf.drop_front(1); 1035 Col += 1; 1036 return true; 1037 } 1038 1039 void DataAggregator::consumeRestOfLine() { 1040 size_t LineEnd = ParsingBuf.find_first_of('\n'); 1041 if (LineEnd == StringRef::npos) { 1042 ParsingBuf = StringRef(); 1043 Col = 0; 1044 Line += 1; 1045 return; 1046 } 1047 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); 1048 Col = 0; 1049 Line += 1; 1050 } 1051 1052 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { 1053 PerfBranchSample Res; 1054 1055 while (checkAndConsumeFS()) { 1056 } 1057 1058 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1059 if (std::error_code EC = PIDRes.getError()) 1060 return EC; 1061 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1062 if (!opts::LinuxKernelMode && MMapInfoIter == BinaryMMapInfo.end()) { 1063 consumeRestOfLine(); 1064 return make_error_code(errc::no_such_process); 1065 } 1066 1067 while (checkAndConsumeFS()) { 1068 } 1069 1070 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1071 if (std::error_code EC = PCRes.getError()) 1072 return EC; 1073 Res.PC = PCRes.get(); 1074 1075 if (checkAndConsumeNewLine()) 1076 return Res; 1077 1078 while (!checkAndConsumeNewLine()) { 1079 checkAndConsumeFS(); 1080 1081 ErrorOr<LBREntry> LBRRes = parseLBREntry(); 1082 if (std::error_code EC = LBRRes.getError()) 1083 return EC; 1084 LBREntry LBR = LBRRes.get(); 1085 if (ignoreKernelInterrupt(LBR)) 1086 continue; 1087 if (!BC->HasFixedLoadAddress) 1088 adjustLBR(LBR, MMapInfoIter->second); 1089 Res.LBR.push_back(LBR); 1090 } 1091 1092 return Res; 1093 } 1094 1095 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { 1096 while (checkAndConsumeFS()) { 1097 } 1098 1099 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1100 if (std::error_code EC = PIDRes.getError()) 1101 return EC; 1102 1103 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1104 if (MMapInfoIter == BinaryMMapInfo.end()) { 1105 consumeRestOfLine(); 1106 return PerfBasicSample{StringRef(), 0}; 1107 } 1108 1109 while (checkAndConsumeFS()) { 1110 } 1111 1112 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1113 if (std::error_code EC = Event.getError()) 1114 return EC; 1115 1116 while (checkAndConsumeFS()) { 1117 } 1118 1119 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true); 1120 if (std::error_code EC = AddrRes.getError()) 1121 return EC; 1122 1123 if (!checkAndConsumeNewLine()) { 1124 reportError("expected end of line"); 1125 return make_error_code(llvm::errc::io_error); 1126 } 1127 1128 uint64_t Address = *AddrRes; 1129 if (!BC->HasFixedLoadAddress) 1130 adjustAddress(Address, MMapInfoIter->second); 1131 1132 return PerfBasicSample{Event.get(), Address}; 1133 } 1134 1135 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { 1136 PerfMemSample Res{0, 0}; 1137 1138 while (checkAndConsumeFS()) { 1139 } 1140 1141 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1142 if (std::error_code EC = PIDRes.getError()) 1143 return EC; 1144 1145 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1146 if (MMapInfoIter == BinaryMMapInfo.end()) { 1147 consumeRestOfLine(); 1148 return Res; 1149 } 1150 1151 while (checkAndConsumeFS()) { 1152 } 1153 1154 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1155 if (std::error_code EC = Event.getError()) 1156 return EC; 1157 if (Event.get().find("mem-loads") == StringRef::npos) { 1158 consumeRestOfLine(); 1159 return Res; 1160 } 1161 1162 while (checkAndConsumeFS()) { 1163 } 1164 1165 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator); 1166 if (std::error_code EC = AddrRes.getError()) 1167 return EC; 1168 1169 while (checkAndConsumeFS()) { 1170 } 1171 1172 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1173 if (std::error_code EC = PCRes.getError()) { 1174 consumeRestOfLine(); 1175 return EC; 1176 } 1177 1178 if (!checkAndConsumeNewLine()) { 1179 reportError("expected end of line"); 1180 return make_error_code(llvm::errc::io_error); 1181 } 1182 1183 uint64_t Address = *AddrRes; 1184 if (!BC->HasFixedLoadAddress) 1185 adjustAddress(Address, MMapInfoIter->second); 1186 1187 return PerfMemSample{PCRes.get(), Address}; 1188 } 1189 1190 ErrorOr<Location> DataAggregator::parseLocationOrOffset() { 1191 auto parseOffset = [this]() -> ErrorOr<Location> { 1192 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator); 1193 if (std::error_code EC = Res.getError()) 1194 return EC; 1195 return Location(Res.get()); 1196 }; 1197 1198 size_t Sep = ParsingBuf.find_first_of(" \n"); 1199 if (Sep == StringRef::npos) 1200 return parseOffset(); 1201 StringRef LookAhead = ParsingBuf.substr(0, Sep); 1202 if (LookAhead.find_first_of(":") == StringRef::npos) 1203 return parseOffset(); 1204 1205 ErrorOr<StringRef> BuildID = parseString(':'); 1206 if (std::error_code EC = BuildID.getError()) 1207 return EC; 1208 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator); 1209 if (std::error_code EC = Offset.getError()) 1210 return EC; 1211 return Location(true, BuildID.get(), Offset.get()); 1212 } 1213 1214 ErrorOr<DataAggregator::AggregatedLBREntry> 1215 DataAggregator::parseAggregatedLBREntry() { 1216 while (checkAndConsumeFS()) { 1217 } 1218 1219 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); 1220 if (std::error_code EC = TypeOrErr.getError()) 1221 return EC; 1222 auto Type = AggregatedLBREntry::BRANCH; 1223 if (TypeOrErr.get() == "B") { 1224 Type = AggregatedLBREntry::BRANCH; 1225 } else if (TypeOrErr.get() == "F") { 1226 Type = AggregatedLBREntry::FT; 1227 } else if (TypeOrErr.get() == "f") { 1228 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; 1229 } else { 1230 reportError("expected B, F or f"); 1231 return make_error_code(llvm::errc::io_error); 1232 } 1233 1234 while (checkAndConsumeFS()) { 1235 } 1236 ErrorOr<Location> From = parseLocationOrOffset(); 1237 if (std::error_code EC = From.getError()) 1238 return EC; 1239 1240 while (checkAndConsumeFS()) { 1241 } 1242 ErrorOr<Location> To = parseLocationOrOffset(); 1243 if (std::error_code EC = To.getError()) 1244 return EC; 1245 1246 while (checkAndConsumeFS()) { 1247 } 1248 ErrorOr<int64_t> Frequency = 1249 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); 1250 if (std::error_code EC = Frequency.getError()) 1251 return EC; 1252 1253 uint64_t Mispreds = 0; 1254 if (Type == AggregatedLBREntry::BRANCH) { 1255 while (checkAndConsumeFS()) { 1256 } 1257 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); 1258 if (std::error_code EC = MispredsOrErr.getError()) 1259 return EC; 1260 Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); 1261 } 1262 1263 if (!checkAndConsumeNewLine()) { 1264 reportError("expected end of line"); 1265 return make_error_code(llvm::errc::io_error); 1266 } 1267 1268 return AggregatedLBREntry{From.get(), To.get(), 1269 static_cast<uint64_t>(Frequency.get()), Mispreds, 1270 Type}; 1271 } 1272 1273 bool DataAggregator::hasData() { 1274 if (ParsingBuf.size() == 0) 1275 return false; 1276 1277 return true; 1278 } 1279 1280 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { 1281 return opts::IgnoreInterruptLBR && 1282 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); 1283 } 1284 1285 std::error_code DataAggregator::printLBRHeatMap() { 1286 outs() << "PERF2BOLT: parse branch events...\n"; 1287 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1288 TimerGroupDesc, opts::TimeAggregator); 1289 1290 if (opts::LinuxKernelMode) { 1291 opts::HeatmapMaxAddress = 0xffffffffffffffff; 1292 opts::HeatmapMinAddress = KernelBaseAddr; 1293 } 1294 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, 1295 opts::HeatmapMaxAddress); 1296 uint64_t NumTotalSamples = 0; 1297 1298 while (hasData()) { 1299 if (opts::BasicAggregation) { 1300 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); 1301 if (std::error_code EC = SampleRes.getError()) { 1302 if (EC == errc::no_such_process) 1303 continue; 1304 return EC; 1305 } 1306 PerfBasicSample &Sample = SampleRes.get(); 1307 HM.registerAddress(Sample.PC); 1308 NumTotalSamples++; 1309 } else { 1310 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1311 if (std::error_code EC = SampleRes.getError()) { 1312 if (EC == errc::no_such_process) 1313 continue; 1314 return EC; 1315 } 1316 1317 PerfBranchSample &Sample = SampleRes.get(); 1318 1319 // LBRs are stored in reverse execution order. NextLBR refers to the next 1320 // executed branch record. 1321 const LBREntry *NextLBR = nullptr; 1322 for (const LBREntry &LBR : Sample.LBR) { 1323 if (NextLBR) { 1324 // Record fall-through trace. 1325 const uint64_t TraceFrom = LBR.To; 1326 const uint64_t TraceTo = NextLBR->From; 1327 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; 1328 } 1329 NextLBR = &LBR; 1330 } 1331 if (!Sample.LBR.empty()) { 1332 HM.registerAddress(Sample.LBR.front().To); 1333 HM.registerAddress(Sample.LBR.back().From); 1334 } 1335 NumTotalSamples += Sample.LBR.size(); 1336 } 1337 } 1338 1339 if (!NumTotalSamples) { 1340 if (!opts::BasicAggregation) { 1341 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " 1342 "Cannot build heatmap. Use -nl for building heatmap from " 1343 "basic events.\n"; 1344 } else { 1345 errs() << "HEATMAP-ERROR: no samples detected in profile. " 1346 "Cannot build heatmap."; 1347 } 1348 exit(1); 1349 } 1350 1351 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; 1352 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; 1353 1354 outs() << "HEATMAP: building heat map...\n"; 1355 1356 for (const auto &LBR : FallthroughLBRs) { 1357 const Trace &Trace = LBR.first; 1358 const FTInfo &Info = LBR.second; 1359 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); 1360 } 1361 1362 if (HM.getNumInvalidRanges()) 1363 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; 1364 1365 if (!HM.size()) { 1366 errs() << "HEATMAP-ERROR: no valid traces registered\n"; 1367 exit(1); 1368 } 1369 1370 HM.print(opts::OutputFilename); 1371 if (opts::OutputFilename == "-") 1372 HM.printCDF(opts::OutputFilename); 1373 else 1374 HM.printCDF(opts::OutputFilename + ".csv"); 1375 1376 return std::error_code(); 1377 } 1378 1379 std::error_code DataAggregator::parseBranchEvents() { 1380 outs() << "PERF2BOLT: parse branch events...\n"; 1381 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1382 TimerGroupDesc, opts::TimeAggregator); 1383 1384 uint64_t NumTotalSamples = 0; 1385 uint64_t NumEntries = 0; 1386 uint64_t NumSamples = 0; 1387 uint64_t NumSamplesNoLBR = 0; 1388 uint64_t NumTraces = 0; 1389 bool NeedsSkylakeFix = false; 1390 1391 while (hasData() && NumTotalSamples < opts::MaxSamples) { 1392 ++NumTotalSamples; 1393 1394 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1395 if (std::error_code EC = SampleRes.getError()) { 1396 if (EC == errc::no_such_process) 1397 continue; 1398 return EC; 1399 } 1400 ++NumSamples; 1401 1402 PerfBranchSample &Sample = SampleRes.get(); 1403 if (opts::WriteAutoFDOData) 1404 ++BasicSamples[Sample.PC]; 1405 1406 if (Sample.LBR.empty()) { 1407 ++NumSamplesNoLBR; 1408 continue; 1409 } 1410 1411 NumEntries += Sample.LBR.size(); 1412 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { 1413 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; 1414 NeedsSkylakeFix = true; 1415 } 1416 1417 // LBRs are stored in reverse execution order. NextPC refers to the next 1418 // recorded executed PC. 1419 uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0; 1420 uint32_t NumEntry = 0; 1421 for (const LBREntry &LBR : Sample.LBR) { 1422 ++NumEntry; 1423 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) 1424 // sometimes record entry 32 as an exact copy of entry 31. This will cause 1425 // us to likely record an invalid trace and generate a stale function for 1426 // BAT mode (non BAT disassembles the function and is able to ignore this 1427 // trace at aggregation time). Drop first 2 entries (last two, in 1428 // chronological order) 1429 if (NeedsSkylakeFix && NumEntry <= 2) 1430 continue; 1431 if (NextPC) { 1432 // Record fall-through trace. 1433 const uint64_t TraceFrom = LBR.To; 1434 const uint64_t TraceTo = NextPC; 1435 const BinaryFunction *TraceBF = 1436 getBinaryFunctionContainingAddress(TraceFrom); 1437 if (TraceBF && TraceBF->containsAddress(TraceTo)) { 1438 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; 1439 if (TraceBF->containsAddress(LBR.From)) 1440 ++Info.InternCount; 1441 else 1442 ++Info.ExternCount; 1443 } else { 1444 if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) { 1445 LLVM_DEBUG(dbgs() 1446 << "Invalid trace starting in " 1447 << TraceBF->getPrintName() << " @ " 1448 << Twine::utohexstr(TraceFrom - TraceBF->getAddress()) 1449 << " and ending @ " << Twine::utohexstr(TraceTo) 1450 << '\n'); 1451 ++NumInvalidTraces; 1452 } else { 1453 LLVM_DEBUG(dbgs() 1454 << "Out of range trace starting in " 1455 << (TraceBF ? TraceBF->getPrintName() : "None") << " @ " 1456 << Twine::utohexstr( 1457 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0)) 1458 << " and ending in " 1459 << (getBinaryFunctionContainingAddress(TraceTo) 1460 ? getBinaryFunctionContainingAddress(TraceTo) 1461 ->getPrintName() 1462 : "None") 1463 << " @ " 1464 << Twine::utohexstr( 1465 TraceTo - 1466 (getBinaryFunctionContainingAddress(TraceTo) 1467 ? getBinaryFunctionContainingAddress(TraceTo) 1468 ->getAddress() 1469 : 0)) 1470 << '\n'); 1471 ++NumLongRangeTraces; 1472 } 1473 } 1474 ++NumTraces; 1475 } 1476 NextPC = LBR.From; 1477 1478 uint64_t From = LBR.From; 1479 if (!getBinaryFunctionContainingAddress(From)) 1480 From = 0; 1481 uint64_t To = LBR.To; 1482 if (!getBinaryFunctionContainingAddress(To)) 1483 To = 0; 1484 if (!From && !To) 1485 continue; 1486 BranchInfo &Info = BranchLBRs[Trace(From, To)]; 1487 ++Info.TakenCount; 1488 Info.MispredCount += LBR.Mispred; 1489 } 1490 } 1491 1492 for (const auto &LBR : BranchLBRs) { 1493 const Trace &Trace = LBR.first; 1494 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.From)) 1495 BF->setHasProfileAvailable(); 1496 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Trace.To)) 1497 BF->setHasProfileAvailable(); 1498 } 1499 1500 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { 1501 OS << " ("; 1502 if (OS.has_colors()) { 1503 if (Percent > T2) 1504 OS.changeColor(raw_ostream::RED); 1505 else if (Percent > T1) 1506 OS.changeColor(raw_ostream::YELLOW); 1507 else 1508 OS.changeColor(raw_ostream::GREEN); 1509 } 1510 OS << format("%.1f%%", Percent); 1511 if (OS.has_colors()) 1512 OS.resetColor(); 1513 OS << ")"; 1514 }; 1515 1516 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries 1517 << " LBR entries\n"; 1518 if (NumTotalSamples) { 1519 if (NumSamples && NumSamplesNoLBR == NumSamples) { 1520 // Note: we don't know if perf2bolt is being used to parse memory samples 1521 // at this point. In this case, it is OK to parse zero LBRs. 1522 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " 1523 "LBR. Record profile with perf record -j any or run perf2bolt " 1524 "in no-LBR mode with -nl (the performance improvement in -nl " 1525 "mode may be limited)\n"; 1526 } else { 1527 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; 1528 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; 1529 outs() << "PERF2BOLT: " << IgnoredSamples << " samples"; 1530 printColored(outs(), PercentIgnored, 20, 50); 1531 outs() << " were ignored\n"; 1532 if (PercentIgnored > 50.0f) 1533 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " 1534 "were attributed to the input binary\n"; 1535 } 1536 } 1537 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1538 << NumInvalidTraces; 1539 float Perc = 0.0f; 1540 if (NumTraces > 0) { 1541 Perc = NumInvalidTraces * 100.0f / NumTraces; 1542 printColored(outs(), Perc, 5, 10); 1543 } 1544 outs() << "\n"; 1545 if (Perc > 10.0f) 1546 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1547 "binary is probably not the same binary used during profiling " 1548 "collection. The generated data may be ineffective for improving " 1549 "performance.\n\n"; 1550 1551 outs() << "PERF2BOLT: out of range traces involving unknown regions: " 1552 << NumLongRangeTraces; 1553 if (NumTraces > 0) 1554 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1555 outs() << "\n"; 1556 1557 if (NumColdSamples > 0) { 1558 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; 1559 outs() << "PERF2BOLT: " << NumColdSamples 1560 << format(" (%.1f%%)", ColdSamples) 1561 << " samples recorded in cold regions of split functions.\n"; 1562 if (ColdSamples > 5.0f) 1563 outs() 1564 << "WARNING: The BOLT-processed binary where samples were collected " 1565 "likely used bad data or your service observed a large shift in " 1566 "profile. You may want to audit this.\n"; 1567 } 1568 1569 return std::error_code(); 1570 } 1571 1572 void DataAggregator::processBranchEvents() { 1573 outs() << "PERF2BOLT: processing branch events...\n"; 1574 NamedRegionTimer T("processBranch", "Processing branch events", 1575 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1576 1577 for (const auto &AggrLBR : FallthroughLBRs) { 1578 const Trace &Loc = AggrLBR.first; 1579 const FTInfo &Info = AggrLBR.second; 1580 LBREntry First{Loc.From, Loc.From, false}; 1581 LBREntry Second{Loc.To, Loc.To, false}; 1582 if (Info.InternCount) 1583 doTrace(First, Second, Info.InternCount); 1584 if (Info.ExternCount) { 1585 First.From = 0; 1586 doTrace(First, Second, Info.ExternCount); 1587 } 1588 } 1589 1590 for (const auto &AggrLBR : BranchLBRs) { 1591 const Trace &Loc = AggrLBR.first; 1592 const BranchInfo &Info = AggrLBR.second; 1593 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); 1594 } 1595 } 1596 1597 std::error_code DataAggregator::parseBasicEvents() { 1598 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; 1599 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, 1600 TimerGroupDesc, opts::TimeAggregator); 1601 while (hasData()) { 1602 ErrorOr<PerfBasicSample> Sample = parseBasicSample(); 1603 if (std::error_code EC = Sample.getError()) 1604 return EC; 1605 1606 if (!Sample->PC) 1607 continue; 1608 1609 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1610 BF->setHasProfileAvailable(); 1611 1612 ++BasicSamples[Sample->PC]; 1613 EventNames.insert(Sample->EventName); 1614 } 1615 1616 return std::error_code(); 1617 } 1618 1619 void DataAggregator::processBasicEvents() { 1620 outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; 1621 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, 1622 TimerGroupDesc, opts::TimeAggregator); 1623 uint64_t OutOfRangeSamples = 0; 1624 uint64_t NumSamples = 0; 1625 for (auto &Sample : BasicSamples) { 1626 const uint64_t PC = Sample.first; 1627 const uint64_t HitCount = Sample.second; 1628 NumSamples += HitCount; 1629 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1630 if (!Func) { 1631 OutOfRangeSamples += HitCount; 1632 continue; 1633 } 1634 1635 doSample(*Func, PC, HitCount); 1636 } 1637 outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; 1638 1639 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " 1640 << OutOfRangeSamples; 1641 float Perc = 0.0f; 1642 if (NumSamples > 0) { 1643 outs() << " ("; 1644 Perc = OutOfRangeSamples * 100.0f / NumSamples; 1645 if (outs().has_colors()) { 1646 if (Perc > 60.0f) 1647 outs().changeColor(raw_ostream::RED); 1648 else if (Perc > 40.0f) 1649 outs().changeColor(raw_ostream::YELLOW); 1650 else 1651 outs().changeColor(raw_ostream::GREEN); 1652 } 1653 outs() << format("%.1f%%", Perc); 1654 if (outs().has_colors()) 1655 outs().resetColor(); 1656 outs() << ")"; 1657 } 1658 outs() << "\n"; 1659 if (Perc > 80.0f) 1660 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1661 "binary is probably not the same binary used during profiling " 1662 "collection. The generated data may be ineffective for improving " 1663 "performance.\n\n"; 1664 } 1665 1666 std::error_code DataAggregator::parseMemEvents() { 1667 outs() << "PERF2BOLT: parsing memory events...\n"; 1668 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, 1669 TimerGroupDesc, opts::TimeAggregator); 1670 while (hasData()) { 1671 ErrorOr<PerfMemSample> Sample = parseMemSample(); 1672 if (std::error_code EC = Sample.getError()) 1673 return EC; 1674 1675 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1676 BF->setHasProfileAvailable(); 1677 1678 MemSamples.emplace_back(std::move(Sample.get())); 1679 } 1680 1681 return std::error_code(); 1682 } 1683 1684 void DataAggregator::processMemEvents() { 1685 NamedRegionTimer T("ProcessMemEvents", "Processing mem events", 1686 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1687 for (const PerfMemSample &Sample : MemSamples) { 1688 uint64_t PC = Sample.PC; 1689 uint64_t Addr = Sample.Addr; 1690 StringRef FuncName; 1691 StringRef MemName; 1692 1693 // Try to resolve symbol for PC 1694 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1695 if (!Func) { 1696 LLVM_DEBUG(if (PC != 0) { 1697 dbgs() << "Skipped mem event: 0x" << Twine::utohexstr(PC) << " => 0x" 1698 << Twine::utohexstr(Addr) << "\n"; 1699 }); 1700 continue; 1701 } 1702 1703 FuncName = Func->getOneName(); 1704 PC -= Func->getAddress(); 1705 1706 // Try to resolve symbol for memory load 1707 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { 1708 MemName = BD->getName(); 1709 Addr -= BD->getAddress(); 1710 } else if (opts::FilterMemProfile) { 1711 // Filter out heap/stack accesses 1712 continue; 1713 } 1714 1715 const Location FuncLoc(!FuncName.empty(), FuncName, PC); 1716 const Location AddrLoc(!MemName.empty(), MemName, Addr); 1717 1718 FuncMemData *MemData = &NamesToMemEvents[FuncName]; 1719 setMemData(*Func, MemData); 1720 MemData->update(FuncLoc, AddrLoc); 1721 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); 1722 } 1723 } 1724 1725 std::error_code DataAggregator::parsePreAggregatedLBRSamples() { 1726 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; 1727 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", 1728 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1729 while (hasData()) { 1730 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); 1731 if (std::error_code EC = AggrEntry.getError()) 1732 return EC; 1733 1734 if (BinaryFunction *BF = 1735 getBinaryFunctionContainingAddress(AggrEntry->From.Offset)) 1736 BF->setHasProfileAvailable(); 1737 if (BinaryFunction *BF = 1738 getBinaryFunctionContainingAddress(AggrEntry->To.Offset)) 1739 BF->setHasProfileAvailable(); 1740 1741 AggregatedLBRs.emplace_back(std::move(AggrEntry.get())); 1742 } 1743 1744 return std::error_code(); 1745 } 1746 1747 void DataAggregator::processPreAggregated() { 1748 outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; 1749 NamedRegionTimer T("processAggregated", "Processing aggregated branch events", 1750 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1751 1752 uint64_t NumTraces = 0; 1753 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { 1754 switch (AggrEntry.EntryType) { 1755 case AggregatedLBREntry::BRANCH: 1756 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, 1757 AggrEntry.Mispreds); 1758 break; 1759 case AggregatedLBREntry::FT: 1760 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { 1761 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT 1762 ? AggrEntry.From.Offset 1763 : 0, 1764 AggrEntry.From.Offset, false}; 1765 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; 1766 doTrace(First, Second, AggrEntry.Count); 1767 NumTraces += AggrEntry.Count; 1768 break; 1769 } 1770 } 1771 } 1772 1773 outs() << "PERF2BOLT: read " << AggregatedLBRs.size() 1774 << " aggregated LBR entries\n"; 1775 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1776 << NumInvalidTraces; 1777 float Perc = 0.0f; 1778 if (NumTraces > 0) { 1779 outs() << " ("; 1780 Perc = NumInvalidTraces * 100.0f / NumTraces; 1781 if (outs().has_colors()) { 1782 if (Perc > 10.0f) 1783 outs().changeColor(raw_ostream::RED); 1784 else if (Perc > 5.0f) 1785 outs().changeColor(raw_ostream::YELLOW); 1786 else 1787 outs().changeColor(raw_ostream::GREEN); 1788 } 1789 outs() << format("%.1f%%", Perc); 1790 if (outs().has_colors()) 1791 outs().resetColor(); 1792 outs() << ")"; 1793 } 1794 outs() << "\n"; 1795 if (Perc > 10.0f) 1796 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1797 "binary is probably not the same binary used during profiling " 1798 "collection. The generated data may be ineffective for improving " 1799 "performance.\n\n"; 1800 1801 outs() << "PERF2BOLT: Out of range traces involving unknown regions: " 1802 << NumLongRangeTraces; 1803 if (NumTraces > 0) 1804 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1805 outs() << "\n"; 1806 } 1807 1808 Optional<int32_t> DataAggregator::parseCommExecEvent() { 1809 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1810 if (LineEnd == StringRef::npos) { 1811 reportError("expected rest of line"); 1812 Diag << "Found: " << ParsingBuf << "\n"; 1813 return NoneType(); 1814 } 1815 StringRef Line = ParsingBuf.substr(0, LineEnd); 1816 1817 size_t Pos = Line.find("PERF_RECORD_COMM exec"); 1818 if (Pos == StringRef::npos) 1819 return NoneType(); 1820 Line = Line.drop_front(Pos); 1821 1822 // Line: 1823 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" 1824 StringRef PIDStr = Line.rsplit(':').second.split('/').first; 1825 int32_t PID; 1826 if (PIDStr.getAsInteger(10, PID)) { 1827 reportError("expected PID"); 1828 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1829 return NoneType(); 1830 } 1831 1832 return PID; 1833 } 1834 1835 namespace { 1836 Optional<uint64_t> parsePerfTime(const StringRef TimeStr) { 1837 const StringRef SecTimeStr = TimeStr.split('.').first; 1838 const StringRef USecTimeStr = TimeStr.split('.').second; 1839 uint64_t SecTime; 1840 uint64_t USecTime; 1841 if (SecTimeStr.getAsInteger(10, SecTime) || 1842 USecTimeStr.getAsInteger(10, USecTime)) 1843 return NoneType(); 1844 return SecTime * 1000000ULL + USecTime; 1845 } 1846 } 1847 1848 Optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { 1849 while (checkAndConsumeFS()) { 1850 } 1851 1852 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1853 if (LineEnd == StringRef::npos) { 1854 reportError("expected rest of line"); 1855 Diag << "Found: " << ParsingBuf << "\n"; 1856 return NoneType(); 1857 } 1858 StringRef Line = ParsingBuf.substr(0, LineEnd); 1859 1860 size_t Pos = Line.find("PERF_RECORD_FORK"); 1861 if (Pos == StringRef::npos) { 1862 consumeRestOfLine(); 1863 return NoneType(); 1864 } 1865 1866 ForkInfo FI; 1867 1868 const StringRef TimeStr = 1869 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1870 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { 1871 FI.Time = *TimeRes; 1872 } 1873 1874 Line = Line.drop_front(Pos); 1875 1876 // Line: 1877 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) 1878 const StringRef ChildPIDStr = Line.split('(').second.split(':').first; 1879 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { 1880 reportError("expected PID"); 1881 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; 1882 return NoneType(); 1883 } 1884 1885 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; 1886 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { 1887 reportError("expected PID"); 1888 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; 1889 return NoneType(); 1890 } 1891 1892 consumeRestOfLine(); 1893 1894 return FI; 1895 } 1896 1897 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> 1898 DataAggregator::parseMMapEvent() { 1899 while (checkAndConsumeFS()) { 1900 } 1901 1902 MMapInfo ParsedInfo; 1903 1904 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1905 if (LineEnd == StringRef::npos) { 1906 reportError("expected rest of line"); 1907 Diag << "Found: " << ParsingBuf << "\n"; 1908 return make_error_code(llvm::errc::io_error); 1909 } 1910 StringRef Line = ParsingBuf.substr(0, LineEnd); 1911 1912 size_t Pos = Line.find("PERF_RECORD_MMAP2"); 1913 if (Pos == StringRef::npos) { 1914 consumeRestOfLine(); 1915 return std::make_pair(StringRef(), ParsedInfo); 1916 } 1917 1918 // Line: 1919 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> 1920 1921 const StringRef TimeStr = 1922 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1923 if (Optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) 1924 ParsedInfo.Time = *TimeRes; 1925 1926 Line = Line.drop_front(Pos); 1927 1928 // Line: 1929 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> 1930 1931 StringRef FileName = Line.rsplit(FieldSeparator).second; 1932 if (FileName.startswith("//") || FileName.startswith("[")) { 1933 consumeRestOfLine(); 1934 return std::make_pair(StringRef(), ParsedInfo); 1935 } 1936 FileName = sys::path::filename(FileName); 1937 1938 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; 1939 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { 1940 reportError("expected PID"); 1941 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1942 return make_error_code(llvm::errc::io_error); 1943 } 1944 1945 const StringRef BaseAddressStr = Line.split('[').second.split('(').first; 1946 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) { 1947 reportError("expected base address"); 1948 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 1949 return make_error_code(llvm::errc::io_error); 1950 } 1951 1952 const StringRef SizeStr = Line.split('(').second.split(')').first; 1953 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { 1954 reportError("expected mmaped size"); 1955 Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; 1956 return make_error_code(llvm::errc::io_error); 1957 } 1958 1959 const StringRef OffsetStr = 1960 Line.split('@').second.ltrim().split(FieldSeparator).first; 1961 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { 1962 reportError("expected mmaped page-aligned offset"); 1963 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; 1964 return make_error_code(llvm::errc::io_error); 1965 } 1966 1967 consumeRestOfLine(); 1968 1969 return std::make_pair(FileName, ParsedInfo); 1970 } 1971 1972 std::error_code DataAggregator::parseMMapEvents() { 1973 outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; 1974 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName, 1975 TimerGroupDesc, opts::TimeAggregator); 1976 1977 std::multimap<StringRef, MMapInfo> GlobalMMapInfo; 1978 while (hasData()) { 1979 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); 1980 if (std::error_code EC = FileMMapInfoRes.getError()) 1981 return EC; 1982 1983 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); 1984 if (FileMMapInfo.second.PID == -1) 1985 continue; 1986 1987 // Consider only the first mapping of the file for any given PID 1988 bool PIDExists = false; 1989 auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); 1990 for (auto MI = Range.first; MI != Range.second; ++MI) { 1991 if (MI->second.PID == FileMMapInfo.second.PID) { 1992 PIDExists = true; 1993 break; 1994 } 1995 } 1996 if (PIDExists) 1997 continue; 1998 1999 GlobalMMapInfo.insert(FileMMapInfo); 2000 } 2001 2002 LLVM_DEBUG({ 2003 dbgs() << "FileName -> mmap info:\n"; 2004 for (const std::pair<const StringRef, MMapInfo> &Pair : GlobalMMapInfo) 2005 dbgs() << " " << Pair.first << " : " << Pair.second.PID << " [0x" 2006 << Twine::utohexstr(Pair.second.MMapAddress) << ", " 2007 << Twine::utohexstr(Pair.second.Size) << " @ " 2008 << Twine::utohexstr(Pair.second.Offset) << "]\n"; 2009 }); 2010 2011 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename()); 2012 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) { 2013 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName 2014 << "\" for profile matching\n"; 2015 NameToUse = BuildIDBinaryName; 2016 } 2017 2018 auto Range = GlobalMMapInfo.equal_range(NameToUse); 2019 for (auto I = Range.first; I != Range.second; ++I) { 2020 MMapInfo &MMapInfo = I->second; 2021 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { 2022 // Check that the binary mapping matches one of the segments. 2023 bool MatchFound = false; 2024 for (auto &KV : BC->SegmentMapInfo) { 2025 SegmentInfo &SegInfo = KV.second; 2026 // The mapping is page-aligned and hence the MMapAddress could be 2027 // different from the segment start address. We cannot know the page 2028 // size of the mapping, but we know it should not exceed the segment 2029 // alignment value. Hence we are performing an approximate check. 2030 if (SegInfo.Address >= MMapInfo.MMapAddress && 2031 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment) { 2032 MatchFound = true; 2033 break; 2034 } 2035 } 2036 if (!MatchFound) { 2037 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse 2038 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n'; 2039 continue; 2040 } 2041 } 2042 2043 // Set base address for shared objects. 2044 if (!BC->HasFixedLoadAddress) { 2045 Optional<uint64_t> BaseAddress = 2046 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset); 2047 if (!BaseAddress) { 2048 errs() << "PERF2BOLT-WARNING: unable to find base address of the " 2049 "binary when memory mapped at 0x" 2050 << Twine::utohexstr(MMapInfo.MMapAddress) 2051 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) 2052 << ". Ignoring profile data for this mapping\n"; 2053 continue; 2054 } else { 2055 MMapInfo.BaseAddress = *BaseAddress; 2056 } 2057 } 2058 2059 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2060 } 2061 2062 if (BinaryMMapInfo.empty()) { 2063 if (errs().has_colors()) 2064 errs().changeColor(raw_ostream::RED); 2065 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" 2066 << BC->getFilename() << "\"."; 2067 if (!GlobalMMapInfo.empty()) { 2068 errs() << " Profile for the following binary name(s) is available:\n"; 2069 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; 2070 I = GlobalMMapInfo.upper_bound(I->first)) 2071 errs() << " " << I->first << '\n'; 2072 errs() << "Please rename the input binary.\n"; 2073 } else { 2074 errs() << " Failed to extract any binary name from a profile.\n"; 2075 } 2076 if (errs().has_colors()) 2077 errs().resetColor(); 2078 2079 exit(1); 2080 } 2081 2082 return std::error_code(); 2083 } 2084 2085 std::error_code DataAggregator::parseTaskEvents() { 2086 outs() << "PERF2BOLT: parsing perf-script task events output\n"; 2087 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, 2088 TimerGroupDesc, opts::TimeAggregator); 2089 2090 while (hasData()) { 2091 if (Optional<int32_t> CommInfo = parseCommExecEvent()) { 2092 // Remove forked child that ran execve 2093 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); 2094 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) 2095 BinaryMMapInfo.erase(MMapInfoIter); 2096 consumeRestOfLine(); 2097 continue; 2098 } 2099 2100 Optional<ForkInfo> ForkInfo = parseForkEvent(); 2101 if (!ForkInfo) 2102 continue; 2103 2104 if (ForkInfo->ParentPID == ForkInfo->ChildPID) 2105 continue; 2106 2107 if (ForkInfo->Time == 0) { 2108 // Process was forked and mmaped before perf ran. In this case the child 2109 // should have its own mmap entry unless it was execve'd. 2110 continue; 2111 } 2112 2113 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); 2114 if (MMapInfoIter == BinaryMMapInfo.end()) 2115 continue; 2116 2117 MMapInfo MMapInfo = MMapInfoIter->second; 2118 MMapInfo.PID = ForkInfo->ChildPID; 2119 MMapInfo.Forked = true; 2120 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2121 } 2122 2123 outs() << "PERF2BOLT: input binary is associated with " 2124 << BinaryMMapInfo.size() << " PID(s)\n"; 2125 2126 LLVM_DEBUG({ 2127 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 2128 outs() << " " << MMI.second.PID << (MMI.second.Forked ? " (forked)" : "") 2129 << ": (0x" << Twine::utohexstr(MMI.second.MMapAddress) << ": 0x" 2130 << Twine::utohexstr(MMI.second.Size) << ")\n"; 2131 }); 2132 2133 return std::error_code(); 2134 } 2135 2136 Optional<std::pair<StringRef, StringRef>> 2137 DataAggregator::parseNameBuildIDPair() { 2138 while (checkAndConsumeFS()) { 2139 } 2140 2141 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true); 2142 if (std::error_code EC = BuildIDStr.getError()) 2143 return NoneType(); 2144 2145 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true); 2146 if (std::error_code EC = NameStr.getError()) 2147 return NoneType(); 2148 2149 consumeRestOfLine(); 2150 return std::make_pair(NameStr.get(), BuildIDStr.get()); 2151 } 2152 2153 Optional<StringRef> 2154 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { 2155 while (hasData()) { 2156 Optional<std::pair<StringRef, StringRef>> IDPair = parseNameBuildIDPair(); 2157 if (!IDPair) 2158 return NoneType(); 2159 2160 if (IDPair->second.startswith(FileBuildID)) 2161 return sys::path::filename(IDPair->first); 2162 } 2163 return NoneType(); 2164 } 2165 2166 std::error_code 2167 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { 2168 std::error_code EC; 2169 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2170 if (EC) 2171 return EC; 2172 2173 bool WriteMemLocs = false; 2174 2175 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { 2176 if (WriteMemLocs) 2177 OutFile << (Loc.IsSymbol ? "4 " : "3 "); 2178 else 2179 OutFile << (Loc.IsSymbol ? "1 " : "0 "); 2180 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) 2181 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; 2182 }; 2183 2184 uint64_t BranchValues = 0; 2185 uint64_t MemValues = 0; 2186 2187 if (BAT) 2188 OutFile << "boltedcollection\n"; 2189 if (opts::BasicAggregation) { 2190 OutFile << "no_lbr"; 2191 for (const StringMapEntry<NoneType> &Entry : EventNames) 2192 OutFile << " " << Entry.getKey(); 2193 OutFile << "\n"; 2194 2195 for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) { 2196 for (const SampleInfo &SI : Func.getValue().Data) { 2197 writeLocation(SI.Loc); 2198 OutFile << SI.Hits << "\n"; 2199 ++BranchValues; 2200 } 2201 } 2202 } else { 2203 for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) { 2204 for (const llvm::bolt::BranchInfo &BI : Func.getValue().Data) { 2205 writeLocation(BI.From); 2206 writeLocation(BI.To); 2207 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2208 ++BranchValues; 2209 } 2210 for (const llvm::bolt::BranchInfo &BI : Func.getValue().EntryData) { 2211 // Do not output if source is a known symbol, since this was already 2212 // accounted for in the source function 2213 if (BI.From.IsSymbol) 2214 continue; 2215 writeLocation(BI.From); 2216 writeLocation(BI.To); 2217 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2218 ++BranchValues; 2219 } 2220 } 2221 2222 WriteMemLocs = true; 2223 for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) { 2224 for (const MemInfo &MemEvent : Func.getValue().Data) { 2225 writeLocation(MemEvent.Offset); 2226 writeLocation(MemEvent.Addr); 2227 OutFile << MemEvent.Count << "\n"; 2228 ++MemValues; 2229 } 2230 } 2231 } 2232 2233 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues 2234 << " memory objects to " << OutputFilename << "\n"; 2235 2236 return std::error_code(); 2237 } 2238 2239 void DataAggregator::dump() const { DataReader::dump(); } 2240 2241 void DataAggregator::dump(const LBREntry &LBR) const { 2242 Diag << "From: " << Twine::utohexstr(LBR.From) 2243 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred 2244 << "\n"; 2245 } 2246 2247 void DataAggregator::dump(const PerfBranchSample &Sample) const { 2248 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n"; 2249 for (const LBREntry &LBR : Sample.LBR) 2250 dump(LBR); 2251 } 2252 2253 void DataAggregator::dump(const PerfMemSample &Sample) const { 2254 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n"; 2255 } 2256