1 //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "PerfReader.h" 9 #include "ProfileGenerator.h" 10 #include "llvm/Support/FileSystem.h" 11 #include "llvm/Support/Process.h" 12 13 #define DEBUG_TYPE "perf-reader" 14 15 cl::opt<bool> SkipSymbolization("skip-symbolization", cl::init(false), 16 cl::ZeroOrMore, 17 cl::desc("Dump the unsymbolized profile to the " 18 "output file. It will show unwinder " 19 "output for CS profile generation.")); 20 21 static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::init(false), 22 cl::ZeroOrMore, 23 cl::desc("Print binary load events.")); 24 25 static cl::opt<bool> 26 UseOffset("use-offset", cl::init(true), cl::ZeroOrMore, 27 cl::desc("Work with `--skip-symbolization` or " 28 "`--unsymbolized-profile` to write/read the " 29 "offset instead of virtual address.")); 30 static cl::opt<bool> 31 IgnoreStackSamples("ignore-stack-samples", cl::init(false), cl::ZeroOrMore, 32 cl::desc("Ignore call stack samples for hybrid samples " 33 "and produce context-insensitive profile.")); 34 cl::opt<bool> ShowDetailedWarning("show-detailed-warning", cl::init(false), 35 cl::ZeroOrMore, 36 cl::desc("Show detailed warning message.")); 37 38 extern cl::opt<std::string> PerfTraceFilename; 39 extern cl::opt<bool> ShowDisassemblyOnly; 40 extern cl::opt<bool> ShowSourceLocations; 41 extern cl::opt<std::string> OutputFilename; 42 43 namespace llvm { 44 namespace sampleprof { 45 46 void VirtualUnwinder::unwindCall(UnwindState &State) { 47 // The 2nd frame after leaf could be missing if stack sample is 48 // taken when IP is within prolog/epilog, as frame chain isn't 49 // setup yet. Fill in the missing frame in that case. 50 // TODO: Currently we just assume all the addr that can't match the 51 // 2nd frame is in prolog/epilog. In the future, we will switch to 52 // pro/epi tracker(Dwarf CFI) for the precise check. 53 uint64_t Source = State.getCurrentLBRSource(); 54 auto *ParentFrame = State.getParentFrame(); 55 if (ParentFrame == State.getDummyRootPtr() || 56 ParentFrame->Address != Source) { 57 State.switchToFrame(Source); 58 } else { 59 State.popFrame(); 60 } 61 State.InstPtr.update(Source); 62 } 63 64 void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { 65 InstructionPointer &IP = State.InstPtr; 66 uint64_t Target = State.getCurrentLBRTarget(); 67 uint64_t End = IP.Address; 68 if (Binary->usePseudoProbes()) { 69 // We don't need to top frame probe since it should be extracted 70 // from the range. 71 // The outcome of the virtual unwinding with pseudo probes is a 72 // map from a context key to the address range being unwound. 73 // This means basically linear unwinding is not needed for pseudo 74 // probes. The range will be simply recorded here and will be 75 // converted to a list of pseudo probes to report in ProfileGenerator. 76 State.getParentFrame()->recordRangeCount(Target, End, Repeat); 77 } else { 78 // Unwind linear execution part. 79 // Split and record the range by different inline context. For example: 80 // [0x01] ... main:1 # Target 81 // [0x02] ... main:2 82 // [0x03] ... main:3 @ foo:1 83 // [0x04] ... main:3 @ foo:2 84 // [0x05] ... main:3 @ foo:3 85 // [0x06] ... main:4 86 // [0x07] ... main:5 # End 87 // It will be recorded: 88 // [main:*] : [0x06, 0x07], [0x01, 0x02] 89 // [main:3 @ foo:*] : [0x03, 0x05] 90 while (IP.Address > Target) { 91 uint64_t PrevIP = IP.Address; 92 IP.backward(); 93 // Break into segments for implicit call/return due to inlining 94 bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address); 95 if (!SameInlinee) { 96 State.switchToFrame(PrevIP); 97 State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat); 98 End = IP.Address; 99 } 100 } 101 assert(IP.Address == Target && "The last one must be the target address."); 102 // Record the remaining range, [0x01, 0x02] in the example 103 State.switchToFrame(IP.Address); 104 State.CurrentLeafFrame->recordRangeCount(IP.Address, End, Repeat); 105 } 106 } 107 108 void VirtualUnwinder::unwindReturn(UnwindState &State) { 109 // Add extra frame as we unwind through the return 110 const LBREntry &LBR = State.getCurrentLBR(); 111 uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target); 112 State.switchToFrame(CallAddr); 113 State.pushFrame(LBR.Source); 114 State.InstPtr.update(LBR.Source); 115 } 116 117 void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) { 118 // TODO: Tolerate tail call for now, as we may see tail call from libraries. 119 // This is only for intra function branches, excluding tail calls. 120 uint64_t Source = State.getCurrentLBRSource(); 121 State.switchToFrame(Source); 122 State.InstPtr.update(Source); 123 } 124 125 std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() { 126 std::shared_ptr<StringBasedCtxKey> KeyStr = 127 std::make_shared<StringBasedCtxKey>(); 128 KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined); 129 if (KeyStr->Context.empty()) 130 return nullptr; 131 return KeyStr; 132 } 133 134 std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() { 135 std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey = 136 std::make_shared<ProbeBasedCtxKey>(); 137 for (auto CallProbe : Stack) { 138 ProbeBasedKey->Probes.emplace_back(CallProbe); 139 } 140 CSProfileGenerator::compressRecursionContext<const MCDecodedPseudoProbe *>( 141 ProbeBasedKey->Probes); 142 CSProfileGenerator::trimContext<const MCDecodedPseudoProbe *>( 143 ProbeBasedKey->Probes); 144 return ProbeBasedKey; 145 } 146 147 template <typename T> 148 void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, 149 T &Stack) { 150 if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty()) 151 return; 152 153 std::shared_ptr<ContextKey> Key = Stack.getContextKey(); 154 if (Key == nullptr) 155 return; 156 auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter()); 157 SampleCounter &SCounter = Ret.first->second; 158 for (auto &Item : Cur->RangeSamples) { 159 uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); 160 uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); 161 SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item)); 162 } 163 164 for (auto &Item : Cur->BranchSamples) { 165 uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); 166 uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); 167 SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item)); 168 } 169 } 170 171 template <typename T> 172 void VirtualUnwinder::collectSamplesFromFrameTrie( 173 UnwindState::ProfiledFrame *Cur, T &Stack) { 174 if (!Cur->isDummyRoot()) { 175 if (!Stack.pushFrame(Cur)) { 176 // Process truncated context 177 // Start a new traversal ignoring its bottom context 178 T EmptyStack(Binary); 179 collectSamplesFromFrame(Cur, EmptyStack); 180 for (const auto &Item : Cur->Children) { 181 collectSamplesFromFrameTrie(Item.second.get(), EmptyStack); 182 } 183 184 // Keep note of untracked call site and deduplicate them 185 // for warning later. 186 if (!Cur->isLeafFrame()) 187 UntrackedCallsites.insert(Cur->Address); 188 189 return; 190 } 191 } 192 193 collectSamplesFromFrame(Cur, Stack); 194 // Process children frame 195 for (const auto &Item : Cur->Children) { 196 collectSamplesFromFrameTrie(Item.second.get(), Stack); 197 } 198 // Recover the call stack 199 Stack.popFrame(); 200 } 201 202 void VirtualUnwinder::collectSamplesFromFrameTrie( 203 UnwindState::ProfiledFrame *Cur) { 204 if (Binary->usePseudoProbes()) { 205 ProbeStack Stack(Binary); 206 collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack); 207 } else { 208 FrameStack Stack(Binary); 209 collectSamplesFromFrameTrie<FrameStack>(Cur, Stack); 210 } 211 } 212 213 void VirtualUnwinder::recordBranchCount(const LBREntry &Branch, 214 UnwindState &State, uint64_t Repeat) { 215 if (Branch.IsArtificial) 216 return; 217 218 if (Binary->usePseudoProbes()) { 219 // Same as recordRangeCount, We don't need to top frame probe since we will 220 // extract it from branch's source address 221 State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target, 222 Repeat); 223 } else { 224 State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target, 225 Repeat); 226 } 227 } 228 229 bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) { 230 // Capture initial state as starting point for unwinding. 231 UnwindState State(Sample, Binary); 232 233 // Sanity check - making sure leaf of LBR aligns with leaf of stack sample 234 // Stack sample sometimes can be unreliable, so filter out bogus ones. 235 if (!State.validateInitialState()) 236 return false; 237 238 // Also do not attempt linear unwind for the leaf range as it's incomplete. 239 bool IsLeaf = true; 240 241 // Now process the LBR samples in parrallel with stack sample 242 // Note that we do not reverse the LBR entry order so we can 243 // unwind the sample stack as we walk through LBR entries. 244 while (State.hasNextLBR()) { 245 State.checkStateConsistency(); 246 247 // Unwind implicit calls/returns from inlining, along the linear path, 248 // break into smaller sub section each with its own calling context. 249 if (!IsLeaf) { 250 unwindLinear(State, Repeat); 251 } 252 IsLeaf = false; 253 254 // Save the LBR branch before it gets unwound. 255 const LBREntry &Branch = State.getCurrentLBR(); 256 257 if (isCallState(State)) { 258 // Unwind calls - we know we encountered call if LBR overlaps with 259 // transition between leaf the 2nd frame. Note that for calls that 260 // were not in the original stack sample, we should have added the 261 // extra frame when processing the return paired with this call. 262 unwindCall(State); 263 } else if (isReturnState(State)) { 264 // Unwind returns - check whether the IP is indeed at a return instruction 265 unwindReturn(State); 266 } else { 267 // Unwind branches - for regular intra function branches, we only 268 // need to record branch with context. 269 unwindBranchWithinFrame(State); 270 } 271 State.advanceLBR(); 272 // Record `branch` with calling context after unwinding. 273 recordBranchCount(Branch, State, Repeat); 274 } 275 // As samples are aggregated on trie, record them into counter map 276 collectSamplesFromFrameTrie(State.getDummyRootPtr()); 277 278 return true; 279 } 280 281 std::unique_ptr<PerfReaderBase> 282 PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput) { 283 std::unique_ptr<PerfReaderBase> PerfReader; 284 285 if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) { 286 PerfReader.reset( 287 new UnsymbolizedProfileReader(Binary, PerfInput.InputFile)); 288 return PerfReader; 289 } 290 291 // For perf data input, we need to convert them into perf script first. 292 if (PerfInput.Format == PerfFormat::PerfData) 293 PerfInput = PerfScriptReader::convertPerfDataToTrace(Binary, PerfInput); 294 295 assert((PerfInput.Format == PerfFormat::PerfScript) && 296 "Should be a perfscript!"); 297 298 PerfInput.Content = 299 PerfScriptReader::checkPerfScriptType(PerfInput.InputFile); 300 if (PerfInput.Content == PerfContent::LBRStack) { 301 PerfReader.reset(new HybridPerfReader(Binary, PerfInput.InputFile)); 302 } else if (PerfInput.Content == PerfContent::LBR) { 303 PerfReader.reset(new LBRPerfReader(Binary, PerfInput.InputFile)); 304 } else { 305 exitWithError("Unsupported perfscript!"); 306 } 307 308 return PerfReader; 309 } 310 311 PerfInputFile PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, 312 PerfInputFile &File) { 313 StringRef PerfData = File.InputFile; 314 // Run perf script to retrieve PIDs matching binary we're interested in. 315 auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf"); 316 if (!PerfExecutable) { 317 exitWithError("Perf not found."); 318 } 319 std::string PerfPath = *PerfExecutable; 320 std::string PerfTraceFile = PerfData.str() + ".script.tmp"; 321 StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events", 322 "-F", "comm,pid", "-i", 323 PerfData}; 324 Optional<StringRef> Redirects[] = {llvm::None, // Stdin 325 StringRef(PerfTraceFile), // Stdout 326 StringRef(PerfTraceFile)}; // Stderr 327 sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, llvm::None, Redirects); 328 329 // Collect the PIDs 330 TraceStream TraceIt(PerfTraceFile); 331 std::string PIDs; 332 std::unordered_set<uint32_t> PIDSet; 333 while (!TraceIt.isAtEoF()) { 334 MMapEvent MMap; 335 if (isMMap2Event(TraceIt.getCurrentLine()) && 336 extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) { 337 auto It = PIDSet.emplace(MMap.PID); 338 if (It.second) { 339 if (!PIDs.empty()) { 340 PIDs.append(","); 341 } 342 PIDs.append(utostr(MMap.PID)); 343 } 344 } 345 TraceIt.advance(); 346 } 347 348 if (PIDs.empty()) { 349 exitWithError("No relevant mmap event is found in perf data."); 350 } 351 352 // Run perf script again to retrieve events for PIDs collected above 353 StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events", 354 "-F", "ip,brstack", "--pid", 355 PIDs, "-i", PerfData}; 356 sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, llvm::None, Redirects); 357 358 return {PerfTraceFile, PerfFormat::PerfScript, PerfContent::UnknownContent}; 359 } 360 361 void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { 362 // Drop the event which doesn't belong to user-provided binary 363 StringRef BinaryName = llvm::sys::path::filename(Event.BinaryPath); 364 if (Binary->getName() != BinaryName) 365 return; 366 367 // Drop the event if its image is loaded at the same address 368 if (Event.Address == Binary->getBaseAddress()) { 369 Binary->setIsLoadedByMMap(true); 370 return; 371 } 372 373 if (Event.Offset == Binary->getTextSegmentOffset()) { 374 // A binary image could be unloaded and then reloaded at different 375 // place, so update binary load address. 376 // Only update for the first executable segment and assume all other 377 // segments are loaded at consecutive memory addresses, which is the case on 378 // X64. 379 Binary->setBaseAddress(Event.Address); 380 Binary->setIsLoadedByMMap(true); 381 } else { 382 // Verify segments are loaded consecutively. 383 const auto &Offsets = Binary->getTextSegmentOffsets(); 384 auto It = std::lower_bound(Offsets.begin(), Offsets.end(), Event.Offset); 385 if (It != Offsets.end() && *It == Event.Offset) { 386 // The event is for loading a separate executable segment. 387 auto I = std::distance(Offsets.begin(), It); 388 const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses(); 389 if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() != 390 Event.Address - Binary->getBaseAddress()) 391 exitWithError("Executable segments not loaded consecutively"); 392 } else { 393 if (It == Offsets.begin()) 394 exitWithError("File offset not found"); 395 else { 396 // Find the segment the event falls in. A large segment could be loaded 397 // via multiple mmap calls with consecutive memory addresses. 398 --It; 399 assert(*It < Event.Offset); 400 if (Event.Offset - *It != Event.Address - Binary->getBaseAddress()) 401 exitWithError("Segment not loaded by consecutive mmaps"); 402 } 403 } 404 } 405 } 406 407 static std::string getContextKeyStr(ContextKey *K, 408 const ProfiledBinary *Binary) { 409 if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) { 410 return SampleContext::getContextString(CtxKey->Context); 411 } else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) { 412 SampleContextFrameVector ContextStack; 413 for (const auto *Probe : CtxKey->Probes) { 414 Binary->getInlineContextForProbe(Probe, ContextStack, true); 415 } 416 // Probe context key at this point does not have leaf probe, so do not 417 // include the leaf inline location. 418 return SampleContext::getContextString(ContextStack, true); 419 } else { 420 llvm_unreachable("unexpected key type"); 421 } 422 } 423 424 void HybridPerfReader::unwindSamples() { 425 std::set<uint64_t> AllUntrackedCallsites; 426 for (const auto &Item : AggregatedSamples) { 427 const PerfSample *Sample = Item.first.getPtr(); 428 VirtualUnwinder Unwinder(&SampleCounters, Binary); 429 Unwinder.unwind(Sample, Item.second); 430 auto &CurrUntrackedCallsites = Unwinder.getUntrackedCallsites(); 431 AllUntrackedCallsites.insert(CurrUntrackedCallsites.begin(), 432 CurrUntrackedCallsites.end()); 433 } 434 435 // Warn about untracked frames due to missing probes. 436 if (ShowDetailedWarning) { 437 for (auto Address : AllUntrackedCallsites) 438 WithColor::warning() << "Profile context truncated due to missing probe " 439 << "for call instruction at " 440 << format("0x%" PRIx64, Address) << "\n"; 441 } 442 443 emitWarningSummary(AllUntrackedCallsites.size(), SampleCounters.size(), 444 "of profiled contexts are truncated due to missing probe " 445 "for call instruction."); 446 } 447 448 bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, 449 SmallVectorImpl<LBREntry> &LBRStack) { 450 // The raw format of LBR stack is like: 451 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 452 // ... 0x4005c8/0x4005dc/P/-/-/0 453 // It's in FIFO order and seperated by whitespace. 454 SmallVector<StringRef, 32> Records; 455 TraceIt.getCurrentLine().split(Records, " ", -1, false); 456 auto WarnInvalidLBR = [](TraceStream &TraceIt) { 457 WithColor::warning() << "Invalid address in LBR record at line " 458 << TraceIt.getLineNumber() << ": " 459 << TraceIt.getCurrentLine() << "\n"; 460 }; 461 462 // Skip the leading instruction pointer. 463 size_t Index = 0; 464 uint64_t LeadingAddr; 465 if (!Records.empty() && !Records[0].contains('/')) { 466 if (Records[0].getAsInteger(16, LeadingAddr)) { 467 WarnInvalidLBR(TraceIt); 468 TraceIt.advance(); 469 return false; 470 } 471 Index = 1; 472 } 473 // Now extract LBR samples - note that we do not reverse the 474 // LBR entry order so we can unwind the sample stack as we walk 475 // through LBR entries. 476 uint64_t PrevTrDst = 0; 477 478 while (Index < Records.size()) { 479 auto &Token = Records[Index++]; 480 if (Token.size() == 0) 481 continue; 482 483 SmallVector<StringRef, 8> Addresses; 484 Token.split(Addresses, "/"); 485 uint64_t Src; 486 uint64_t Dst; 487 488 // Stop at broken LBR records. 489 if (Addresses.size() < 2 || Addresses[0].substr(2).getAsInteger(16, Src) || 490 Addresses[1].substr(2).getAsInteger(16, Dst)) { 491 WarnInvalidLBR(TraceIt); 492 break; 493 } 494 495 bool SrcIsInternal = Binary->addressIsCode(Src); 496 bool DstIsInternal = Binary->addressIsCode(Dst); 497 bool IsExternal = !SrcIsInternal && !DstIsInternal; 498 bool IsIncoming = !SrcIsInternal && DstIsInternal; 499 bool IsOutgoing = SrcIsInternal && !DstIsInternal; 500 bool IsArtificial = false; 501 502 // Ignore branches outside the current binary. Ignore all remaining branches 503 // if there's no incoming branch before the external branch in reverse 504 // order. 505 if (IsExternal) { 506 if (PrevTrDst) 507 continue; 508 if (!LBRStack.empty()) { 509 WithColor::warning() 510 << "Invalid transfer to external code in LBR record at line " 511 << TraceIt.getLineNumber() << ": " << TraceIt.getCurrentLine() 512 << "\n"; 513 } 514 break; 515 } 516 517 if (IsOutgoing) { 518 if (!PrevTrDst) { 519 // This is unpaired outgoing jump which is likely due to interrupt or 520 // incomplete LBR trace. Ignore current and subsequent entries since 521 // they are likely in different contexts. 522 break; 523 } 524 525 if (Binary->addressIsReturn(Src)) { 526 // In a callback case, a return from internal code, say A, to external 527 // runtime can happen. The external runtime can then call back to 528 // another internal routine, say B. Making an artificial branch that 529 // looks like a return from A to B can confuse the unwinder to treat 530 // the instruction before B as the call instruction. 531 break; 532 } 533 534 // For transition to external code, group the Source with the next 535 // availabe transition target. 536 Dst = PrevTrDst; 537 PrevTrDst = 0; 538 IsArtificial = true; 539 } else { 540 if (PrevTrDst) { 541 // If we have seen an incoming transition from external code to internal 542 // code, but not a following outgoing transition, the incoming 543 // transition is likely due to interrupt which is usually unpaired. 544 // Ignore current and subsequent entries since they are likely in 545 // different contexts. 546 break; 547 } 548 549 if (IsIncoming) { 550 // For transition from external code (such as dynamic libraries) to 551 // the current binary, keep track of the branch target which will be 552 // grouped with the Source of the last transition from the current 553 // binary. 554 PrevTrDst = Dst; 555 continue; 556 } 557 } 558 559 // TODO: filter out buggy duplicate branches on Skylake 560 561 LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial)); 562 } 563 TraceIt.advance(); 564 return !LBRStack.empty(); 565 } 566 567 bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, 568 SmallVectorImpl<uint64_t> &CallStack) { 569 // The raw format of call stack is like: 570 // 4005dc # leaf frame 571 // 400634 572 // 400684 # root frame 573 // It's in bottom-up order with each frame in one line. 574 575 // Extract stack frames from sample 576 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { 577 StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); 578 uint64_t FrameAddr = 0; 579 if (FrameStr.getAsInteger(16, FrameAddr)) { 580 // We might parse a non-perf sample line like empty line and comments, 581 // skip it 582 TraceIt.advance(); 583 return false; 584 } 585 TraceIt.advance(); 586 // Currently intermixed frame from different binaries is not supported. 587 // Ignore caller frames not from binary of interest. 588 if (!Binary->addressIsCode(FrameAddr)) 589 break; 590 591 // We need to translate return address to call address for non-leaf frames. 592 if (!CallStack.empty()) { 593 auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); 594 if (!CallAddr) { 595 // Stop at an invalid return address caused by bad unwinding. This could 596 // happen to frame-pointer-based unwinding and the callee functions that 597 // do not have the frame pointer chain set up. 598 InvalidReturnAddresses.insert(FrameAddr); 599 break; 600 } 601 FrameAddr = CallAddr; 602 } 603 604 CallStack.emplace_back(FrameAddr); 605 } 606 607 // Skip other unrelated line, find the next valid LBR line 608 // Note that even for empty call stack, we should skip the address at the 609 // bottom, otherwise the following pass may generate a truncated callstack 610 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { 611 TraceIt.advance(); 612 } 613 // Filter out broken stack sample. We may not have complete frame info 614 // if sample end up in prolog/epilog, the result is dangling context not 615 // connected to entry point. This should be relatively rare thus not much 616 // impact on overall profile quality. However we do want to filter them 617 // out to reduce the number of different calling contexts. One instance 618 // of such case - when sample landed in prolog/epilog, somehow stack 619 // walking will be broken in an unexpected way that higher frames will be 620 // missing. 621 return !CallStack.empty() && 622 !Binary->addressInPrologEpilog(CallStack.front()); 623 } 624 625 void PerfScriptReader::warnIfMissingMMap() { 626 if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) { 627 WithColor::warning() << "No relevant mmap event is matched for " 628 << Binary->getName() 629 << ", will use preferred address (" 630 << format("0x%" PRIx64, 631 Binary->getPreferredBaseAddress()) 632 << ") as the base loading address!\n"; 633 // Avoid redundant warning, only warn at the first unmatched sample. 634 Binary->setMissingMMapWarned(true); 635 } 636 } 637 638 void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { 639 // The raw hybird sample started with call stack in FILO order and followed 640 // intermediately by LBR sample 641 // e.g. 642 // 4005dc # call stack leaf 643 // 400634 644 // 400684 # call stack root 645 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 646 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries 647 // 648 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); 649 650 // Parsing call stack and populate into PerfSample.CallStack 651 if (!extractCallstack(TraceIt, Sample->CallStack)) { 652 // Skip the next LBR line matched current call stack 653 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) 654 TraceIt.advance(); 655 return; 656 } 657 658 warnIfMissingMMap(); 659 660 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) { 661 // Parsing LBR stack and populate into PerfSample.LBRStack 662 if (extractLBRStack(TraceIt, Sample->LBRStack)) { 663 if (IgnoreStackSamples) { 664 Sample->CallStack.clear(); 665 } else { 666 // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR 667 // ranges 668 Sample->CallStack.front() = Sample->LBRStack[0].Target; 669 } 670 // Record samples by aggregation 671 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; 672 } 673 } else { 674 // LBR sample is encoded in single line after stack sample 675 exitWithError("'Hybrid perf sample is corrupted, No LBR sample line"); 676 } 677 } 678 679 void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) { 680 std::error_code EC; 681 raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF); 682 if (EC) 683 exitWithError(EC, Filename); 684 writeUnsymbolizedProfile(OS); 685 } 686 687 // Use ordered map to make the output deterministic 688 using OrderedCounterForPrint = std::map<std::string, SampleCounter *>; 689 690 void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) { 691 OrderedCounterForPrint OrderedCounters; 692 for (auto &CI : SampleCounters) { 693 OrderedCounters[getContextKeyStr(CI.first.getPtr(), Binary)] = &CI.second; 694 } 695 696 auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator, 697 uint32_t Indent) { 698 OS.indent(Indent); 699 OS << Counter.size() << "\n"; 700 for (auto &I : Counter) { 701 uint64_t Start = UseOffset ? I.first.first 702 : Binary->offsetToVirtualAddr(I.first.first); 703 uint64_t End = UseOffset ? I.first.second 704 : Binary->offsetToVirtualAddr(I.first.second); 705 OS.indent(Indent); 706 OS << Twine::utohexstr(Start) << Separator << Twine::utohexstr(End) << ":" 707 << I.second << "\n"; 708 } 709 }; 710 711 for (auto &CI : OrderedCounters) { 712 uint32_t Indent = 0; 713 if (ProfileIsCS) { 714 // Context string key 715 OS << "[" << CI.first << "]\n"; 716 Indent = 2; 717 } 718 719 SampleCounter &Counter = *CI.second; 720 SCounterPrinter(Counter.RangeCounter, "-", Indent); 721 SCounterPrinter(Counter.BranchCounter, "->", Indent); 722 } 723 } 724 725 // Format of input: 726 // number of entries in RangeCounter 727 // from_1-to_1:count_1 728 // from_2-to_2:count_2 729 // ...... 730 // from_n-to_n:count_n 731 // number of entries in BranchCounter 732 // src_1->dst_1:count_1 733 // src_2->dst_2:count_2 734 // ...... 735 // src_n->dst_n:count_n 736 void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt, 737 SampleCounter &SCounters) { 738 auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) { 739 std::string Msg = TraceIt.isAtEoF() 740 ? "Invalid raw profile!" 741 : "Invalid raw profile at line " + 742 Twine(TraceIt.getLineNumber()).str() + ": " + 743 TraceIt.getCurrentLine().str(); 744 exitWithError(Msg); 745 }; 746 auto ReadNumber = [&](uint64_t &Num) { 747 if (TraceIt.isAtEoF()) 748 exitWithErrorForTraceLine(TraceIt); 749 if (TraceIt.getCurrentLine().ltrim().getAsInteger(10, Num)) 750 exitWithErrorForTraceLine(TraceIt); 751 TraceIt.advance(); 752 }; 753 754 auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) { 755 uint64_t Num = 0; 756 ReadNumber(Num); 757 while (Num--) { 758 if (TraceIt.isAtEoF()) 759 exitWithErrorForTraceLine(TraceIt); 760 StringRef Line = TraceIt.getCurrentLine().ltrim(); 761 762 uint64_t Count = 0; 763 auto LineSplit = Line.split(":"); 764 if (LineSplit.second.empty() || LineSplit.second.getAsInteger(10, Count)) 765 exitWithErrorForTraceLine(TraceIt); 766 767 uint64_t Source = 0; 768 uint64_t Target = 0; 769 auto Range = LineSplit.first.split(Separator); 770 if (Range.second.empty() || Range.first.getAsInteger(16, Source) || 771 Range.second.getAsInteger(16, Target)) 772 exitWithErrorForTraceLine(TraceIt); 773 774 if (!UseOffset) { 775 Source = Binary->virtualAddrToOffset(Source); 776 Target = Binary->virtualAddrToOffset(Target); 777 } 778 779 Counter[{Source, Target}] += Count; 780 TraceIt.advance(); 781 } 782 }; 783 784 ReadCounter(SCounters.RangeCounter, "-"); 785 ReadCounter(SCounters.BranchCounter, "->"); 786 } 787 788 void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) { 789 TraceStream TraceIt(FileName); 790 while (!TraceIt.isAtEoF()) { 791 std::shared_ptr<StringBasedCtxKey> Key = 792 std::make_shared<StringBasedCtxKey>(); 793 StringRef Line = TraceIt.getCurrentLine(); 794 // Read context stack for CS profile. 795 if (Line.startswith("[")) { 796 ProfileIsCS = true; 797 auto I = ContextStrSet.insert(Line.str()); 798 SampleContext::createCtxVectorFromStr(*I.first, Key->Context); 799 TraceIt.advance(); 800 } 801 auto Ret = 802 SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter()); 803 readSampleCounters(TraceIt, Ret.first->second); 804 } 805 } 806 807 void UnsymbolizedProfileReader::parsePerfTraces() { 808 readUnsymbolizedProfile(PerfTraceFile); 809 } 810 811 void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, 812 uint64_t Repeat) { 813 SampleCounter &Counter = SampleCounters.begin()->second; 814 uint64_t EndOffeset = 0; 815 for (const LBREntry &LBR : Sample->LBRStack) { 816 uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); 817 uint64_t TargetOffset = Binary->virtualAddrToOffset(LBR.Target); 818 819 if (!LBR.IsArtificial) { 820 Counter.recordBranchCount(SourceOffset, TargetOffset, Repeat); 821 } 822 823 // If this not the first LBR, update the range count between TO of current 824 // LBR and FROM of next LBR. 825 uint64_t StartOffset = TargetOffset; 826 if (EndOffeset != 0) 827 Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); 828 EndOffeset = SourceOffset; 829 } 830 } 831 832 void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { 833 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); 834 // Parsing LBR stack and populate into PerfSample.LBRStack 835 if (extractLBRStack(TraceIt, Sample->LBRStack)) { 836 warnIfMissingMMap(); 837 // Record LBR only samples by aggregation 838 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; 839 } 840 } 841 842 void PerfScriptReader::generateUnsymbolizedProfile() { 843 // There is no context for LBR only sample, so initialize one entry with 844 // fake "empty" context key. 845 assert(SampleCounters.empty() && 846 "Sample counter map should be empty before raw profile generation"); 847 std::shared_ptr<StringBasedCtxKey> Key = 848 std::make_shared<StringBasedCtxKey>(); 849 SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter()); 850 for (const auto &Item : AggregatedSamples) { 851 const PerfSample *Sample = Item.first.getPtr(); 852 computeCounterFromLBR(Sample, Item.second); 853 } 854 } 855 856 uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) { 857 // The aggregated count is optional, so do not skip the line and return 1 if 858 // it's unmatched 859 uint64_t Count = 1; 860 if (!TraceIt.getCurrentLine().getAsInteger(10, Count)) 861 TraceIt.advance(); 862 return Count; 863 } 864 865 void PerfScriptReader::parseSample(TraceStream &TraceIt) { 866 uint64_t Count = parseAggregatedCount(TraceIt); 867 assert(Count >= 1 && "Aggregated count should be >= 1!"); 868 parseSample(TraceIt, Count); 869 } 870 871 bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary, 872 StringRef Line, 873 MMapEvent &MMap) { 874 // Parse a line like: 875 // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 876 // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so 877 constexpr static const char *const Pattern = 878 "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: " 879 "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " 880 "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; 881 // Field 0 - whole line 882 // Field 1 - PID 883 // Field 2 - base address 884 // Field 3 - mmapped size 885 // Field 4 - page offset 886 // Field 5 - binary path 887 enum EventIndex { 888 WHOLE_LINE = 0, 889 PID = 1, 890 MMAPPED_ADDRESS = 2, 891 MMAPPED_SIZE = 3, 892 PAGE_OFFSET = 4, 893 BINARY_PATH = 5 894 }; 895 896 Regex RegMmap2(Pattern); 897 SmallVector<StringRef, 6> Fields; 898 bool R = RegMmap2.match(Line, &Fields); 899 if (!R) { 900 std::string ErrorMsg = "Cannot parse mmap event: " + Line.str() + " \n"; 901 exitWithError(ErrorMsg); 902 } 903 Fields[PID].getAsInteger(10, MMap.PID); 904 Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address); 905 Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size); 906 Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset); 907 MMap.BinaryPath = Fields[BINARY_PATH]; 908 if (ShowMmapEvents) { 909 outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at " 910 << format("0x%" PRIx64 ":", MMap.Address) << " \n"; 911 } 912 913 StringRef BinaryName = llvm::sys::path::filename(MMap.BinaryPath); 914 return Binary->getName() == BinaryName; 915 } 916 917 void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) { 918 MMapEvent MMap; 919 if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) 920 updateBinaryAddress(MMap); 921 TraceIt.advance(); 922 } 923 924 void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) { 925 if (isMMap2Event(TraceIt.getCurrentLine())) 926 parseMMap2Event(TraceIt); 927 else 928 parseSample(TraceIt); 929 } 930 931 void PerfScriptReader::parseAndAggregateTrace() { 932 // Trace line iterator 933 TraceStream TraceIt(PerfTraceFile); 934 while (!TraceIt.isAtEoF()) 935 parseEventOrSample(TraceIt); 936 } 937 938 // A LBR sample is like: 939 // 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... 940 // A heuristic for fast detection by checking whether a 941 // leading " 0x" and the '/' exist. 942 bool PerfScriptReader::isLBRSample(StringRef Line) { 943 // Skip the leading instruction pointer 944 SmallVector<StringRef, 32> Records; 945 Line.trim().split(Records, " ", 2, false); 946 if (Records.size() < 2) 947 return false; 948 if (Records[1].startswith("0x") && Records[1].contains('/')) 949 return true; 950 return false; 951 } 952 953 bool PerfScriptReader::isMMap2Event(StringRef Line) { 954 // Short cut to avoid string find is possible. 955 if (Line.empty() || Line.size() < 50) 956 return false; 957 958 if (std::isdigit(Line[0])) 959 return false; 960 961 // PERF_RECORD_MMAP2 does not appear at the beginning of the line 962 // for ` perf script --show-mmap-events -i ...` 963 return Line.contains("PERF_RECORD_MMAP2"); 964 } 965 966 // The raw hybird sample is like 967 // e.g. 968 // 4005dc # call stack leaf 969 // 400634 970 // 400684 # call stack root 971 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 972 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries 973 // Determine the perfscript contains hybrid samples(call stack + LBRs) by 974 // checking whether there is a non-empty call stack immediately followed by 975 // a LBR sample 976 PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) { 977 TraceStream TraceIt(FileName); 978 uint64_t FrameAddr = 0; 979 while (!TraceIt.isAtEoF()) { 980 // Skip the aggregated count 981 if (!TraceIt.getCurrentLine().getAsInteger(10, FrameAddr)) 982 TraceIt.advance(); 983 984 // Detect sample with call stack 985 int32_t Count = 0; 986 while (!TraceIt.isAtEoF() && 987 !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) { 988 Count++; 989 TraceIt.advance(); 990 } 991 if (!TraceIt.isAtEoF()) { 992 if (isLBRSample(TraceIt.getCurrentLine())) { 993 if (Count > 0) 994 return PerfContent::LBRStack; 995 else 996 return PerfContent::LBR; 997 } 998 TraceIt.advance(); 999 } 1000 } 1001 1002 exitWithError("Invalid perf script input!"); 1003 return PerfContent::UnknownContent; 1004 } 1005 1006 void HybridPerfReader::generateUnsymbolizedProfile() { 1007 ProfileIsCS = !IgnoreStackSamples; 1008 if (ProfileIsCS) 1009 unwindSamples(); 1010 else 1011 PerfScriptReader::generateUnsymbolizedProfile(); 1012 } 1013 1014 void PerfScriptReader::warnTruncatedStack() { 1015 if (ShowDetailedWarning) { 1016 for (auto Address : InvalidReturnAddresses) { 1017 WithColor::warning() 1018 << "Truncated stack sample due to invalid return address at " 1019 << format("0x%" PRIx64, Address) 1020 << ", likely caused by frame pointer omission\n"; 1021 } 1022 } 1023 emitWarningSummary( 1024 InvalidReturnAddresses.size(), AggregatedSamples.size(), 1025 "of truncated stack samples due to invalid return address, " 1026 "likely caused by frame pointer omission."); 1027 } 1028 1029 void PerfScriptReader::warnInvalidRange() { 1030 std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t, 1031 pair_hash<uint64_t, uint64_t>> 1032 Ranges; 1033 1034 for (const auto &Item : AggregatedSamples) { 1035 const PerfSample *Sample = Item.first.getPtr(); 1036 uint64_t Count = Item.second; 1037 uint64_t EndOffeset = 0; 1038 for (const LBREntry &LBR : Sample->LBRStack) { 1039 uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); 1040 uint64_t StartOffset = Binary->virtualAddrToOffset(LBR.Target); 1041 if (EndOffeset != 0) 1042 Ranges[{StartOffset, EndOffeset}] += Count; 1043 EndOffeset = SourceOffset; 1044 } 1045 } 1046 1047 if (Ranges.empty()) { 1048 WithColor::warning() << "No samples in perf script!\n"; 1049 return; 1050 } 1051 1052 auto WarnInvalidRange = 1053 [&](uint64_t StartOffset, uint64_t EndOffset, StringRef Msg) { 1054 if (!ShowDetailedWarning) 1055 return; 1056 WithColor::warning() 1057 << "[" 1058 << format("%8" PRIx64, Binary->offsetToVirtualAddr(StartOffset)) 1059 << "," 1060 << format("%8" PRIx64, Binary->offsetToVirtualAddr(EndOffset)) 1061 << "]: " << Msg << "\n"; 1062 }; 1063 1064 const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " 1065 "likely due to profile and binary mismatch."; 1066 const char *DanglingRangeMsg = "Range does not belong to any functions, " 1067 "likely from PLT, .init or .fini section."; 1068 const char *RangeCrossFuncMsg = 1069 "Fall through range should not cross function boundaries, likely due to " 1070 "profile and binary mismatch."; 1071 1072 uint64_t InstNotBoundary = 0; 1073 uint64_t UnmatchedRange = 0; 1074 uint64_t RangeCrossFunc = 0; 1075 1076 for (auto &I : Ranges) { 1077 uint64_t StartOffset = I.first.first; 1078 uint64_t EndOffset = I.first.second; 1079 1080 if (!Binary->offsetIsCode(StartOffset) || 1081 !Binary->offsetIsTransfer(EndOffset)) { 1082 InstNotBoundary++; 1083 WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg); 1084 } 1085 1086 auto *FRange = Binary->findFuncRangeForOffset(StartOffset); 1087 if (!FRange) { 1088 UnmatchedRange++; 1089 WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg); 1090 continue; 1091 } 1092 1093 if (EndOffset >= FRange->EndOffset) { 1094 RangeCrossFunc++; 1095 WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg); 1096 } 1097 } 1098 1099 uint64_t TotalRangeNum = Ranges.size(); 1100 emitWarningSummary(InstNotBoundary, TotalRangeNum, 1101 "of profiled ranges are not on instruction boundary."); 1102 emitWarningSummary(UnmatchedRange, TotalRangeNum, 1103 "of profiled ranges do not belong to any functions."); 1104 emitWarningSummary(RangeCrossFunc, TotalRangeNum, 1105 "of profiled ranges do cross function boundaries."); 1106 } 1107 1108 void PerfScriptReader::parsePerfTraces() { 1109 // Parse perf traces and do aggregation. 1110 parseAndAggregateTrace(); 1111 1112 // Generate unsymbolized profile. 1113 warnTruncatedStack(); 1114 warnInvalidRange(); 1115 generateUnsymbolizedProfile(); 1116 1117 if (SkipSymbolization) 1118 writeUnsymbolizedProfile(OutputFilename); 1119 } 1120 1121 } // end namespace sampleprof 1122 } // end namespace llvm 1123