1 //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "PerfReader.h" 9 #include "ProfileGenerator.h" 10 #include "llvm/Support/FileSystem.h" 11 #include "llvm/Support/Process.h" 12 13 #define DEBUG_TYPE "perf-reader" 14 15 static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden, 16 cl::init(false), cl::ZeroOrMore, 17 cl::desc("Print binary load events.")); 18 19 cl::opt<bool> SkipSymbolization("skip-symbolization", cl::ReallyHidden, 20 cl::init(false), cl::ZeroOrMore, 21 cl::desc("Dump the unsymbolized profile to the " 22 "output file. It will show unwinder " 23 "output for CS profile generation.")); 24 cl::opt<bool> UseOffset("use-offset", cl::ReallyHidden, cl::init(true), 25 cl::ZeroOrMore, 26 cl::desc("Work with `--skip-symbolization` to dump the " 27 "offset instead of virtual address.")); 28 cl::opt<bool> 29 IgnoreStackSamples("ignore-stack-samples", cl::ReallyHidden, 30 cl::init(false), cl::ZeroOrMore, 31 cl::desc("Ignore call stack samples for hybrid samples " 32 "and produce context-insensitive profile.")); 33 34 extern cl::opt<std::string> PerfTraceFilename; 35 extern cl::opt<bool> ShowDisassemblyOnly; 36 extern cl::opt<bool> ShowSourceLocations; 37 extern cl::opt<std::string> OutputFilename; 38 39 namespace llvm { 40 namespace sampleprof { 41 42 void VirtualUnwinder::unwindCall(UnwindState &State) { 43 // The 2nd frame after leaf could be missing if stack sample is 44 // taken when IP is within prolog/epilog, as frame chain isn't 45 // setup yet. Fill in the missing frame in that case. 46 // TODO: Currently we just assume all the addr that can't match the 47 // 2nd frame is in prolog/epilog. In the future, we will switch to 48 // pro/epi tracker(Dwarf CFI) for the precise check. 49 uint64_t Source = State.getCurrentLBRSource(); 50 auto *ParentFrame = State.getParentFrame(); 51 if (ParentFrame == State.getDummyRootPtr() || 52 ParentFrame->Address != Source) { 53 State.switchToFrame(Source); 54 } else { 55 State.popFrame(); 56 } 57 State.InstPtr.update(Source); 58 } 59 60 void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { 61 InstructionPointer &IP = State.InstPtr; 62 uint64_t Target = State.getCurrentLBRTarget(); 63 uint64_t End = IP.Address; 64 if (Binary->usePseudoProbes()) { 65 // We don't need to top frame probe since it should be extracted 66 // from the range. 67 // The outcome of the virtual unwinding with pseudo probes is a 68 // map from a context key to the address range being unwound. 69 // This means basically linear unwinding is not needed for pseudo 70 // probes. The range will be simply recorded here and will be 71 // converted to a list of pseudo probes to report in ProfileGenerator. 72 State.getParentFrame()->recordRangeCount(Target, End, Repeat); 73 } else { 74 // Unwind linear execution part. 75 // Split and record the range by different inline context. For example: 76 // [0x01] ... main:1 # Target 77 // [0x02] ... main:2 78 // [0x03] ... main:3 @ foo:1 79 // [0x04] ... main:3 @ foo:2 80 // [0x05] ... main:3 @ foo:3 81 // [0x06] ... main:4 82 // [0x07] ... main:5 # End 83 // It will be recorded: 84 // [main:*] : [0x06, 0x07], [0x01, 0x02] 85 // [main:3 @ foo:*] : [0x03, 0x05] 86 while (IP.Address > Target) { 87 uint64_t PrevIP = IP.Address; 88 IP.backward(); 89 // Break into segments for implicit call/return due to inlining 90 bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address); 91 if (!SameInlinee) { 92 State.switchToFrame(PrevIP); 93 State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat); 94 End = IP.Address; 95 } 96 } 97 assert(IP.Address == Target && "The last one must be the target address."); 98 // Record the remaining range, [0x01, 0x02] in the example 99 State.switchToFrame(IP.Address); 100 State.CurrentLeafFrame->recordRangeCount(IP.Address, End, Repeat); 101 } 102 } 103 104 void VirtualUnwinder::unwindReturn(UnwindState &State) { 105 // Add extra frame as we unwind through the return 106 const LBREntry &LBR = State.getCurrentLBR(); 107 uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target); 108 State.switchToFrame(CallAddr); 109 State.pushFrame(LBR.Source); 110 State.InstPtr.update(LBR.Source); 111 } 112 113 void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) { 114 // TODO: Tolerate tail call for now, as we may see tail call from libraries. 115 // This is only for intra function branches, excluding tail calls. 116 uint64_t Source = State.getCurrentLBRSource(); 117 State.switchToFrame(Source); 118 State.InstPtr.update(Source); 119 } 120 121 std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() { 122 std::shared_ptr<StringBasedCtxKey> KeyStr = 123 std::make_shared<StringBasedCtxKey>(); 124 KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined); 125 if (KeyStr->Context.empty()) 126 return nullptr; 127 KeyStr->genHashCode(); 128 return KeyStr; 129 } 130 131 std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() { 132 std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey = 133 std::make_shared<ProbeBasedCtxKey>(); 134 for (auto CallProbe : Stack) { 135 ProbeBasedKey->Probes.emplace_back(CallProbe); 136 } 137 CSProfileGenerator::compressRecursionContext<const MCDecodedPseudoProbe *>( 138 ProbeBasedKey->Probes); 139 CSProfileGenerator::trimContext<const MCDecodedPseudoProbe *>( 140 ProbeBasedKey->Probes); 141 142 ProbeBasedKey->genHashCode(); 143 return ProbeBasedKey; 144 } 145 146 template <typename T> 147 void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, 148 T &Stack) { 149 if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty()) 150 return; 151 152 std::shared_ptr<ContextKey> Key = Stack.getContextKey(); 153 if (Key == nullptr) 154 return; 155 auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter()); 156 SampleCounter &SCounter = Ret.first->second; 157 for (auto &Item : Cur->RangeSamples) { 158 uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); 159 uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); 160 SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item)); 161 } 162 163 for (auto &Item : Cur->BranchSamples) { 164 uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); 165 uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); 166 SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item)); 167 } 168 } 169 170 template <typename T> 171 void VirtualUnwinder::collectSamplesFromFrameTrie( 172 UnwindState::ProfiledFrame *Cur, T &Stack) { 173 if (!Cur->isDummyRoot()) { 174 if (!Stack.pushFrame(Cur)) { 175 // Process truncated context 176 // Start a new traversal ignoring its bottom context 177 T EmptyStack(Binary); 178 collectSamplesFromFrame(Cur, EmptyStack); 179 for (const auto &Item : Cur->Children) { 180 collectSamplesFromFrameTrie(Item.second.get(), EmptyStack); 181 } 182 183 // Keep note of untracked call site and deduplicate them 184 // for warning later. 185 if (!Cur->isLeafFrame()) 186 UntrackedCallsites.insert(Cur->Address); 187 188 return; 189 } 190 } 191 192 collectSamplesFromFrame(Cur, Stack); 193 // Process children frame 194 for (const auto &Item : Cur->Children) { 195 collectSamplesFromFrameTrie(Item.second.get(), Stack); 196 } 197 // Recover the call stack 198 Stack.popFrame(); 199 } 200 201 void VirtualUnwinder::collectSamplesFromFrameTrie( 202 UnwindState::ProfiledFrame *Cur) { 203 if (Binary->usePseudoProbes()) { 204 ProbeStack Stack(Binary); 205 collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack); 206 } else { 207 FrameStack Stack(Binary); 208 collectSamplesFromFrameTrie<FrameStack>(Cur, Stack); 209 } 210 } 211 212 void VirtualUnwinder::recordBranchCount(const LBREntry &Branch, 213 UnwindState &State, uint64_t Repeat) { 214 if (Branch.IsArtificial) 215 return; 216 217 if (Binary->usePseudoProbes()) { 218 // Same as recordRangeCount, We don't need to top frame probe since we will 219 // extract it from branch's source address 220 State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target, 221 Repeat); 222 } else { 223 State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target, 224 Repeat); 225 } 226 } 227 228 bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) { 229 // Capture initial state as starting point for unwinding. 230 UnwindState State(Sample, Binary); 231 232 // Sanity check - making sure leaf of LBR aligns with leaf of stack sample 233 // Stack sample sometimes can be unreliable, so filter out bogus ones. 234 if (!State.validateInitialState()) 235 return false; 236 237 // Also do not attempt linear unwind for the leaf range as it's incomplete. 238 bool IsLeaf = true; 239 240 // Now process the LBR samples in parrallel with stack sample 241 // Note that we do not reverse the LBR entry order so we can 242 // unwind the sample stack as we walk through LBR entries. 243 while (State.hasNextLBR()) { 244 State.checkStateConsistency(); 245 246 // Unwind implicit calls/returns from inlining, along the linear path, 247 // break into smaller sub section each with its own calling context. 248 if (!IsLeaf) { 249 unwindLinear(State, Repeat); 250 } 251 IsLeaf = false; 252 253 // Save the LBR branch before it gets unwound. 254 const LBREntry &Branch = State.getCurrentLBR(); 255 256 if (isCallState(State)) { 257 // Unwind calls - we know we encountered call if LBR overlaps with 258 // transition between leaf the 2nd frame. Note that for calls that 259 // were not in the original stack sample, we should have added the 260 // extra frame when processing the return paired with this call. 261 unwindCall(State); 262 } else if (isReturnState(State)) { 263 // Unwind returns - check whether the IP is indeed at a return instruction 264 unwindReturn(State); 265 } else { 266 // Unwind branches - for regular intra function branches, we only 267 // need to record branch with context. 268 unwindBranchWithinFrame(State); 269 } 270 State.advanceLBR(); 271 // Record `branch` with calling context after unwinding. 272 recordBranchCount(Branch, State, Repeat); 273 } 274 // As samples are aggregated on trie, record them into counter map 275 collectSamplesFromFrameTrie(State.getDummyRootPtr()); 276 277 return true; 278 } 279 280 std::unique_ptr<PerfReaderBase> PerfReaderBase::create(ProfiledBinary *Binary, 281 StringRef PerfInputFile, 282 bool IsPerfData) { 283 // For perf data input, we need to convert them into perf script first. 284 if (IsPerfData) { 285 std::string ConvertedPerfScript = 286 convertPerfDataToTrace(Binary, PerfInputFile); 287 // Let commoand opt own the string for converted perf trace file name 288 PerfTraceFilename = ConvertedPerfScript; 289 PerfInputFile = PerfTraceFilename; 290 } 291 292 PerfScriptType PerfType = checkPerfScriptType(PerfInputFile); 293 std::unique_ptr<PerfReaderBase> PerfReader; 294 if (PerfType == PERF_LBR_STACK) { 295 PerfReader.reset(new HybridPerfReader(Binary, PerfInputFile)); 296 } else if (PerfType == PERF_LBR) { 297 PerfReader.reset(new LBRPerfReader(Binary, PerfInputFile)); 298 } else { 299 exitWithError("Unsupported perfscript!"); 300 } 301 302 return PerfReader; 303 } 304 305 std::string PerfReaderBase::convertPerfDataToTrace(ProfiledBinary *Binary, 306 StringRef PerfData) { 307 // Run perf script to retrieve PIDs matching binary we're interested in. 308 auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf"); 309 if (!PerfExecutable) { 310 exitWithError("Perf not found."); 311 } 312 std::string PerfPath = *PerfExecutable; 313 std::string PerfTraceFile = PerfData.str() + ".script.tmp"; 314 StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events", 315 "-F", "comm,pid", "-i", 316 PerfData}; 317 Optional<StringRef> Redirects[] = {llvm::None, // Stdin 318 StringRef(PerfTraceFile), // Stdout 319 StringRef(PerfTraceFile)}; // Stderr 320 sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, llvm::None, Redirects); 321 322 // Collect the PIDs 323 TraceStream TraceIt(PerfTraceFile); 324 std::string PIDs; 325 while (!TraceIt.isAtEoF()) { 326 MMapEvent MMap; 327 if (isMMap2Event(TraceIt.getCurrentLine()) && 328 extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) { 329 if (!PIDs.empty()) { 330 PIDs.append(","); 331 } 332 PIDs.append(utostr(MMap.PID)); 333 } 334 TraceIt.advance(); 335 } 336 337 // Run perf script again to retrieve events for PIDs collected above 338 StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events", 339 "-F", "ip,brstack", "--pid", 340 PIDs, "-i", PerfData}; 341 sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, llvm::None, Redirects); 342 343 return PerfTraceFile; 344 } 345 346 void PerfReaderBase::updateBinaryAddress(const MMapEvent &Event) { 347 // Drop the event which doesn't belong to user-provided binary 348 StringRef BinaryName = llvm::sys::path::filename(Event.BinaryPath); 349 if (Binary->getName() != BinaryName) 350 return; 351 352 // Drop the event if its image is loaded at the same address 353 if (Event.Address == Binary->getBaseAddress()) { 354 Binary->setIsLoadedByMMap(true); 355 return; 356 } 357 358 if (Event.Offset == Binary->getTextSegmentOffset()) { 359 // A binary image could be unloaded and then reloaded at different 360 // place, so update binary load address. 361 // Only update for the first executable segment and assume all other 362 // segments are loaded at consecutive memory addresses, which is the case on 363 // X64. 364 Binary->setBaseAddress(Event.Address); 365 Binary->setIsLoadedByMMap(true); 366 } else { 367 // Verify segments are loaded consecutively. 368 const auto &Offsets = Binary->getTextSegmentOffsets(); 369 auto It = std::lower_bound(Offsets.begin(), Offsets.end(), Event.Offset); 370 if (It != Offsets.end() && *It == Event.Offset) { 371 // The event is for loading a separate executable segment. 372 auto I = std::distance(Offsets.begin(), It); 373 const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses(); 374 if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() != 375 Event.Address - Binary->getBaseAddress()) 376 exitWithError("Executable segments not loaded consecutively"); 377 } else { 378 if (It == Offsets.begin()) 379 exitWithError("File offset not found"); 380 else { 381 // Find the segment the event falls in. A large segment could be loaded 382 // via multiple mmap calls with consecutive memory addresses. 383 --It; 384 assert(*It < Event.Offset); 385 if (Event.Offset - *It != Event.Address - Binary->getBaseAddress()) 386 exitWithError("Segment not loaded by consecutive mmaps"); 387 } 388 } 389 } 390 } 391 392 static std::string getContextKeyStr(ContextKey *K, 393 const ProfiledBinary *Binary) { 394 if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) { 395 return SampleContext::getContextString(CtxKey->Context); 396 } else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) { 397 SampleContextFrameVector ContextStack; 398 for (const auto *Probe : CtxKey->Probes) { 399 Binary->getInlineContextForProbe(Probe, ContextStack, true); 400 } 401 // Probe context key at this point does not have leaf probe, so do not 402 // include the leaf inline location. 403 return SampleContext::getContextString(ContextStack, true); 404 } else { 405 llvm_unreachable("unexpected key type"); 406 } 407 } 408 409 void HybridPerfReader::unwindSamples() { 410 std::set<uint64_t> AllUntrackedCallsites; 411 for (const auto &Item : AggregatedSamples) { 412 const PerfSample *Sample = Item.first.getPtr(); 413 VirtualUnwinder Unwinder(&SampleCounters, Binary); 414 Unwinder.unwind(Sample, Item.second); 415 auto &CurrUntrackedCallsites = Unwinder.getUntrackedCallsites(); 416 AllUntrackedCallsites.insert(CurrUntrackedCallsites.begin(), 417 CurrUntrackedCallsites.end()); 418 } 419 420 // Warn about untracked frames due to missing probes. 421 for (auto Address : AllUntrackedCallsites) 422 WithColor::warning() << "Profile context truncated due to missing probe " 423 << "for call instruction at " 424 << format("0x%" PRIx64, Address) << "\n"; 425 } 426 427 bool PerfReaderBase::extractLBRStack(TraceStream &TraceIt, 428 SmallVectorImpl<LBREntry> &LBRStack) { 429 // The raw format of LBR stack is like: 430 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 431 // ... 0x4005c8/0x4005dc/P/-/-/0 432 // It's in FIFO order and seperated by whitespace. 433 SmallVector<StringRef, 32> Records; 434 TraceIt.getCurrentLine().split(Records, " ", -1, false); 435 auto WarnInvalidLBR = [](TraceStream &TraceIt) { 436 WithColor::warning() << "Invalid address in LBR record at line " 437 << TraceIt.getLineNumber() << ": " 438 << TraceIt.getCurrentLine() << "\n"; 439 }; 440 441 // Skip the leading instruction pointer. 442 size_t Index = 0; 443 uint64_t LeadingAddr; 444 if (!Records.empty() && Records[0].find('/') == StringRef::npos) { 445 if (Records[0].getAsInteger(16, LeadingAddr)) { 446 WarnInvalidLBR(TraceIt); 447 TraceIt.advance(); 448 return false; 449 } 450 Index = 1; 451 } 452 // Now extract LBR samples - note that we do not reverse the 453 // LBR entry order so we can unwind the sample stack as we walk 454 // through LBR entries. 455 uint64_t PrevTrDst = 0; 456 457 while (Index < Records.size()) { 458 auto &Token = Records[Index++]; 459 if (Token.size() == 0) 460 continue; 461 462 SmallVector<StringRef, 8> Addresses; 463 Token.split(Addresses, "/"); 464 uint64_t Src; 465 uint64_t Dst; 466 467 // Stop at broken LBR records. 468 if (Addresses.size() < 2 || Addresses[0].substr(2).getAsInteger(16, Src) || 469 Addresses[1].substr(2).getAsInteger(16, Dst)) { 470 WarnInvalidLBR(TraceIt); 471 break; 472 } 473 474 bool SrcIsInternal = Binary->addressIsCode(Src); 475 bool DstIsInternal = Binary->addressIsCode(Dst); 476 bool IsExternal = !SrcIsInternal && !DstIsInternal; 477 bool IsIncoming = !SrcIsInternal && DstIsInternal; 478 bool IsOutgoing = SrcIsInternal && !DstIsInternal; 479 bool IsArtificial = false; 480 481 // Ignore branches outside the current binary. Ignore all remaining branches 482 // if there's no incoming branch before the external branch in reverse 483 // order. 484 if (IsExternal) { 485 if (PrevTrDst) 486 continue; 487 else if (!LBRStack.empty()) { 488 WithColor::warning() 489 << "Invalid transfer to external code in LBR record at line " 490 << TraceIt.getLineNumber() << ": " << TraceIt.getCurrentLine() 491 << "\n"; 492 break; 493 } 494 } 495 496 if (IsOutgoing) { 497 if (!PrevTrDst) { 498 // This is unpaired outgoing jump which is likely due to interrupt or 499 // incomplete LBR trace. Ignore current and subsequent entries since 500 // they are likely in different contexts. 501 break; 502 } 503 504 if (Binary->addressIsReturn(Src)) { 505 // In a callback case, a return from internal code, say A, to external 506 // runtime can happen. The external runtime can then call back to 507 // another internal routine, say B. Making an artificial branch that 508 // looks like a return from A to B can confuse the unwinder to treat 509 // the instruction before B as the call instruction. 510 break; 511 } 512 513 // For transition to external code, group the Source with the next 514 // availabe transition target. 515 Dst = PrevTrDst; 516 PrevTrDst = 0; 517 IsArtificial = true; 518 } else { 519 if (PrevTrDst) { 520 // If we have seen an incoming transition from external code to internal 521 // code, but not a following outgoing transition, the incoming 522 // transition is likely due to interrupt which is usually unpaired. 523 // Ignore current and subsequent entries since they are likely in 524 // different contexts. 525 break; 526 } 527 528 if (IsIncoming) { 529 // For transition from external code (such as dynamic libraries) to 530 // the current binary, keep track of the branch target which will be 531 // grouped with the Source of the last transition from the current 532 // binary. 533 PrevTrDst = Dst; 534 continue; 535 } 536 } 537 538 // TODO: filter out buggy duplicate branches on Skylake 539 540 LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial)); 541 } 542 TraceIt.advance(); 543 return !LBRStack.empty(); 544 } 545 546 bool PerfReaderBase::extractCallstack(TraceStream &TraceIt, 547 SmallVectorImpl<uint64_t> &CallStack) { 548 // The raw format of call stack is like: 549 // 4005dc # leaf frame 550 // 400634 551 // 400684 # root frame 552 // It's in bottom-up order with each frame in one line. 553 554 // Extract stack frames from sample 555 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { 556 StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); 557 uint64_t FrameAddr = 0; 558 if (FrameStr.getAsInteger(16, FrameAddr)) { 559 // We might parse a non-perf sample line like empty line and comments, 560 // skip it 561 TraceIt.advance(); 562 return false; 563 } 564 TraceIt.advance(); 565 // Currently intermixed frame from different binaries is not supported. 566 // Ignore caller frames not from binary of interest. 567 if (!Binary->addressIsCode(FrameAddr)) 568 break; 569 570 // We need to translate return address to call address for non-leaf frames. 571 if (!CallStack.empty()) { 572 auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); 573 if (!CallAddr) { 574 // Stop at an invalid return address caused by bad unwinding. This could 575 // happen to frame-pointer-based unwinding and the callee functions that 576 // do not have the frame pointer chain set up. 577 InvalidReturnAddresses.insert(FrameAddr); 578 break; 579 } 580 FrameAddr = CallAddr; 581 } 582 583 CallStack.emplace_back(FrameAddr); 584 } 585 586 // Skip other unrelated line, find the next valid LBR line 587 // Note that even for empty call stack, we should skip the address at the 588 // bottom, otherwise the following pass may generate a truncated callstack 589 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { 590 TraceIt.advance(); 591 } 592 // Filter out broken stack sample. We may not have complete frame info 593 // if sample end up in prolog/epilog, the result is dangling context not 594 // connected to entry point. This should be relatively rare thus not much 595 // impact on overall profile quality. However we do want to filter them 596 // out to reduce the number of different calling contexts. One instance 597 // of such case - when sample landed in prolog/epilog, somehow stack 598 // walking will be broken in an unexpected way that higher frames will be 599 // missing. 600 return !CallStack.empty() && 601 !Binary->addressInPrologEpilog(CallStack.front()); 602 } 603 604 void PerfReaderBase::warnIfMissingMMap() { 605 if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) { 606 WithColor::warning() << "No relevant mmap event is matched for " 607 << Binary->getName() 608 << ", will use preferred address (" 609 << format("0x%" PRIx64, 610 Binary->getPreferredBaseAddress()) 611 << ") as the base loading address!\n"; 612 // Avoid redundant warning, only warn at the first unmatched sample. 613 Binary->setMissingMMapWarned(true); 614 } 615 } 616 617 void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { 618 // The raw hybird sample started with call stack in FILO order and followed 619 // intermediately by LBR sample 620 // e.g. 621 // 4005dc # call stack leaf 622 // 400634 623 // 400684 # call stack root 624 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 625 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries 626 // 627 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); 628 629 // Parsing call stack and populate into PerfSample.CallStack 630 if (!extractCallstack(TraceIt, Sample->CallStack)) { 631 // Skip the next LBR line matched current call stack 632 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) 633 TraceIt.advance(); 634 return; 635 } 636 637 warnIfMissingMMap(); 638 639 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) { 640 // Parsing LBR stack and populate into PerfSample.LBRStack 641 if (extractLBRStack(TraceIt, Sample->LBRStack)) { 642 // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR 643 // ranges 644 Sample->CallStack.front() = Sample->LBRStack[0].Target; 645 // Record samples by aggregation 646 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; 647 } 648 } else { 649 // LBR sample is encoded in single line after stack sample 650 exitWithError("'Hybrid perf sample is corrupted, No LBR sample line"); 651 } 652 } 653 654 void PerfReaderBase::writeRawProfile(StringRef Filename) { 655 std::error_code EC; 656 raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF); 657 if (EC) 658 exitWithError(EC, Filename); 659 writeRawProfile(OS); 660 } 661 662 // Use ordered map to make the output deterministic 663 using OrderedCounterForPrint = std::map<std::string, SampleCounter *>; 664 665 void PerfReaderBase::writeRawProfile(raw_fd_ostream &OS) { 666 /* 667 Format: 668 [context string] 669 number of entries in RangeCounter 670 from_1-to_1:count_1 671 from_2-to_2:count_2 672 ...... 673 from_n-to_n:count_n 674 number of entries in BranchCounter 675 src_1->dst_1:count_1 676 src_2->dst_2:count_2 677 ...... 678 src_n->dst_n:count_n 679 */ 680 681 OrderedCounterForPrint OrderedCounters; 682 for (auto &CI : SampleCounters) { 683 OrderedCounters[getContextKeyStr(CI.first.getPtr(), Binary)] = &CI.second; 684 } 685 686 auto SCounterPrinter = [&](RangeSample Counter, StringRef Separator, 687 uint32_t Indent) { 688 OS.indent(Indent); 689 OS << Counter.size() << "\n"; 690 for (auto I : Counter) { 691 uint64_t Start = UseOffset ? I.first.first 692 : Binary->offsetToVirtualAddr(I.first.first); 693 uint64_t End = UseOffset ? I.first.second 694 : Binary->offsetToVirtualAddr(I.first.second); 695 OS.indent(Indent); 696 OS << Twine::utohexstr(Start) << Separator << Twine::utohexstr(End) << ":" 697 << I.second << "\n"; 698 } 699 }; 700 701 for (auto &CI : OrderedCounters) { 702 uint32_t Indent = 0; 703 if (!CI.first.empty()) { 704 // Context string key 705 OS << "[" << CI.first << "]\n"; 706 Indent = 2; 707 } 708 709 SampleCounter &Counter = *CI.second; 710 SCounterPrinter(Counter.RangeCounter, "-", Indent); 711 SCounterPrinter(Counter.BranchCounter, "->", Indent); 712 } 713 } 714 715 void LBRPerfReader::computeCounterFromLBR(const PerfSample *Sample, 716 uint64_t Repeat) { 717 SampleCounter &Counter = SampleCounters.begin()->second; 718 uint64_t EndOffeset = 0; 719 for (const LBREntry &LBR : Sample->LBRStack) { 720 uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); 721 uint64_t TargetOffset = Binary->virtualAddrToOffset(LBR.Target); 722 723 if (!LBR.IsArtificial) { 724 Counter.recordBranchCount(SourceOffset, TargetOffset, Repeat); 725 } 726 727 // If this not the first LBR, update the range count between TO of current 728 // LBR and FROM of next LBR. 729 uint64_t StartOffset = TargetOffset; 730 if (EndOffeset != 0) 731 Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); 732 EndOffeset = SourceOffset; 733 } 734 } 735 736 void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { 737 std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>(); 738 // Parsing LBR stack and populate into PerfSample.LBRStack 739 if (extractLBRStack(TraceIt, Sample->LBRStack)) { 740 warnIfMissingMMap(); 741 // Record LBR only samples by aggregation 742 AggregatedSamples[Hashable<PerfSample>(Sample)] += Count; 743 } 744 } 745 746 void LBRPerfReader::generateRawProfile() { 747 // There is no context for LBR only sample, so initialize one entry with 748 // fake "empty" context key. 749 assert(SampleCounters.empty() && 750 "Sample counter map should be empty before raw profile generation"); 751 std::shared_ptr<StringBasedCtxKey> Key = 752 std::make_shared<StringBasedCtxKey>(); 753 Key->genHashCode(); 754 SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter()); 755 for (const auto &Item : AggregatedSamples) { 756 const PerfSample *Sample = Item.first.getPtr(); 757 computeCounterFromLBR(Sample, Item.second); 758 } 759 } 760 761 uint64_t PerfReaderBase::parseAggregatedCount(TraceStream &TraceIt) { 762 // The aggregated count is optional, so do not skip the line and return 1 if 763 // it's unmatched 764 uint64_t Count = 1; 765 if (!TraceIt.getCurrentLine().getAsInteger(10, Count)) 766 TraceIt.advance(); 767 return Count; 768 } 769 770 void PerfReaderBase::parseSample(TraceStream &TraceIt) { 771 uint64_t Count = parseAggregatedCount(TraceIt); 772 assert(Count >= 1 && "Aggregated count should be >= 1!"); 773 parseSample(TraceIt, Count); 774 } 775 776 bool PerfReaderBase::extractMMap2EventForBinary(ProfiledBinary *Binary, 777 StringRef Line, 778 MMapEvent &MMap) { 779 // Parse a line like: 780 // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 781 // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so 782 constexpr static const char *const Pattern = 783 "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: " 784 "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " 785 "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; 786 // Field 0 - whole line 787 // Field 1 - PID 788 // Field 2 - base address 789 // Field 3 - mmapped size 790 // Field 4 - page offset 791 // Field 5 - binary path 792 enum EventIndex { 793 WHOLE_LINE = 0, 794 PID = 1, 795 MMAPPED_ADDRESS = 2, 796 MMAPPED_SIZE = 3, 797 PAGE_OFFSET = 4, 798 BINARY_PATH = 5 799 }; 800 801 Regex RegMmap2(Pattern); 802 SmallVector<StringRef, 6> Fields; 803 bool R = RegMmap2.match(Line, &Fields); 804 if (!R) { 805 std::string ErrorMsg = "Cannot parse mmap event: " + Line.str() + " \n"; 806 exitWithError(ErrorMsg); 807 } 808 Fields[PID].getAsInteger(10, MMap.PID); 809 Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address); 810 Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size); 811 Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset); 812 MMap.BinaryPath = Fields[BINARY_PATH]; 813 if (ShowMmapEvents) { 814 outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at " 815 << format("0x%" PRIx64 ":", MMap.Address) << " \n"; 816 } 817 818 StringRef BinaryName = llvm::sys::path::filename(MMap.BinaryPath); 819 return Binary->getName() == BinaryName; 820 } 821 822 void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) { 823 MMapEvent MMap; 824 if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) 825 updateBinaryAddress(MMap); 826 TraceIt.advance(); 827 } 828 829 void PerfReaderBase::parseEventOrSample(TraceStream &TraceIt) { 830 if (isMMap2Event(TraceIt.getCurrentLine())) 831 parseMMap2Event(TraceIt); 832 else 833 parseSample(TraceIt); 834 } 835 836 void PerfReaderBase::parseAndAggregateTrace() { 837 // Trace line iterator 838 TraceStream TraceIt(PerfTraceFile); 839 while (!TraceIt.isAtEoF()) 840 parseEventOrSample(TraceIt); 841 } 842 843 // A LBR sample is like: 844 // 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... 845 // A heuristic for fast detection by checking whether a 846 // leading " 0x" and the '/' exist. 847 bool PerfReaderBase::isLBRSample(StringRef Line) { 848 // Skip the leading instruction pointer 849 SmallVector<StringRef, 32> Records; 850 Line.trim().split(Records, " ", 2, false); 851 if (Records.size() < 2) 852 return false; 853 if (Records[1].startswith("0x") && Records[1].find('/') != StringRef::npos) 854 return true; 855 return false; 856 } 857 858 bool PerfReaderBase::isMMap2Event(StringRef Line) { 859 // Short cut to avoid string find is possible. 860 if (Line.empty() || Line.size() < 50) 861 return false; 862 863 if (std::isdigit(Line[0])) 864 return false; 865 866 // PERF_RECORD_MMAP2 does not appear at the beginning of the line 867 // for ` perf script --show-mmap-events -i ...` 868 return Line.find("PERF_RECORD_MMAP2") != StringRef::npos; 869 } 870 871 // The raw hybird sample is like 872 // e.g. 873 // 4005dc # call stack leaf 874 // 400634 875 // 400684 # call stack root 876 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 877 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries 878 // Determine the perfscript contains hybrid samples(call stack + LBRs) by 879 // checking whether there is a non-empty call stack immediately followed by 880 // a LBR sample 881 PerfScriptType PerfReaderBase::checkPerfScriptType(StringRef FileName) { 882 TraceStream TraceIt(FileName); 883 uint64_t FrameAddr = 0; 884 while (!TraceIt.isAtEoF()) { 885 // Skip the aggregated count 886 if (!TraceIt.getCurrentLine().getAsInteger(10, FrameAddr)) 887 TraceIt.advance(); 888 889 // Detect sample with call stack 890 int32_t Count = 0; 891 while (!TraceIt.isAtEoF() && 892 !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) { 893 Count++; 894 TraceIt.advance(); 895 } 896 if (!TraceIt.isAtEoF()) { 897 if (isLBRSample(TraceIt.getCurrentLine())) { 898 if (Count > 0) 899 return PERF_LBR_STACK; 900 else 901 return PERF_LBR; 902 } 903 TraceIt.advance(); 904 } 905 } 906 907 exitWithError("Invalid perf script input!"); 908 return PERF_INVALID; 909 } 910 911 void HybridPerfReader::generateRawProfile() { 912 ProfileIsCS = !IgnoreStackSamples; 913 if (ProfileIsCS) 914 unwindSamples(); 915 else 916 LBRPerfReader::generateRawProfile(); 917 } 918 919 void PerfReaderBase::warnTruncatedStack() { 920 for (auto Address : InvalidReturnAddresses) { 921 WithColor::warning() 922 << "Truncated stack sample due to invalid return address at " 923 << format("0x%" PRIx64, Address) 924 << ", likely caused by frame pointer omission\n"; 925 } 926 } 927 928 void PerfReaderBase::parsePerfTraces() { 929 // Parse perf traces and do aggregation. 930 parseAndAggregateTrace(); 931 932 // Generate unsymbolized profile. 933 warnTruncatedStack(); 934 generateRawProfile(); 935 936 if (SkipSymbolization) 937 writeRawProfile(OutputFilename); 938 } 939 940 } // end namespace sampleprof 941 } // end namespace llvm 942