1 //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "PerfReader.h" 9 #include "ProfileGenerator.h" 10 #include "llvm/Support/FileSystem.h" 11 12 static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden, 13 cl::init(false), cl::ZeroOrMore, 14 cl::desc("Print binary load events.")); 15 16 static cl::opt<bool> ShowUnwinderOutput("show-unwinder-output", 17 cl::ReallyHidden, cl::init(false), 18 cl::ZeroOrMore, 19 cl::desc("Print unwinder output")); 20 21 extern cl::opt<bool> ShowDisassemblyOnly; 22 extern cl::opt<bool> ShowSourceLocations; 23 24 namespace llvm { 25 namespace sampleprof { 26 27 void VirtualUnwinder::unwindCall(UnwindState &State) { 28 // The 2nd frame after leaf could be missing if stack sample is 29 // taken when IP is within prolog/epilog, as frame chain isn't 30 // setup yet. Fill in the missing frame in that case. 31 // TODO: Currently we just assume all the addr that can't match the 32 // 2nd frame is in prolog/epilog. In the future, we will switch to 33 // pro/epi tracker(Dwarf CFI) for the precise check. 34 uint64_t Source = State.getCurrentLBRSource(); 35 auto *ParentFrame = State.getParentFrame(); 36 if (ParentFrame == State.getDummyRootPtr() || 37 ParentFrame->Address != Source) { 38 State.switchToFrame(Source); 39 } else { 40 State.popFrame(); 41 } 42 State.InstPtr.update(Source); 43 } 44 45 void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { 46 InstructionPointer &IP = State.InstPtr; 47 uint64_t Target = State.getCurrentLBRTarget(); 48 uint64_t End = IP.Address; 49 if (Binary->usePseudoProbes()) { 50 // We don't need to top frame probe since it should be extracted 51 // from the range. 52 // The outcome of the virtual unwinding with pseudo probes is a 53 // map from a context key to the address range being unwound. 54 // This means basically linear unwinding is not needed for pseudo 55 // probes. The range will be simply recorded here and will be 56 // converted to a list of pseudo probes to report in ProfileGenerator. 57 State.getParentFrame()->recordRangeCount(Target, End, Repeat); 58 } else { 59 // Unwind linear execution part 60 uint64_t LeafAddr = State.CurrentLeafFrame->Address; 61 while (IP.Address >= Target) { 62 uint64_t PrevIP = IP.Address; 63 IP.backward(); 64 // Break into segments for implicit call/return due to inlining 65 bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address); 66 if (!SameInlinee || PrevIP == Target) { 67 State.switchToFrame(LeafAddr); 68 State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat); 69 End = IP.Address; 70 } 71 LeafAddr = IP.Address; 72 } 73 } 74 } 75 76 void VirtualUnwinder::unwindReturn(UnwindState &State) { 77 // Add extra frame as we unwind through the return 78 const LBREntry &LBR = State.getCurrentLBR(); 79 uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target); 80 State.switchToFrame(CallAddr); 81 State.pushFrame(LBR.Source); 82 State.InstPtr.update(LBR.Source); 83 } 84 85 void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) { 86 // TODO: Tolerate tail call for now, as we may see tail call from libraries. 87 // This is only for intra function branches, excluding tail calls. 88 uint64_t Source = State.getCurrentLBRSource(); 89 State.switchToFrame(Source); 90 State.InstPtr.update(Source); 91 } 92 93 std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() { 94 std::shared_ptr<StringBasedCtxKey> KeyStr = 95 std::make_shared<StringBasedCtxKey>(); 96 KeyStr->Context = 97 Binary->getExpandedContextStr(Stack, KeyStr->WasLeafInlined); 98 if (KeyStr->Context.empty()) 99 return nullptr; 100 KeyStr->genHashCode(); 101 return KeyStr; 102 } 103 104 std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() { 105 std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey = 106 std::make_shared<ProbeBasedCtxKey>(); 107 for (auto CallProbe : Stack) { 108 ProbeBasedKey->Probes.emplace_back(CallProbe); 109 } 110 CSProfileGenerator::compressRecursionContext<const PseudoProbe *>( 111 ProbeBasedKey->Probes); 112 ProbeBasedKey->genHashCode(); 113 return ProbeBasedKey; 114 } 115 116 template <typename T> 117 void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, 118 T &Stack) { 119 if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty()) 120 return; 121 122 std::shared_ptr<ContextKey> Key = Stack.getContextKey(); 123 if (Key == nullptr) 124 return; 125 auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter()); 126 SampleCounter &SCounter = Ret.first->second; 127 for (auto &Item : Cur->RangeSamples) { 128 uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); 129 uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); 130 SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item)); 131 } 132 133 for (auto &Item : Cur->BranchSamples) { 134 uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); 135 uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); 136 SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item)); 137 } 138 } 139 140 template <typename T> 141 void VirtualUnwinder::collectSamplesFromFrameTrie( 142 UnwindState::ProfiledFrame *Cur, T &Stack) { 143 if (!Cur->isDummyRoot()) { 144 if (!Stack.pushFrame(Cur)) { 145 // Process truncated context 146 // Start a new traversal ignoring its bottom context 147 T EmptyStack(Binary); 148 collectSamplesFromFrame(Cur, EmptyStack); 149 for (const auto &Item : Cur->Children) { 150 collectSamplesFromFrameTrie(Item.second.get(), EmptyStack); 151 } 152 return; 153 } 154 } 155 156 collectSamplesFromFrame(Cur, Stack); 157 // Process children frame 158 for (const auto &Item : Cur->Children) { 159 collectSamplesFromFrameTrie(Item.second.get(), Stack); 160 } 161 // Recover the call stack 162 Stack.popFrame(); 163 } 164 165 void VirtualUnwinder::collectSamplesFromFrameTrie( 166 UnwindState::ProfiledFrame *Cur) { 167 if (Binary->usePseudoProbes()) { 168 ProbeStack Stack(Binary); 169 collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack); 170 } else { 171 FrameStack Stack(Binary); 172 collectSamplesFromFrameTrie<FrameStack>(Cur, Stack); 173 } 174 } 175 176 void VirtualUnwinder::recordBranchCount(const LBREntry &Branch, 177 UnwindState &State, uint64_t Repeat) { 178 if (Branch.IsArtificial) 179 return; 180 181 if (Binary->usePseudoProbes()) { 182 // Same as recordRangeCount, We don't need to top frame probe since we will 183 // extract it from branch's source address 184 State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target, 185 Repeat); 186 } else { 187 State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target, 188 Repeat); 189 } 190 } 191 192 bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) { 193 // Capture initial state as starting point for unwinding. 194 UnwindState State(Sample); 195 196 // Sanity check - making sure leaf of LBR aligns with leaf of stack sample 197 // Stack sample sometimes can be unreliable, so filter out bogus ones. 198 if (!State.validateInitialState()) 199 return false; 200 201 // Also do not attempt linear unwind for the leaf range as it's incomplete. 202 bool IsLeaf = true; 203 204 // Now process the LBR samples in parrallel with stack sample 205 // Note that we do not reverse the LBR entry order so we can 206 // unwind the sample stack as we walk through LBR entries. 207 while (State.hasNextLBR()) { 208 State.checkStateConsistency(); 209 210 // Unwind implicit calls/returns from inlining, along the linear path, 211 // break into smaller sub section each with its own calling context. 212 if (!IsLeaf) { 213 unwindLinear(State, Repeat); 214 } 215 IsLeaf = false; 216 217 // Save the LBR branch before it gets unwound. 218 const LBREntry &Branch = State.getCurrentLBR(); 219 220 if (isCallState(State)) { 221 // Unwind calls - we know we encountered call if LBR overlaps with 222 // transition between leaf the 2nd frame. Note that for calls that 223 // were not in the original stack sample, we should have added the 224 // extra frame when processing the return paired with this call. 225 unwindCall(State); 226 } else if (isReturnState(State)) { 227 // Unwind returns - check whether the IP is indeed at a return instruction 228 unwindReturn(State); 229 } else { 230 // Unwind branches - for regular intra function branches, we only 231 // need to record branch with context. 232 unwindBranchWithinFrame(State); 233 } 234 State.advanceLBR(); 235 // Record `branch` with calling context after unwinding. 236 recordBranchCount(Branch, State, Repeat); 237 } 238 // As samples are aggregated on trie, record them into counter map 239 collectSamplesFromFrameTrie(State.getDummyRootPtr()); 240 241 return true; 242 } 243 244 void PerfReader::validateCommandLine( 245 cl::list<std::string> &BinaryFilenames, 246 cl::list<std::string> &PerfTraceFilenames) { 247 // Allow the invalid perfscript if we only use to show binary disassembly 248 if (!ShowDisassemblyOnly) { 249 for (auto &File : PerfTraceFilenames) { 250 if (!llvm::sys::fs::exists(File)) { 251 std::string Msg = "Input perf script(" + File + ") doesn't exist!"; 252 exitWithError(Msg); 253 } 254 } 255 } 256 if (BinaryFilenames.size() > 1) { 257 // TODO: remove this if everything is ready to support multiple binaries. 258 exitWithError( 259 "Currently only support one input binary, multiple binaries' " 260 "profile will be merged in one profile and make profile " 261 "summary info inaccurate. Please use `llvm-perfdata` to merge " 262 "profiles from multiple binaries."); 263 } 264 for (auto &Binary : BinaryFilenames) { 265 if (!llvm::sys::fs::exists(Binary)) { 266 std::string Msg = "Input binary(" + Binary + ") doesn't exist!"; 267 exitWithError(Msg); 268 } 269 } 270 if (CSProfileGenerator::MaxCompressionSize < -1) { 271 exitWithError("Value of --compress-recursion should >= -1"); 272 } 273 if (ShowSourceLocations && !ShowDisassemblyOnly) { 274 exitWithError("--show-source-locations should work together with " 275 "--show-disassembly-only!"); 276 } 277 } 278 279 PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames, 280 cl::list<std::string> &PerfTraceFilenames) { 281 validateCommandLine(BinaryFilenames, PerfTraceFilenames); 282 // Load the binaries. 283 for (auto Filename : BinaryFilenames) 284 loadBinary(Filename, /*AllowNameConflict*/ false); 285 } 286 287 ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath, 288 bool AllowNameConflict) { 289 // The binary table is currently indexed by the binary name not the full 290 // binary path. This is because the user-given path may not match the one 291 // that was actually executed. 292 StringRef BinaryName = llvm::sys::path::filename(BinaryPath); 293 294 // Call to load the binary in the ctor of ProfiledBinary. 295 auto Ret = BinaryTable.insert({BinaryName, ProfiledBinary(BinaryPath)}); 296 297 if (!Ret.second && !AllowNameConflict) { 298 std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() + 299 " and " + Ret.first->second.getPath().str() + " \n"; 300 exitWithError(ErrorMsg); 301 } 302 303 return Ret.first->second; 304 } 305 306 void PerfReader::updateBinaryAddress(const MMapEvent &Event) { 307 // Load the binary. 308 StringRef BinaryPath = Event.BinaryPath; 309 StringRef BinaryName = llvm::sys::path::filename(BinaryPath); 310 311 auto I = BinaryTable.find(BinaryName); 312 // Drop the event which doesn't belong to user-provided binaries 313 // or if its image is loaded at the same address 314 if (I == BinaryTable.end() || Event.Address == I->second.getBaseAddress()) 315 return; 316 317 ProfiledBinary &Binary = I->second; 318 319 if (Event.Offset == Binary.getTextSegmentOffset()) { 320 // A binary image could be unloaded and then reloaded at different 321 // place, so update the address map here. 322 // Only update for the first executable segment and assume all other 323 // segments are loaded at consecutive memory addresses, which is the case on 324 // X64. 325 AddrToBinaryMap.erase(Binary.getBaseAddress()); 326 AddrToBinaryMap[Event.Address] = &Binary; 327 328 // Update binary load address. 329 Binary.setBaseAddress(Event.Address); 330 } else { 331 // Verify segments are loaded consecutively. 332 const auto &Offsets = Binary.getTextSegmentOffsets(); 333 auto It = std::lower_bound(Offsets.begin(), Offsets.end(), Event.Offset); 334 if (It != Offsets.end() && *It == Event.Offset) { 335 // The event is for loading a separate executable segment. 336 auto I = std::distance(Offsets.begin(), It); 337 const auto &PreferredAddrs = Binary.getPreferredTextSegmentAddresses(); 338 if (PreferredAddrs[I] - Binary.getPreferredBaseAddress() != 339 Event.Address - Binary.getBaseAddress()) 340 exitWithError("Executable segments not loaded consecutively"); 341 } else { 342 if (It == Offsets.begin()) 343 exitWithError("File offset not found"); 344 else { 345 // Find the segment the event falls in. A large segment could be loaded 346 // via multiple mmap calls with consecutive memory addresses. 347 --It; 348 assert(*It < Event.Offset); 349 if (Event.Offset - *It != Event.Address - Binary.getBaseAddress()) 350 exitWithError("Segment not loaded by consecutive mmaps"); 351 } 352 } 353 } 354 } 355 356 ProfiledBinary *PerfReader::getBinary(uint64_t Address) { 357 auto Iter = AddrToBinaryMap.lower_bound(Address); 358 if (Iter == AddrToBinaryMap.end() || Iter->first != Address) { 359 if (Iter == AddrToBinaryMap.begin()) 360 return nullptr; 361 Iter--; 362 } 363 return Iter->second; 364 } 365 366 // Use ordered map to make the output deterministic 367 using OrderedCounterForPrint = std::map<std::string, RangeSample>; 368 369 static void printSampleCounter(OrderedCounterForPrint &OrderedCounter) { 370 for (auto Range : OrderedCounter) { 371 outs() << Range.first << "\n"; 372 for (auto I : Range.second) { 373 outs() << " (" << format("%" PRIx64, I.first.first) << ", " 374 << format("%" PRIx64, I.first.second) << "): " << I.second << "\n"; 375 } 376 } 377 } 378 379 static std::string getContextKeyStr(ContextKey *K, 380 const ProfiledBinary *Binary) { 381 std::string ContextStr; 382 if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) { 383 return CtxKey->Context; 384 } else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) { 385 SmallVector<std::string, 16> ContextStack; 386 for (const auto *Probe : CtxKey->Probes) { 387 Binary->getInlineContextForProbe(Probe, ContextStack, true); 388 } 389 for (const auto &Context : ContextStack) { 390 if (ContextStr.size()) 391 ContextStr += " @ "; 392 ContextStr += Context; 393 } 394 } 395 return ContextStr; 396 } 397 398 static void printRangeCounter(ContextSampleCounterMap &Counter, 399 const ProfiledBinary *Binary) { 400 OrderedCounterForPrint OrderedCounter; 401 for (auto &CI : Counter) { 402 OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] = 403 CI.second.RangeCounter; 404 } 405 printSampleCounter(OrderedCounter); 406 } 407 408 static void printBranchCounter(ContextSampleCounterMap &Counter, 409 const ProfiledBinary *Binary) { 410 OrderedCounterForPrint OrderedCounter; 411 for (auto &CI : Counter) { 412 OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] = 413 CI.second.BranchCounter; 414 } 415 printSampleCounter(OrderedCounter); 416 } 417 418 void PerfReader::printUnwinderOutput() { 419 for (auto I : BinarySampleCounters) { 420 const ProfiledBinary *Binary = I.first; 421 outs() << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n"; 422 printRangeCounter(I.second, Binary); 423 outs() << "\nBinary(" << Binary->getName().str() << ")'s Branch Counter:\n"; 424 printBranchCounter(I.second, Binary); 425 } 426 } 427 428 void PerfReader::unwindSamples() { 429 for (const auto &Item : AggregatedSamples) { 430 const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr()); 431 VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary], 432 Sample->Binary); 433 Unwinder.unwind(Sample, Item.second); 434 } 435 436 if (ShowUnwinderOutput) 437 printUnwinderOutput(); 438 } 439 440 bool PerfReader::extractLBRStack(TraceStream &TraceIt, 441 SmallVectorImpl<LBREntry> &LBRStack, 442 ProfiledBinary *Binary) { 443 // The raw format of LBR stack is like: 444 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 445 // ... 0x4005c8/0x4005dc/P/-/-/0 446 // It's in FIFO order and seperated by whitespace. 447 SmallVector<StringRef, 32> Records; 448 TraceIt.getCurrentLine().split(Records, " "); 449 450 // Extract leading instruction pointer if present, use single 451 // list to pass out as reference. 452 size_t Index = 0; 453 if (!Records.empty() && Records[0].find('/') == StringRef::npos) { 454 Index = 1; 455 } 456 // Now extract LBR samples - note that we do not reverse the 457 // LBR entry order so we can unwind the sample stack as we walk 458 // through LBR entries. 459 uint64_t PrevTrDst = 0; 460 461 while (Index < Records.size()) { 462 auto &Token = Records[Index++]; 463 if (Token.size() == 0) 464 continue; 465 466 SmallVector<StringRef, 8> Addresses; 467 Token.split(Addresses, "/"); 468 uint64_t Src; 469 uint64_t Dst; 470 Addresses[0].substr(2).getAsInteger(16, Src); 471 Addresses[1].substr(2).getAsInteger(16, Dst); 472 473 bool SrcIsInternal = Binary->addressIsCode(Src); 474 bool DstIsInternal = Binary->addressIsCode(Dst); 475 bool IsExternal = !SrcIsInternal && !DstIsInternal; 476 bool IsIncoming = !SrcIsInternal && DstIsInternal; 477 bool IsOutgoing = SrcIsInternal && !DstIsInternal; 478 bool IsArtificial = false; 479 480 // Ignore branches outside the current binary. 481 if (IsExternal) 482 continue; 483 484 if (IsOutgoing) { 485 if (!PrevTrDst) { 486 // This is unpaired outgoing jump which is likely due to interrupt or 487 // incomplete LBR trace. Ignore current and subsequent entries since 488 // they are likely in different contexts. 489 break; 490 } 491 492 if (Binary->addressIsReturn(Src)) { 493 // In a callback case, a return from internal code, say A, to external 494 // runtime can happen. The external runtime can then call back to 495 // another internal routine, say B. Making an artificial branch that 496 // looks like a return from A to B can confuse the unwinder to treat 497 // the instruction before B as the call instruction. 498 break; 499 } 500 501 // For transition to external code, group the Source with the next 502 // availabe transition target. 503 Dst = PrevTrDst; 504 PrevTrDst = 0; 505 IsArtificial = true; 506 } else { 507 if (PrevTrDst) { 508 // If we have seen an incoming transition from external code to internal 509 // code, but not a following outgoing transition, the incoming 510 // transition is likely due to interrupt which is usually unpaired. 511 // Ignore current and subsequent entries since they are likely in 512 // different contexts. 513 break; 514 } 515 516 if (IsIncoming) { 517 // For transition from external code (such as dynamic libraries) to 518 // the current binary, keep track of the branch target which will be 519 // grouped with the Source of the last transition from the current 520 // binary. 521 PrevTrDst = Dst; 522 continue; 523 } 524 } 525 526 // TODO: filter out buggy duplicate branches on Skylake 527 528 LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial)); 529 } 530 TraceIt.advance(); 531 return !LBRStack.empty(); 532 } 533 534 bool PerfReader::extractCallstack(TraceStream &TraceIt, 535 SmallVectorImpl<uint64_t> &CallStack) { 536 // The raw format of call stack is like: 537 // 4005dc # leaf frame 538 // 400634 539 // 400684 # root frame 540 // It's in bottom-up order with each frame in one line. 541 542 // Extract stack frames from sample 543 ProfiledBinary *Binary = nullptr; 544 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { 545 StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); 546 uint64_t FrameAddr = 0; 547 if (FrameStr.getAsInteger(16, FrameAddr)) { 548 // We might parse a non-perf sample line like empty line and comments, 549 // skip it 550 TraceIt.advance(); 551 return false; 552 } 553 TraceIt.advance(); 554 if (!Binary) { 555 Binary = getBinary(FrameAddr); 556 // we might have addr not match the MMAP, skip it 557 if (!Binary) { 558 if (AddrToBinaryMap.size() == 0) 559 WithColor::warning() << "No MMAP event in the perfscript, create it " 560 "with '--show-mmap-events'\n"; 561 break; 562 } 563 } 564 // Currently intermixed frame from different binaries is not supported. 565 // Ignore bottom frames not from binary of interest. 566 if (!Binary->addressIsCode(FrameAddr)) 567 break; 568 569 // We need to translate return address to call address 570 // for non-leaf frames 571 if (!CallStack.empty()) { 572 FrameAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); 573 } 574 575 CallStack.emplace_back(FrameAddr); 576 } 577 578 // Skip other unrelated line, find the next valid LBR line 579 // Note that even for empty call stack, we should skip the address at the 580 // bottom, otherwise the following pass may generate a truncated callstack 581 while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { 582 TraceIt.advance(); 583 } 584 // Filter out broken stack sample. We may not have complete frame info 585 // if sample end up in prolog/epilog, the result is dangling context not 586 // connected to entry point. This should be relatively rare thus not much 587 // impact on overall profile quality. However we do want to filter them 588 // out to reduce the number of different calling contexts. One instance 589 // of such case - when sample landed in prolog/epilog, somehow stack 590 // walking will be broken in an unexpected way that higher frames will be 591 // missing. 592 return !CallStack.empty() && 593 !Binary->addressInPrologEpilog(CallStack.front()); 594 } 595 596 void PerfReader::parseHybridSample(TraceStream &TraceIt) { 597 // The raw hybird sample started with call stack in FILO order and followed 598 // intermediately by LBR sample 599 // e.g. 600 // 4005dc # call stack leaf 601 // 400634 602 // 400684 # call stack root 603 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 604 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries 605 // 606 std::shared_ptr<HybridSample> Sample = std::make_shared<HybridSample>(); 607 608 // Parsing call stack and populate into HybridSample.CallStack 609 if (!extractCallstack(TraceIt, Sample->CallStack)) { 610 // Skip the next LBR line matched current call stack 611 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) 612 TraceIt.advance(); 613 return; 614 } 615 // Set the binary current sample belongs to 616 Sample->Binary = getBinary(Sample->CallStack.front()); 617 618 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) { 619 // Parsing LBR stack and populate into HybridSample.LBRStack 620 if (extractLBRStack(TraceIt, Sample->LBRStack, Sample->Binary)) { 621 // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR 622 // ranges 623 Sample->CallStack.front() = Sample->LBRStack[0].Target; 624 // Record samples by aggregation 625 Sample->genHashCode(); 626 AggregatedSamples[Hashable<PerfSample>(Sample)]++; 627 } 628 } else { 629 // LBR sample is encoded in single line after stack sample 630 exitWithError("'Hybrid perf sample is corrupted, No LBR sample line"); 631 } 632 } 633 634 void PerfReader::parseMMap2Event(TraceStream &TraceIt) { 635 // Parse a line like: 636 // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 637 // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so 638 constexpr static const char *const Pattern = 639 "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: " 640 "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " 641 "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; 642 // Field 0 - whole line 643 // Field 1 - PID 644 // Field 2 - base address 645 // Field 3 - mmapped size 646 // Field 4 - page offset 647 // Field 5 - binary path 648 enum EventIndex { 649 WHOLE_LINE = 0, 650 PID = 1, 651 MMAPPED_ADDRESS = 2, 652 MMAPPED_SIZE = 3, 653 PAGE_OFFSET = 4, 654 BINARY_PATH = 5 655 }; 656 657 Regex RegMmap2(Pattern); 658 SmallVector<StringRef, 6> Fields; 659 bool R = RegMmap2.match(TraceIt.getCurrentLine(), &Fields); 660 if (!R) { 661 std::string ErrorMsg = "Cannot parse mmap event: Line" + 662 Twine(TraceIt.getLineNumber()).str() + ": " + 663 TraceIt.getCurrentLine().str() + " \n"; 664 exitWithError(ErrorMsg); 665 } 666 MMapEvent Event; 667 Fields[PID].getAsInteger(10, Event.PID); 668 Fields[MMAPPED_ADDRESS].getAsInteger(0, Event.Address); 669 Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size); 670 Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset); 671 Event.BinaryPath = Fields[BINARY_PATH]; 672 updateBinaryAddress(Event); 673 if (ShowMmapEvents) { 674 outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at " 675 << format("0x%" PRIx64 ":", Event.Address) << " \n"; 676 } 677 TraceIt.advance(); 678 } 679 680 void PerfReader::parseEventOrSample(TraceStream &TraceIt) { 681 if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2")) 682 parseMMap2Event(TraceIt); 683 else if (getPerfScriptType() == PERF_LBR_STACK) 684 parseHybridSample(TraceIt); 685 else { 686 // TODO: parse other type sample 687 TraceIt.advance(); 688 } 689 } 690 691 void PerfReader::parseAndAggregateTrace(StringRef Filename) { 692 // Trace line iterator 693 TraceStream TraceIt(Filename); 694 while (!TraceIt.isAtEoF()) 695 parseEventOrSample(TraceIt); 696 } 697 698 void PerfReader::checkAndSetPerfType( 699 cl::list<std::string> &PerfTraceFilenames) { 700 for (auto FileName : PerfTraceFilenames) { 701 PerfScriptType Type = checkPerfScriptType(FileName); 702 if (Type == PERF_INVALID) 703 exitWithError("Invalid perf script input!"); 704 if (PerfType != PERF_UNKNOWN && PerfType != Type) 705 exitWithError("Inconsistent sample among different perf scripts"); 706 PerfType = Type; 707 } 708 } 709 710 void PerfReader::generateRawProfile() { 711 if (getPerfScriptType() == PERF_LBR_STACK) { 712 // Unwind samples if it's hybird sample 713 unwindSamples(); 714 } else if (getPerfScriptType() == PERF_LBR) { 715 // TODO: range overlap computation for regular AutoFDO 716 } 717 } 718 719 void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) { 720 // Check and set current perfscript type 721 checkAndSetPerfType(PerfTraceFilenames); 722 // Parse perf traces and do aggregation. 723 for (auto Filename : PerfTraceFilenames) 724 parseAndAggregateTrace(Filename); 725 726 generateRawProfile(); 727 } 728 729 } // end namespace sampleprof 730 } // end namespace llvm 731