1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include <algorithm> 14 #include <cstdint> 15 #include <memory> 16 #include <type_traits> 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/DenseMapInfo.h" 21 #include "llvm/ADT/SetVector.h" 22 #include "llvm/ADT/SmallSet.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/StringExtras.h" 25 #include "llvm/ADT/Twine.h" 26 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 27 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 28 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 29 #include "llvm/Object/Binary.h" 30 #include "llvm/Object/BuildID.h" 31 #include "llvm/Object/ELFObjectFile.h" 32 #include "llvm/Object/ObjectFile.h" 33 #include "llvm/ProfileData/InstrProf.h" 34 #include "llvm/ProfileData/MemProf.h" 35 #include "llvm/ProfileData/MemProfData.inc" 36 #include "llvm/ProfileData/RawMemProfReader.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/Endian.h" 39 #include "llvm/Support/Error.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/Path.h" 42 43 #define DEBUG_TYPE "memprof" 44 45 namespace llvm { 46 namespace memprof { 47 namespace { 48 template <class T = uint64_t> inline T alignedRead(const char *Ptr) { 49 static_assert(std::is_pod<T>::value, "Not a pod type."); 50 assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); 51 return *reinterpret_cast<const T *>(Ptr); 52 } 53 54 Error checkBuffer(const MemoryBuffer &Buffer) { 55 if (!RawMemProfReader::hasFormat(Buffer)) 56 return make_error<InstrProfError>(instrprof_error::bad_magic); 57 58 if (Buffer.getBufferSize() == 0) 59 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 60 61 if (Buffer.getBufferSize() < sizeof(Header)) { 62 return make_error<InstrProfError>(instrprof_error::truncated); 63 } 64 65 // The size of the buffer can be > header total size since we allow repeated 66 // serialization of memprof profiles to the same file. 67 uint64_t TotalSize = 0; 68 const char *Next = Buffer.getBufferStart(); 69 while (Next < Buffer.getBufferEnd()) { 70 auto *H = reinterpret_cast<const Header *>(Next); 71 if (H->Version != MEMPROF_RAW_VERSION) { 72 return make_error<InstrProfError>(instrprof_error::unsupported_version); 73 } 74 75 TotalSize += H->TotalSize; 76 Next += H->TotalSize; 77 } 78 79 if (Buffer.getBufferSize() != TotalSize) { 80 return make_error<InstrProfError>(instrprof_error::malformed); 81 } 82 return Error::success(); 83 } 84 85 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) { 86 using namespace support; 87 88 const uint64_t NumItemsToRead = 89 endian::readNext<uint64_t, little, unaligned>(Ptr); 90 llvm::SmallVector<SegmentEntry> Items; 91 for (uint64_t I = 0; I < NumItemsToRead; I++) { 92 Items.push_back(*reinterpret_cast<const SegmentEntry *>( 93 Ptr + I * sizeof(SegmentEntry))); 94 } 95 return Items; 96 } 97 98 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 99 readMemInfoBlocks(const char *Ptr) { 100 using namespace support; 101 102 const uint64_t NumItemsToRead = 103 endian::readNext<uint64_t, little, unaligned>(Ptr); 104 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items; 105 for (uint64_t I = 0; I < NumItemsToRead; I++) { 106 const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr); 107 const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr); 108 Items.push_back({Id, MIB}); 109 // Only increment by size of MIB since readNext implicitly increments. 110 Ptr += sizeof(MemInfoBlock); 111 } 112 return Items; 113 } 114 115 CallStackMap readStackInfo(const char *Ptr) { 116 using namespace support; 117 118 const uint64_t NumItemsToRead = 119 endian::readNext<uint64_t, little, unaligned>(Ptr); 120 CallStackMap Items; 121 122 for (uint64_t I = 0; I < NumItemsToRead; I++) { 123 const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr); 124 const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr); 125 126 SmallVector<uint64_t> CallStack; 127 for (uint64_t J = 0; J < NumPCs; J++) { 128 CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr)); 129 } 130 131 Items[StackId] = CallStack; 132 } 133 return Items; 134 } 135 136 // Merges the contents of stack information in \p From to \p To. Returns true if 137 // any stack ids observed previously map to a different set of program counter 138 // addresses. 139 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) { 140 for (const auto &IdStack : From) { 141 auto I = To.find(IdStack.first); 142 if (I == To.end()) { 143 To[IdStack.first] = IdStack.second; 144 } else { 145 // Check that the PCs are the same (in order). 146 if (IdStack.second != I->second) 147 return true; 148 } 149 } 150 return false; 151 } 152 153 Error report(Error E, const StringRef Context) { 154 return joinErrors(createStringError(inconvertibleErrorCode(), Context), 155 std::move(E)); 156 } 157 158 bool isRuntimePath(const StringRef Path) { 159 const StringRef Filename = llvm::sys::path::filename(Path); 160 // This list should be updated in case new files with additional interceptors 161 // are added to the memprof runtime. 162 return Filename.equals("memprof_malloc_linux.cpp") || 163 Filename.equals("memprof_interceptors.cpp") || 164 Filename.equals("memprof_new_delete.cpp"); 165 } 166 167 std::string getBuildIdString(const SegmentEntry &Entry) { 168 // If the build id is unset print a helpful string instead of all zeros. 169 if (Entry.BuildIdSize == 0) 170 return "<None>"; 171 172 std::string Str; 173 raw_string_ostream OS(Str); 174 for (size_t I = 0; I < Entry.BuildIdSize; I++) { 175 OS << format_hex_no_prefix(Entry.BuildId[I], 2); 176 } 177 return OS.str(); 178 } 179 } // namespace 180 181 Expected<std::unique_ptr<RawMemProfReader>> 182 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, 183 bool KeepName) { 184 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 185 if (std::error_code EC = BufferOr.getError()) 186 return report(errorCodeToError(EC), Path.getSingleStringRef()); 187 188 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 189 return create(std::move(Buffer), ProfiledBinary, KeepName); 190 } 191 192 Expected<std::unique_ptr<RawMemProfReader>> 193 RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 194 const StringRef ProfiledBinary, bool KeepName) { 195 if (Error E = checkBuffer(*Buffer)) 196 return report(std::move(E), Buffer->getBufferIdentifier()); 197 198 if (ProfiledBinary.empty()) { 199 // Peek the build ids to print a helpful error message. 200 const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get()); 201 std::string ErrorMessage( 202 R"(Path to profiled binary is empty, expected binary with one of the following build ids: 203 )"); 204 for (const auto &Id : BuildIds) { 205 ErrorMessage += "\n BuildId: "; 206 ErrorMessage += Id; 207 } 208 return report( 209 make_error<StringError>(ErrorMessage, inconvertibleErrorCode()), 210 /*Context=*/""); 211 } 212 213 auto BinaryOr = llvm::object::createBinary(ProfiledBinary); 214 if (!BinaryOr) { 215 return report(BinaryOr.takeError(), ProfiledBinary); 216 } 217 218 // Use new here since constructor is private. 219 std::unique_ptr<RawMemProfReader> Reader( 220 new RawMemProfReader(std::move(BinaryOr.get()), KeepName)); 221 if (Error E = Reader->initialize(std::move(Buffer))) { 222 return std::move(E); 223 } 224 return std::move(Reader); 225 } 226 227 bool RawMemProfReader::hasFormat(const StringRef Path) { 228 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 229 if (!BufferOr) 230 return false; 231 232 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 233 return hasFormat(*Buffer); 234 } 235 236 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { 237 if (Buffer.getBufferSize() < sizeof(uint64_t)) 238 return false; 239 // Aligned read to sanity check that the buffer was allocated with at least 8b 240 // alignment. 241 const uint64_t Magic = alignedRead(Buffer.getBufferStart()); 242 return Magic == MEMPROF_RAW_MAGIC_64; 243 } 244 245 void RawMemProfReader::printYAML(raw_ostream &OS) { 246 uint64_t NumAllocFunctions = 0, NumMibInfo = 0; 247 for (const auto &KV : FunctionProfileData) { 248 const size_t NumAllocSites = KV.second.AllocSites.size(); 249 if (NumAllocSites > 0) { 250 NumAllocFunctions++; 251 NumMibInfo += NumAllocSites; 252 } 253 } 254 255 OS << "MemprofProfile:\n"; 256 OS << " Summary:\n"; 257 OS << " Version: " << MEMPROF_RAW_VERSION << "\n"; 258 OS << " NumSegments: " << SegmentInfo.size() << "\n"; 259 OS << " NumMibInfo: " << NumMibInfo << "\n"; 260 OS << " NumAllocFunctions: " << NumAllocFunctions << "\n"; 261 OS << " NumStackOffsets: " << StackMap.size() << "\n"; 262 // Print out the segment information. 263 OS << " Segments:\n"; 264 for (const auto &Entry : SegmentInfo) { 265 OS << " -\n"; 266 OS << " BuildId: " << getBuildIdString(Entry) << "\n"; 267 OS << " Start: 0x" << llvm::utohexstr(Entry.Start) << "\n"; 268 OS << " End: 0x" << llvm::utohexstr(Entry.End) << "\n"; 269 OS << " Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n"; 270 } 271 // Print out the merged contents of the profiles. 272 OS << " Records:\n"; 273 for (const auto &Entry : *this) { 274 OS << " -\n"; 275 OS << " FunctionGUID: " << Entry.first << "\n"; 276 Entry.second.print(OS); 277 } 278 } 279 280 Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) { 281 const StringRef FileName = Binary.getBinary()->getFileName(); 282 283 auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary()); 284 if (!ElfObject) { 285 return report(make_error<StringError>(Twine("Not an ELF file: "), 286 inconvertibleErrorCode()), 287 FileName); 288 } 289 290 // Check whether the profiled binary was built with position independent code 291 // (PIC). Perform sanity checks for assumptions we rely on to simplify 292 // symbolization. 293 auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject); 294 const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile(); 295 auto PHdrsOr = ElfFile.program_headers(); 296 if (!PHdrsOr) 297 return report( 298 make_error<StringError>(Twine("Could not read program headers: "), 299 inconvertibleErrorCode()), 300 FileName); 301 302 int NumExecutableSegments = 0; 303 for (const auto &Phdr : *PHdrsOr) { 304 if (Phdr.p_type == ELF::PT_LOAD) { 305 if (Phdr.p_flags & ELF::PF_X) { 306 // We assume only one text segment in the main binary for simplicity and 307 // reduce the overhead of checking multiple ranges during symbolization. 308 if (++NumExecutableSegments > 1) { 309 return report( 310 make_error<StringError>( 311 "Expect only one executable load segment in the binary", 312 inconvertibleErrorCode()), 313 FileName); 314 } 315 // Segment will always be loaded at a page boundary, expect it to be 316 // aligned already. Assume 4K pagesize for the machine from which the 317 // profile has been collected. This should be fine for now, in case we 318 // want to support other pagesizes it can be recorded in the raw profile 319 // during collection. 320 PreferredTextSegmentAddress = Phdr.p_vaddr; 321 assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) && 322 "Expect p_vaddr to always be page aligned"); 323 assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization."); 324 } 325 } 326 } 327 328 auto Triple = ElfObject->makeTriple(); 329 if (!Triple.isX86()) 330 return report(make_error<StringError>(Twine("Unsupported target: ") + 331 Triple.getArchName(), 332 inconvertibleErrorCode()), 333 FileName); 334 335 auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 336 std::unique_ptr<DIContext> Context = DWARFContext::create( 337 *Object, DWARFContext::ProcessDebugRelocations::Process); 338 339 auto SOFOr = symbolize::SymbolizableObjectFile::create( 340 Object, std::move(Context), /*UntagAddresses=*/false); 341 if (!SOFOr) 342 return report(SOFOr.takeError(), FileName); 343 Symbolizer = std::move(SOFOr.get()); 344 345 // Process the raw profile. 346 if (Error E = readRawProfile(std::move(DataBuffer))) 347 return E; 348 349 if (Error E = setupForSymbolization()) 350 return E; 351 352 if (Error E = symbolizeAndFilterStackFrames()) 353 return E; 354 355 return mapRawProfileToRecords(); 356 } 357 358 Error RawMemProfReader::setupForSymbolization() { 359 auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 360 object::BuildIDRef BinaryId = object::getBuildID(Object); 361 if (BinaryId.empty()) 362 return make_error<StringError>(Twine("No build id found in binary ") + 363 Binary.getBinary()->getFileName(), 364 inconvertibleErrorCode()); 365 366 int NumMatched = 0; 367 for (const auto &Entry : SegmentInfo) { 368 llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize); 369 if (BinaryId == SegmentId) { 370 // We assume only one text segment in the main binary for simplicity and 371 // reduce the overhead of checking multiple ranges during symbolization. 372 if (++NumMatched > 1) { 373 return make_error<StringError>( 374 "We expect only one executable segment in the profiled binary", 375 inconvertibleErrorCode()); 376 } 377 ProfiledTextSegmentStart = Entry.Start; 378 ProfiledTextSegmentEnd = Entry.End; 379 } 380 } 381 assert(NumMatched != 0 && "No matching executable segments in segment info."); 382 assert((PreferredTextSegmentAddress == 0 || 383 (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) && 384 "Expect text segment address to be 0 or equal to profiled text " 385 "segment start."); 386 return Error::success(); 387 } 388 389 Error RawMemProfReader::mapRawProfileToRecords() { 390 // Hold a mapping from function to each callsite location we encounter within 391 // it that is part of some dynamic allocation context. The location is stored 392 // as a pointer to a symbolized list of inline frames. 393 using LocationPtr = const llvm::SmallVector<FrameId> *; 394 llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>> 395 PerFunctionCallSites; 396 397 // Convert the raw profile callstack data into memprof records. While doing so 398 // keep track of related contexts so that we can fill these in later. 399 for (const auto &Entry : CallstackProfileData) { 400 const uint64_t StackId = Entry.first; 401 402 auto It = StackMap.find(StackId); 403 if (It == StackMap.end()) 404 return make_error<InstrProfError>( 405 instrprof_error::malformed, 406 "memprof callstack record does not contain id: " + Twine(StackId)); 407 408 // Construct the symbolized callstack. 409 llvm::SmallVector<FrameId> Callstack; 410 Callstack.reserve(It->getSecond().size()); 411 412 llvm::ArrayRef<uint64_t> Addresses = It->getSecond(); 413 for (size_t I = 0; I < Addresses.size(); I++) { 414 const uint64_t Address = Addresses[I]; 415 assert(SymbolizedFrame.count(Address) > 0 && 416 "Address not found in SymbolizedFrame map"); 417 const SmallVector<FrameId> &Frames = SymbolizedFrame[Address]; 418 419 assert(!idToFrame(Frames.back()).IsInlineFrame && 420 "The last frame should not be inlined"); 421 422 // Record the callsites for each function. Skip the first frame of the 423 // first address since it is the allocation site itself that is recorded 424 // as an alloc site. 425 for (size_t J = 0; J < Frames.size(); J++) { 426 if (I == 0 && J == 0) 427 continue; 428 // We attach the entire bottom-up frame here for the callsite even 429 // though we only need the frames up to and including the frame for 430 // Frames[J].Function. This will enable better deduplication for 431 // compression in the future. 432 const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function; 433 PerFunctionCallSites[Guid].insert(&Frames); 434 } 435 436 // Add all the frames to the current allocation callstack. 437 Callstack.append(Frames.begin(), Frames.end()); 438 } 439 440 // We attach the memprof record to each function bottom-up including the 441 // first non-inline frame. 442 for (size_t I = 0; /*Break out using the condition below*/; I++) { 443 const Frame &F = idToFrame(Callstack[I]); 444 auto Result = 445 FunctionProfileData.insert({F.Function, IndexedMemProfRecord()}); 446 IndexedMemProfRecord &Record = Result.first->second; 447 Record.AllocSites.emplace_back(Callstack, Entry.second); 448 449 if (!F.IsInlineFrame) 450 break; 451 } 452 } 453 454 // Fill in the related callsites per function. 455 for (const auto &[Id, Locs] : PerFunctionCallSites) { 456 // Some functions may have only callsite data and no allocation data. Here 457 // we insert a new entry for callsite data if we need to. 458 auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()}); 459 IndexedMemProfRecord &Record = Result.first->second; 460 for (LocationPtr Loc : Locs) { 461 Record.CallSites.push_back(*Loc); 462 } 463 } 464 465 return Error::success(); 466 } 467 468 Error RawMemProfReader::symbolizeAndFilterStackFrames() { 469 // The specifier to use when symbolization is requested. 470 const DILineInfoSpecifier Specifier( 471 DILineInfoSpecifier::FileLineInfoKind::RawValue, 472 DILineInfoSpecifier::FunctionNameKind::LinkageName); 473 474 // For entries where all PCs in the callstack are discarded, we erase the 475 // entry from the stack map. 476 llvm::SmallVector<uint64_t> EntriesToErase; 477 // We keep track of all prior discarded entries so that we can avoid invoking 478 // the symbolizer for such entries. 479 llvm::DenseSet<uint64_t> AllVAddrsToDiscard; 480 for (auto &Entry : StackMap) { 481 for (const uint64_t VAddr : Entry.getSecond()) { 482 // Check if we have already symbolized and cached the result or if we 483 // don't want to attempt symbolization since we know this address is bad. 484 // In this case the address is also removed from the current callstack. 485 if (SymbolizedFrame.count(VAddr) > 0 || 486 AllVAddrsToDiscard.contains(VAddr)) 487 continue; 488 489 Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode( 490 getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false); 491 if (!DIOr) 492 return DIOr.takeError(); 493 DIInliningInfo DI = DIOr.get(); 494 495 // Drop frames which we can't symbolize or if they belong to the runtime. 496 if (DI.getFrame(0).FunctionName == DILineInfo::BadString || 497 isRuntimePath(DI.getFrame(0).FileName)) { 498 AllVAddrsToDiscard.insert(VAddr); 499 continue; 500 } 501 502 for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames; 503 I++) { 504 const auto &DIFrame = DI.getFrame(I); 505 const uint64_t Guid = 506 IndexedMemProfRecord::getGUID(DIFrame.FunctionName); 507 const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, 508 // Only the last entry is not an inlined location. 509 I != NumFrames - 1); 510 // Here we retain a mapping from the GUID to symbol name instead of 511 // adding it to the frame object directly to reduce memory overhead. 512 // This is because there can be many unique frames, particularly for 513 // callsite frames. 514 if (KeepSymbolName) 515 GuidToSymbolName.insert({Guid, DIFrame.FunctionName}); 516 517 const FrameId Hash = F.hash(); 518 IdToFrame.insert({Hash, F}); 519 SymbolizedFrame[VAddr].push_back(Hash); 520 } 521 } 522 523 auto &CallStack = Entry.getSecond(); 524 llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) { 525 return AllVAddrsToDiscard.contains(A); 526 }); 527 if (CallStack.empty()) 528 EntriesToErase.push_back(Entry.getFirst()); 529 } 530 531 // Drop the entries where the callstack is empty. 532 for (const uint64_t Id : EntriesToErase) { 533 StackMap.erase(Id); 534 CallstackProfileData.erase(Id); 535 } 536 537 if (StackMap.empty()) 538 return make_error<InstrProfError>( 539 instrprof_error::malformed, 540 "no entries in callstack map after symbolization"); 541 542 return Error::success(); 543 } 544 545 std::vector<std::string> 546 RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) { 547 const char *Next = DataBuffer->getBufferStart(); 548 // Use a set + vector since a profile file may contain multiple raw profile 549 // dumps, each with segment information. We want them unique and in order they 550 // were stored in the profile; the profiled binary should be the first entry. 551 // The runtime uses dl_iterate_phdr and the "... first object visited by 552 // callback is the main program." 553 // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html 554 std::vector<std::string> BuildIds; 555 llvm::SmallSet<std::string, 10> BuildIdsSet; 556 while (Next < DataBuffer->getBufferEnd()) { 557 auto *Header = reinterpret_cast<const memprof::Header *>(Next); 558 559 const llvm::SmallVector<SegmentEntry> Entries = 560 readSegmentEntries(Next + Header->SegmentOffset); 561 562 for (const auto &Entry : Entries) { 563 const std::string Id = getBuildIdString(Entry); 564 if (BuildIdsSet.contains(Id)) 565 continue; 566 BuildIds.push_back(Id); 567 BuildIdsSet.insert(Id); 568 } 569 570 Next += Header->TotalSize; 571 } 572 return BuildIds; 573 } 574 575 Error RawMemProfReader::readRawProfile( 576 std::unique_ptr<MemoryBuffer> DataBuffer) { 577 const char *Next = DataBuffer->getBufferStart(); 578 579 while (Next < DataBuffer->getBufferEnd()) { 580 auto *Header = reinterpret_cast<const memprof::Header *>(Next); 581 582 // Read in the segment information, check whether its the same across all 583 // profiles in this binary file. 584 const llvm::SmallVector<SegmentEntry> Entries = 585 readSegmentEntries(Next + Header->SegmentOffset); 586 if (!SegmentInfo.empty() && SegmentInfo != Entries) { 587 // We do not expect segment information to change when deserializing from 588 // the same binary profile file. This can happen if dynamic libraries are 589 // loaded/unloaded between profile dumping. 590 return make_error<InstrProfError>( 591 instrprof_error::malformed, 592 "memprof raw profile has different segment information"); 593 } 594 SegmentInfo.assign(Entries.begin(), Entries.end()); 595 596 // Read in the MemInfoBlocks. Merge them based on stack id - we assume that 597 // raw profiles in the same binary file are from the same process so the 598 // stackdepot ids are the same. 599 for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { 600 if (CallstackProfileData.count(Value.first)) { 601 CallstackProfileData[Value.first].Merge(Value.second); 602 } else { 603 CallstackProfileData[Value.first] = Value.second; 604 } 605 } 606 607 // Read in the callstack for each ids. For multiple raw profiles in the same 608 // file, we expect that the callstack is the same for a unique id. 609 const CallStackMap CSM = readStackInfo(Next + Header->StackOffset); 610 if (StackMap.empty()) { 611 StackMap = CSM; 612 } else { 613 if (mergeStackMap(CSM, StackMap)) 614 return make_error<InstrProfError>( 615 instrprof_error::malformed, 616 "memprof raw profile got different call stack for same id"); 617 } 618 619 Next += Header->TotalSize; 620 } 621 622 return Error::success(); 623 } 624 625 object::SectionedAddress 626 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) { 627 if (VirtualAddress > ProfiledTextSegmentStart && 628 VirtualAddress <= ProfiledTextSegmentEnd) { 629 // For PIE binaries, the preferred address is zero and we adjust the virtual 630 // address by start of the profiled segment assuming that the offset of the 631 // segment in the binary is zero. For non-PIE binaries the preferred and 632 // profiled segment addresses should be equal and this is a no-op. 633 const uint64_t AdjustedAddress = 634 VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart; 635 return object::SectionedAddress{AdjustedAddress}; 636 } 637 // Addresses which do not originate from the profiled text segment in the 638 // binary are not adjusted. These will fail symbolization and be filtered out 639 // during processing. 640 return object::SectionedAddress{VirtualAddress}; 641 } 642 643 Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) { 644 if (FunctionProfileData.empty()) 645 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 646 647 if (Iter == FunctionProfileData.end()) 648 return make_error<InstrProfError>(instrprof_error::eof); 649 650 auto IdToFrameCallback = [this](const FrameId Id) { 651 Frame F = this->idToFrame(Id); 652 if (!this->KeepSymbolName) 653 return F; 654 auto Iter = this->GuidToSymbolName.find(F.Function); 655 assert(Iter != this->GuidToSymbolName.end()); 656 F.SymbolName = Iter->getSecond(); 657 return F; 658 }; 659 660 const IndexedMemProfRecord &IndexedRecord = Iter->second; 661 GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)}; 662 Iter++; 663 return Error::success(); 664 } 665 } // namespace memprof 666 } // namespace llvm 667