1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <algorithm>
14 #include <cstdint>
15 #include <memory>
16 #include <type_traits>
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/DenseMapInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
27 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
28 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
29 #include "llvm/Object/Binary.h"
30 #include "llvm/Object/BuildID.h"
31 #include "llvm/Object/ELFObjectFile.h"
32 #include "llvm/Object/ObjectFile.h"
33 #include "llvm/ProfileData/InstrProf.h"
34 #include "llvm/ProfileData/MemProf.h"
35 #include "llvm/ProfileData/MemProfData.inc"
36 #include "llvm/ProfileData/RawMemProfReader.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/Endian.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/MemoryBuffer.h"
41 #include "llvm/Support/Path.h"
42 
43 #define DEBUG_TYPE "memprof"
44 
45 namespace llvm {
46 namespace memprof {
47 namespace {
// Loads a value of type \p T straight out of the bytes at \p Ptr. \p T must be
// a POD type and \p Ptr must be aligned to sizeof(T); the alignment is only
// verified when assertions are enabled.
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
                "Not a pod type.");
  const auto *Typed = reinterpret_cast<const T *>(Ptr);
  assert(reinterpret_cast<size_t>(Typed) % sizeof(T) == 0 && "Unaligned Read");
  return *Typed;
}
53 
54 Error checkBuffer(const MemoryBuffer &Buffer) {
55   if (!RawMemProfReader::hasFormat(Buffer))
56     return make_error<InstrProfError>(instrprof_error::bad_magic);
57 
58   if (Buffer.getBufferSize() == 0)
59     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
60 
61   if (Buffer.getBufferSize() < sizeof(Header)) {
62     return make_error<InstrProfError>(instrprof_error::truncated);
63   }
64 
65   // The size of the buffer can be > header total size since we allow repeated
66   // serialization of memprof profiles to the same file.
67   uint64_t TotalSize = 0;
68   const char *Next = Buffer.getBufferStart();
69   while (Next < Buffer.getBufferEnd()) {
70     auto *H = reinterpret_cast<const Header *>(Next);
71     if (H->Version != MEMPROF_RAW_VERSION) {
72       return make_error<InstrProfError>(instrprof_error::unsupported_version);
73     }
74 
75     TotalSize += H->TotalSize;
76     Next += H->TotalSize;
77   }
78 
79   if (Buffer.getBufferSize() != TotalSize) {
80     return make_error<InstrProfError>(instrprof_error::malformed);
81   }
82   return Error::success();
83 }
84 
85 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
86   using namespace support;
87 
88   const uint64_t NumItemsToRead =
89       endian::readNext<uint64_t, little, unaligned>(Ptr);
90   llvm::SmallVector<SegmentEntry> Items;
91   for (uint64_t I = 0; I < NumItemsToRead; I++) {
92     Items.push_back(*reinterpret_cast<const SegmentEntry *>(
93         Ptr + I * sizeof(SegmentEntry)));
94   }
95   return Items;
96 }
97 
98 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
99 readMemInfoBlocks(const char *Ptr) {
100   using namespace support;
101 
102   const uint64_t NumItemsToRead =
103       endian::readNext<uint64_t, little, unaligned>(Ptr);
104   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
105   for (uint64_t I = 0; I < NumItemsToRead; I++) {
106     const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
107     const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
108     Items.push_back({Id, MIB});
109     // Only increment by size of MIB since readNext implicitly increments.
110     Ptr += sizeof(MemInfoBlock);
111   }
112   return Items;
113 }
114 
115 CallStackMap readStackInfo(const char *Ptr) {
116   using namespace support;
117 
118   const uint64_t NumItemsToRead =
119       endian::readNext<uint64_t, little, unaligned>(Ptr);
120   CallStackMap Items;
121 
122   for (uint64_t I = 0; I < NumItemsToRead; I++) {
123     const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
124     const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
125 
126     SmallVector<uint64_t> CallStack;
127     for (uint64_t J = 0; J < NumPCs; J++) {
128       CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
129     }
130 
131     Items[StackId] = CallStack;
132   }
133   return Items;
134 }
135 
136 // Merges the contents of stack information in \p From to \p To. Returns true if
137 // any stack ids observed previously map to a different set of program counter
138 // addresses.
139 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
140   for (const auto &IdStack : From) {
141     auto I = To.find(IdStack.first);
142     if (I == To.end()) {
143       To[IdStack.first] = IdStack.second;
144     } else {
145       // Check that the PCs are the same (in order).
146       if (IdStack.second != I->second)
147         return true;
148     }
149   }
150   return false;
151 }
152 
153 Error report(Error E, const StringRef Context) {
154   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
155                     std::move(E));
156 }
157 
158 bool isRuntimePath(const StringRef Path) {
159   const StringRef Filename = llvm::sys::path::filename(Path);
160   // This list should be updated in case new files with additional interceptors
161   // are added to the memprof runtime.
162   return Filename.equals("memprof_malloc_linux.cpp") ||
163          Filename.equals("memprof_interceptors.cpp") ||
164          Filename.equals("memprof_new_delete.cpp");
165 }
166 
167 std::string getBuildIdString(const SegmentEntry &Entry) {
168   // If the build id is unset print a helpful string instead of all zeros.
169   if (Entry.BuildIdSize == 0)
170     return "<None>";
171 
172   std::string Str;
173   raw_string_ostream OS(Str);
174   for (size_t I = 0; I < Entry.BuildIdSize; I++) {
175     OS << format_hex_no_prefix(Entry.BuildId[I], 2);
176   }
177   return OS.str();
178 }
179 } // namespace
180 
181 Expected<std::unique_ptr<RawMemProfReader>>
182 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
183                          bool KeepName) {
184   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
185   if (std::error_code EC = BufferOr.getError())
186     return report(errorCodeToError(EC), Path.getSingleStringRef());
187 
188   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
189   return create(std::move(Buffer), ProfiledBinary, KeepName);
190 }
191 
192 Expected<std::unique_ptr<RawMemProfReader>>
193 RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
194                          const StringRef ProfiledBinary, bool KeepName) {
195   if (Error E = checkBuffer(*Buffer))
196     return report(std::move(E), Buffer->getBufferIdentifier());
197 
198   if (ProfiledBinary.empty()) {
199     // Peek the build ids to print a helpful error message.
200     const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
201     std::string ErrorMessage(
202         R"(Path to profiled binary is empty, expected binary with one of the following build ids:
203 )");
204     for (const auto &Id : BuildIds) {
205       ErrorMessage += "\n BuildId: ";
206       ErrorMessage += Id;
207     }
208     return report(
209         make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
210         /*Context=*/"");
211   }
212 
213   auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
214   if (!BinaryOr) {
215     return report(BinaryOr.takeError(), ProfiledBinary);
216   }
217 
218   // Use new here since constructor is private.
219   std::unique_ptr<RawMemProfReader> Reader(
220       new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
221   if (Error E = Reader->initialize(std::move(Buffer))) {
222     return std::move(E);
223   }
224   return std::move(Reader);
225 }
226 
227 bool RawMemProfReader::hasFormat(const StringRef Path) {
228   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
229   if (!BufferOr)
230     return false;
231 
232   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
233   return hasFormat(*Buffer);
234 }
235 
236 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
237   if (Buffer.getBufferSize() < sizeof(uint64_t))
238     return false;
239   // Aligned read to sanity check that the buffer was allocated with at least 8b
240   // alignment.
241   const uint64_t Magic = alignedRead(Buffer.getBufferStart());
242   return Magic == MEMPROF_RAW_MAGIC_64;
243 }
244 
245 void RawMemProfReader::printYAML(raw_ostream &OS) {
246   uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
247   for (const auto &KV : FunctionProfileData) {
248     const size_t NumAllocSites = KV.second.AllocSites.size();
249     if (NumAllocSites > 0) {
250       NumAllocFunctions++;
251       NumMibInfo += NumAllocSites;
252     }
253   }
254 
255   OS << "MemprofProfile:\n";
256   OS << "  Summary:\n";
257   OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
258   OS << "    NumSegments: " << SegmentInfo.size() << "\n";
259   OS << "    NumMibInfo: " << NumMibInfo << "\n";
260   OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
261   OS << "    NumStackOffsets: " << StackMap.size() << "\n";
262   // Print out the segment information.
263   OS << "  Segments:\n";
264   for (const auto &Entry : SegmentInfo) {
265     OS << "  -\n";
266     OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
267     OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
268     OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
269     OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
270   }
271   // Print out the merged contents of the profiles.
272   OS << "  Records:\n";
273   for (const auto &Entry : *this) {
274     OS << "  -\n";
275     OS << "    FunctionGUID: " << Entry.first << "\n";
276     Entry.second.print(OS);
277   }
278 }
279 
280 Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
281   const StringRef FileName = Binary.getBinary()->getFileName();
282 
283   auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
284   if (!ElfObject) {
285     return report(make_error<StringError>(Twine("Not an ELF file: "),
286                                           inconvertibleErrorCode()),
287                   FileName);
288   }
289 
290   // Check whether the profiled binary was built with position independent code
291   // (PIC). Perform sanity checks for assumptions we rely on to simplify
292   // symbolization.
293   auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
294   const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile();
295   auto PHdrsOr = ElfFile.program_headers();
296   if (!PHdrsOr)
297     return report(
298         make_error<StringError>(Twine("Could not read program headers: "),
299                                 inconvertibleErrorCode()),
300         FileName);
301 
302   int NumExecutableSegments = 0;
303   for (const auto &Phdr : *PHdrsOr) {
304     if (Phdr.p_type == ELF::PT_LOAD) {
305       if (Phdr.p_flags & ELF::PF_X) {
306         // We assume only one text segment in the main binary for simplicity and
307         // reduce the overhead of checking multiple ranges during symbolization.
308         if (++NumExecutableSegments > 1) {
309           return report(
310               make_error<StringError>(
311                   "Expect only one executable load segment in the binary",
312                   inconvertibleErrorCode()),
313               FileName);
314         }
315         // Segment will always be loaded at a page boundary, expect it to be
316         // aligned already. Assume 4K pagesize for the machine from which the
317         // profile has been collected. This should be fine for now, in case we
318         // want to support other pagesizes it can be recorded in the raw profile
319         // during collection.
320         PreferredTextSegmentAddress = Phdr.p_vaddr;
321         assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
322                "Expect p_vaddr to always be page aligned");
323         assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
324       }
325     }
326   }
327 
328   auto Triple = ElfObject->makeTriple();
329   if (!Triple.isX86())
330     return report(make_error<StringError>(Twine("Unsupported target: ") +
331                                               Triple.getArchName(),
332                                           inconvertibleErrorCode()),
333                   FileName);
334 
335   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
336   std::unique_ptr<DIContext> Context = DWARFContext::create(
337       *Object, DWARFContext::ProcessDebugRelocations::Process);
338 
339   auto SOFOr = symbolize::SymbolizableObjectFile::create(
340       Object, std::move(Context), /*UntagAddresses=*/false);
341   if (!SOFOr)
342     return report(SOFOr.takeError(), FileName);
343   Symbolizer = std::move(SOFOr.get());
344 
345   // Process the raw profile.
346   if (Error E = readRawProfile(std::move(DataBuffer)))
347     return E;
348 
349   if (Error E = setupForSymbolization())
350     return E;
351 
352   if (Error E = symbolizeAndFilterStackFrames())
353     return E;
354 
355   return mapRawProfileToRecords();
356 }
357 
358 Error RawMemProfReader::setupForSymbolization() {
359   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
360   object::BuildIDRef BinaryId = object::getBuildID(Object);
361   if (BinaryId.empty())
362     return make_error<StringError>(Twine("No build id found in binary ") +
363                                        Binary.getBinary()->getFileName(),
364                                    inconvertibleErrorCode());
365 
366   int NumMatched = 0;
367   for (const auto &Entry : SegmentInfo) {
368     llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
369     if (BinaryId == SegmentId) {
370       // We assume only one text segment in the main binary for simplicity and
371       // reduce the overhead of checking multiple ranges during symbolization.
372       if (++NumMatched > 1) {
373         return make_error<StringError>(
374             "We expect only one executable segment in the profiled binary",
375             inconvertibleErrorCode());
376       }
377       ProfiledTextSegmentStart = Entry.Start;
378       ProfiledTextSegmentEnd = Entry.End;
379     }
380   }
381   assert(NumMatched != 0 && "No matching executable segments in segment info.");
382   assert((PreferredTextSegmentAddress == 0 ||
383           (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
384          "Expect text segment address to be 0 or equal to profiled text "
385          "segment start.");
386   return Error::success();
387 }
388 
389 Error RawMemProfReader::mapRawProfileToRecords() {
390   // Hold a mapping from function to each callsite location we encounter within
391   // it that is part of some dynamic allocation context. The location is stored
392   // as a pointer to a symbolized list of inline frames.
393   using LocationPtr = const llvm::SmallVector<FrameId> *;
394   llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
395       PerFunctionCallSites;
396 
397   // Convert the raw profile callstack data into memprof records. While doing so
398   // keep track of related contexts so that we can fill these in later.
399   for (const auto &Entry : CallstackProfileData) {
400     const uint64_t StackId = Entry.first;
401 
402     auto It = StackMap.find(StackId);
403     if (It == StackMap.end())
404       return make_error<InstrProfError>(
405           instrprof_error::malformed,
406           "memprof callstack record does not contain id: " + Twine(StackId));
407 
408     // Construct the symbolized callstack.
409     llvm::SmallVector<FrameId> Callstack;
410     Callstack.reserve(It->getSecond().size());
411 
412     llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
413     for (size_t I = 0; I < Addresses.size(); I++) {
414       const uint64_t Address = Addresses[I];
415       assert(SymbolizedFrame.count(Address) > 0 &&
416              "Address not found in SymbolizedFrame map");
417       const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];
418 
419       assert(!idToFrame(Frames.back()).IsInlineFrame &&
420              "The last frame should not be inlined");
421 
422       // Record the callsites for each function. Skip the first frame of the
423       // first address since it is the allocation site itself that is recorded
424       // as an alloc site.
425       for (size_t J = 0; J < Frames.size(); J++) {
426         if (I == 0 && J == 0)
427           continue;
428         // We attach the entire bottom-up frame here for the callsite even
429         // though we only need the frames up to and including the frame for
430         // Frames[J].Function. This will enable better deduplication for
431         // compression in the future.
432         const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
433         PerFunctionCallSites[Guid].insert(&Frames);
434       }
435 
436       // Add all the frames to the current allocation callstack.
437       Callstack.append(Frames.begin(), Frames.end());
438     }
439 
440     // We attach the memprof record to each function bottom-up including the
441     // first non-inline frame.
442     for (size_t I = 0; /*Break out using the condition below*/; I++) {
443       const Frame &F = idToFrame(Callstack[I]);
444       auto Result =
445           FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
446       IndexedMemProfRecord &Record = Result.first->second;
447       Record.AllocSites.emplace_back(Callstack, Entry.second);
448 
449       if (!F.IsInlineFrame)
450         break;
451     }
452   }
453 
454   // Fill in the related callsites per function.
455   for (const auto &[Id, Locs] : PerFunctionCallSites) {
456     // Some functions may have only callsite data and no allocation data. Here
457     // we insert a new entry for callsite data if we need to.
458     auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
459     IndexedMemProfRecord &Record = Result.first->second;
460     for (LocationPtr Loc : Locs) {
461       Record.CallSites.push_back(*Loc);
462     }
463   }
464 
465   return Error::success();
466 }
467 
468 Error RawMemProfReader::symbolizeAndFilterStackFrames() {
469   // The specifier to use when symbolization is requested.
470   const DILineInfoSpecifier Specifier(
471       DILineInfoSpecifier::FileLineInfoKind::RawValue,
472       DILineInfoSpecifier::FunctionNameKind::LinkageName);
473 
474   // For entries where all PCs in the callstack are discarded, we erase the
475   // entry from the stack map.
476   llvm::SmallVector<uint64_t> EntriesToErase;
477   // We keep track of all prior discarded entries so that we can avoid invoking
478   // the symbolizer for such entries.
479   llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
480   for (auto &Entry : StackMap) {
481     for (const uint64_t VAddr : Entry.getSecond()) {
482       // Check if we have already symbolized and cached the result or if we
483       // don't want to attempt symbolization since we know this address is bad.
484       // In this case the address is also removed from the current callstack.
485       if (SymbolizedFrame.count(VAddr) > 0 ||
486           AllVAddrsToDiscard.contains(VAddr))
487         continue;
488 
489       Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
490           getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
491       if (!DIOr)
492         return DIOr.takeError();
493       DIInliningInfo DI = DIOr.get();
494 
495       // Drop frames which we can't symbolize or if they belong to the runtime.
496       if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
497           isRuntimePath(DI.getFrame(0).FileName)) {
498         AllVAddrsToDiscard.insert(VAddr);
499         continue;
500       }
501 
502       for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
503            I++) {
504         const auto &DIFrame = DI.getFrame(I);
505         const uint64_t Guid =
506             IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
507         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
508                       // Only the last entry is not an inlined location.
509                       I != NumFrames - 1);
510         // Here we retain a mapping from the GUID to symbol name instead of
511         // adding it to the frame object directly to reduce memory overhead.
512         // This is because there can be many unique frames, particularly for
513         // callsite frames.
514         if (KeepSymbolName)
515           GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
516 
517         const FrameId Hash = F.hash();
518         IdToFrame.insert({Hash, F});
519         SymbolizedFrame[VAddr].push_back(Hash);
520       }
521     }
522 
523     auto &CallStack = Entry.getSecond();
524     llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
525       return AllVAddrsToDiscard.contains(A);
526     });
527     if (CallStack.empty())
528       EntriesToErase.push_back(Entry.getFirst());
529   }
530 
531   // Drop the entries where the callstack is empty.
532   for (const uint64_t Id : EntriesToErase) {
533     StackMap.erase(Id);
534     CallstackProfileData.erase(Id);
535   }
536 
537   if (StackMap.empty())
538     return make_error<InstrProfError>(
539         instrprof_error::malformed,
540         "no entries in callstack map after symbolization");
541 
542   return Error::success();
543 }
544 
545 std::vector<std::string>
546 RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
547   const char *Next = DataBuffer->getBufferStart();
548   // Use a set + vector since a profile file may contain multiple raw profile
549   // dumps, each with segment information. We want them unique and in order they
550   // were stored in the profile; the profiled binary should be the first entry.
551   // The runtime uses dl_iterate_phdr and the "... first object visited by
552   // callback is the main program."
553   // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
554   std::vector<std::string> BuildIds;
555   llvm::SmallSet<std::string, 10> BuildIdsSet;
556   while (Next < DataBuffer->getBufferEnd()) {
557     auto *Header = reinterpret_cast<const memprof::Header *>(Next);
558 
559     const llvm::SmallVector<SegmentEntry> Entries =
560         readSegmentEntries(Next + Header->SegmentOffset);
561 
562     for (const auto &Entry : Entries) {
563       const std::string Id = getBuildIdString(Entry);
564       if (BuildIdsSet.contains(Id))
565         continue;
566       BuildIds.push_back(Id);
567       BuildIdsSet.insert(Id);
568     }
569 
570     Next += Header->TotalSize;
571   }
572   return BuildIds;
573 }
574 
575 Error RawMemProfReader::readRawProfile(
576     std::unique_ptr<MemoryBuffer> DataBuffer) {
577   const char *Next = DataBuffer->getBufferStart();
578 
579   while (Next < DataBuffer->getBufferEnd()) {
580     auto *Header = reinterpret_cast<const memprof::Header *>(Next);
581 
582     // Read in the segment information, check whether its the same across all
583     // profiles in this binary file.
584     const llvm::SmallVector<SegmentEntry> Entries =
585         readSegmentEntries(Next + Header->SegmentOffset);
586     if (!SegmentInfo.empty() && SegmentInfo != Entries) {
587       // We do not expect segment information to change when deserializing from
588       // the same binary profile file. This can happen if dynamic libraries are
589       // loaded/unloaded between profile dumping.
590       return make_error<InstrProfError>(
591           instrprof_error::malformed,
592           "memprof raw profile has different segment information");
593     }
594     SegmentInfo.assign(Entries.begin(), Entries.end());
595 
596     // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
597     // raw profiles in the same binary file are from the same process so the
598     // stackdepot ids are the same.
599     for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
600       if (CallstackProfileData.count(Value.first)) {
601         CallstackProfileData[Value.first].Merge(Value.second);
602       } else {
603         CallstackProfileData[Value.first] = Value.second;
604       }
605     }
606 
607     // Read in the callstack for each ids. For multiple raw profiles in the same
608     // file, we expect that the callstack is the same for a unique id.
609     const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
610     if (StackMap.empty()) {
611       StackMap = CSM;
612     } else {
613       if (mergeStackMap(CSM, StackMap))
614         return make_error<InstrProfError>(
615             instrprof_error::malformed,
616             "memprof raw profile got different call stack for same id");
617     }
618 
619     Next += Header->TotalSize;
620   }
621 
622   return Error::success();
623 }
624 
625 object::SectionedAddress
626 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
627   if (VirtualAddress > ProfiledTextSegmentStart &&
628       VirtualAddress <= ProfiledTextSegmentEnd) {
629     // For PIE binaries, the preferred address is zero and we adjust the virtual
630     // address by start of the profiled segment assuming that the offset of the
631     // segment in the binary is zero. For non-PIE binaries the preferred and
632     // profiled segment addresses should be equal and this is a no-op.
633     const uint64_t AdjustedAddress =
634         VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
635     return object::SectionedAddress{AdjustedAddress};
636   }
637   // Addresses which do not originate from the profiled text segment in the
638   // binary are not adjusted. These will fail symbolization and be filtered out
639   // during processing.
640   return object::SectionedAddress{VirtualAddress};
641 }
642 
643 Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
644   if (FunctionProfileData.empty())
645     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
646 
647   if (Iter == FunctionProfileData.end())
648     return make_error<InstrProfError>(instrprof_error::eof);
649 
650   auto IdToFrameCallback = [this](const FrameId Id) {
651     Frame F = this->idToFrame(Id);
652     if (!this->KeepSymbolName)
653       return F;
654     auto Iter = this->GuidToSymbolName.find(F.Function);
655     assert(Iter != this->GuidToSymbolName.end());
656     F.SymbolName = Iter->getSecond();
657     return F;
658   };
659 
660   const IndexedMemProfRecord &IndexedRecord = Iter->second;
661   GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
662   Iter++;
663   return Error::success();
664 }
665 } // namespace memprof
666 } // namespace llvm
667