//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
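// Aligned load of a trivially copyable T. Callers must pass a pointer that is
// suitably aligned for T (checked by the assert below).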
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_trivially_copyable<T>::value,
                "Not a trivially copyable type.");
  assert(reinterpret_cast<uintptr_t>(Ptr) % alignof(T) == 0 &&
         "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

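// A raw memprof profile is a sequence of one or more back-to-back dumps, each
// starting with a Header (see MemProfData.inc) whose TotalSize field records
// the byte length of that dump. checkBuffer walks the buffer header by header
// to validate it before any payload is parsed.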
Error checkBuffer(const MemoryBuffer &Buffer) {
  // Check for an empty or truncated buffer first so that we report those
  // specific errors rather than a bad magic value.
  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header))
    return make_error<InstrProfError>(instrprof_error::truncated);

  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  // The size of the buffer can be larger than a single header's TotalSize
  // since we allow repeated serialization of memprof profiles to the same
  // file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

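// Reads a serialized list of SegmentEntry records: a little-endian uint64_t
// count followed by that many raw SegmentEntry structs.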
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

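// Reads a serialized list of MemInfoBlock payloads: a little-endian uint64_t
// count followed by that many (uint64_t Id, MemInfoBlock) pairs, where Id is
// the stack id of the allocation context the block was collected in.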
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    Items.push_back({Id, MIB});
    // Only advance by the size of the MIB here; readNext already advanced Ptr
    // past the Id.
    Ptr += sizeof(MemInfoBlock);
  }
  return Items;
}

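// Reads the serialized callstack table: a little-endian uint64_t entry count,
// then for each entry a stack id, the number of PCs, and the PC addresses
// with the leaf (allocation site) first.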
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);

    SmallVector<uint64_t> CallStack;
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information from \p From into \p To. Returns
// true if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &IdStack : From) {
    auto I = To.find(IdStack.first);
    if (I == To.end()) {
      To[IdStack.first] = IdStack.second;
    } else {
      // Check that the PCs are the same (in order).
      if (IdStack.second != I->second)
        return true;
    }
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  return StringRef(llvm::sys::path::convert_to_slash(Path))
      .contains("memprof/memprof_");
}

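// Returns the segment's build id as a hex string, or "<None>" if the build id
// is unset (all zeros).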
std::string getBuildIdString(const SegmentEntry &Entry) {
  constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
  constexpr uint8_t Zeros[Size] = {0};
  // If the build id is unset, print a helpful string instead of all zeros.
  if (memcmp(Entry.BuildId, Zeros, Size) == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Size; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

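// Memory-maps the raw profile at Path, validates it, and constructs a reader
// bound to the profiled binary, which is used for symbolization.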
Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Path.getSingleStringRef());

  if (ProfiledBinary.empty())
    return report(
        errorCodeToError(make_error_code(std::errc::invalid_argument)),
        "Path to profiled binary is empty!");

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}


bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Use an aligned read to sanity check that the buffer was allocated with at
  // least 8-byte alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &Entry : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << Entry.first << "\n";
    Entry.second.print(OS);
  }
}

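// Binds the reader to the profiled binary: verifies that it is a supported
// x86 ELF file, sets up DWARF-based symbolization, then reads, symbolizes,
// and maps the raw profile into memprof records.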
Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  Symbolizer = std::move(SOFOr.get());

  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = symbolizeAndFilterStackFrames())
    return E;

  return mapRawProfileToRecords();
}

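// Converts the raw, address-keyed profile into GUID-keyed memprof records
// with symbolized frames, and cross-links related callsites per function.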
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
  for (const auto &Entry : CallstackProfileData) {
    const uint64_t StackId = Entry.first;

    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself, which is
      // recorded as an alloc site rather than a callsite.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    // We attach the memprof record to each function bottom-up, up to and
    // including the first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, Entry.second);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Locs) {
      Record.CallSites.push_back(*Loc);
    }
  }

  return Error::success();
}

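// Symbolizes each unique PC across all callstacks once, caching the inlined
// frame sequence per address, and drops addresses (and, if emptied, whole
// callstacks) which cannot be symbolized or which belong to the memprof
// runtime itself.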
Error RawMemProfReader::symbolizeAndFilterStackFrames() {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames that we can't symbolize or that belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to symbol name instead of
        // adding it to the frame object directly to reduce memory overhead.
        // This is because there can be many unique frames, particularly for
        // callsite frames.
        if (KeepSymbolName)
          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

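// Reads one or more raw profile dumps from the buffer, merging the segment,
// MemInfoBlock, and callstack payloads from each dump into the reader state.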
Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Value.first)) {
        CallstackProfileData[Value.first].Merge(Value.second);
      } else {
        CallstackProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the
    // same file, we expect the callstack to be the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });

  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non-PIE binaries.
  return object::SectionedAddress{VirtualAddress};
}

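// Materializes the next (GUID, MemProfRecord) pair from the indexed profile
// data, converting frame ids back into Frame objects (with symbol names
// attached when KeepSymbolName is set).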
Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
  if (FunctionProfileData.empty())
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Iter == FunctionProfileData.end())
    return make_error<InstrProfError>(instrprof_error::eof);

  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = Iter->getSecond();
    return F;
  };

  const IndexedMemProfRecord &IndexedRecord = Iter->second;
  GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
  Iter++;
  return Error::success();
}
} // namespace memprof
} // namespace llvm
543