//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {

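// Summary statistics for a single raw profile section, parsed from its
// header.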
struct Summary {
  uint64_t Version;
  uint64_t TotalSizeBytes;
  uint64_t NumSegments;
  uint64_t NumMIBInfo;
  uint64_t NumStackOffsets;
};

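// Read a T from \p Ptr, asserting that the address is suitably aligned for T.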
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_standard_layout<T>::value && std::is_trivial<T>::value,
                "Not a POD type.");
  assert(reinterpret_cast<uintptr_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

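// Parse the raw profile header at \p Start and summarize the contents of the
// section it describes.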
Summary computeSummary(const char *Start) {
  auto *H = reinterpret_cast<const Header *>(Start);

  // Check alignment while reading the number of items in each section.
  return Summary{
      H->Version,
      H->TotalSize,
      alignedRead(Start + H->SegmentOffset),
      alignedRead(Start + H->MIBOffset),
      alignedRead(Start + H->StackOffset),
  };
}

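// Validate the raw profile buffer: the magic must match, the buffer must be
// large enough for at least one header, every profile in the buffer must have
// a supported version, and the per-profile sizes must sum to the buffer size.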
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

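// Read the number of segment entries serialized at \p Ptr followed by the
// entries themselves.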
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

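// Read the number of (stack id, MemInfoBlock) pairs serialized at \p Ptr
// followed by the pairs themselves.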
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    Items.push_back({Id, MIB});
    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);
  }
  return Items;
}

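// Read the callstack table serialized at \p Ptr: for each stack id, the list
// of program counter addresses in that callstack.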
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);

    SmallVector<uint64_t> CallStack;
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &IdStack : From) {
    auto I = To.find(IdStack.first);
    if (I == To.end()) {
      To[IdStack.first] = IdStack.second;
    } else {
      // Check that the PCs are the same (in order).
      if (IdStack.second != I->second)
        return true;
    }
  }
  return false;
}

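// Prepend \p Context to the error \p E to aid debugging.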
Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

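// Returns true if \p Path looks like a source file from the memprof runtime,
// whose frames are discarded during symbolization.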
bool isRuntimePath(const StringRef Path) {
  return StringRef(llvm::sys::path::convert_to_slash(Path))
      .contains("memprof/memprof_");
}
} // namespace

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Path.getSingleStringRef());

  if (ProfiledBinary.empty())
    return report(
        errorCodeToError(make_error_code(std::errc::invalid_argument)),
        "Path to profiled binary is empty!");

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since the constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(new RawMemProfReader(
      std::move(Buffer), std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize()) {
    return std::move(E);
  }
  return std::move(Reader);
}

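// Check whether the profile file at \p Path begins with the 64-bit raw
// memprof magic.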
bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8-byte alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

void RawMemProfReader::printYAML(raw_ostream &OS) {
  OS << "MemprofProfile:\n";
  // TODO: Update printSummaries to print out the data after the profile has
  // been symbolized and pruned. We can parse some raw profile characteristics
  // from the data buffer for additional information.
  printSummaries(OS);
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &Entry : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << Entry.first << "\n";
    Entry.second.print(OS);
  }
}

void RawMemProfReader::printSummaries(raw_ostream &OS) const {
  const char *Next = DataBuffer->getBufferStart();
  while (Next < DataBuffer->getBufferEnd()) {
    auto Summary = computeSummary(Next);
    OS << "  -\n";
    OS << "  Header:\n";
    OS << "    Version: " << Summary.Version << "\n";
    OS << "    TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
    OS << "    NumSegments: " << Summary.NumSegments << "\n";
    OS << "    NumMibInfo: " << Summary.NumMIBInfo << "\n";
    OS << "    NumStackOffsets: " << Summary.NumStackOffsets << "\n";
    // TODO: Print the build ids once we can record them using the
    // sanitizer_procmaps library for linux.

    auto *H = reinterpret_cast<const Header *>(Next);
    Next += H->TotalSize;
  }
}

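// Set up symbolization for the profiled binary, then read the raw profile,
// symbolize and filter its stack frames, and convert the result into
// per-function memprof records.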
Error RawMemProfReader::initialize() {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  Symbolizer = std::move(SOFOr.get());

  if (Error E = readRawProfile())
    return E;

  if (Error E = symbolizeAndFilterStackFrames())
    return E;

  return mapRawProfileToRecords();
}

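// Convert the symbolized callstack profile data into per-function
// IndexedMemProfRecords, recording each allocation context as an alloc site
// and each interior frame as a callsite of its enclosing function.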
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill them in later.
  for (const auto &Entry : CallstackProfileData) {
    const uint64_t StackId = Entry.first;

    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself and is recorded
      // separately as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, Entry.second);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
       I != E; I++) {
    const GlobalValue::GUID Id = I->first;
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : I->getSecond()) {
      Record.CallSites.push_back(*Loc);
    }
  }

  return Error::success();
}

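// Symbolize every unique PC referenced from the stack map, caching the
// symbolized inline frames per address. Addresses which cannot be symbolized
// or which belong to the runtime are discarded, and callstacks which become
// empty as a result are dropped entirely.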
Error RawMemProfReader::symbolizeAndFilterStackFrames() {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or which belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to symbol name instead of
        // adding it to the frame object directly to reduce memory overhead.
        // This is because there can be many unique frames, particularly for
        // callsite frames.
        if (KeepSymbolName)
          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    CallStack.erase(std::remove_if(CallStack.begin(), CallStack.end(),
                                   [&AllVAddrsToDiscard](const uint64_t A) {
                                     return AllVAddrsToDiscard.contains(A);
                                   }),
                    CallStack.end());
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

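// Read every raw profile serialized into the data buffer, merging segment
// information, MemInfoBlocks, and callstacks across profiles as we go.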
Error RawMemProfReader::readRawProfile() {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Value.first)) {
        CallstackProfileData[Value.first].Merge(Value.second);
      } else {
        CallstackProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the
    // same file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

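// Convert a virtual address in the profiled process into an address the
// symbolizer can use. For now this is the identity mapping, which is only
// correct for non-PIE binaries; see the TODO below.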
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });

  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non-PIE binaries.
  return object::SectionedAddress{VirtualAddress};
}

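// Materialize the next (GUID, MemProfRecord) pair by converting the indexed
// record's frame ids back into Frame objects, optionally attaching symbol
// names.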
Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
  if (FunctionProfileData.empty())
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Iter == FunctionProfileData.end())
    return make_error<InstrProfError>(instrprof_error::eof);

  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = Iter->getSecond();
    return F;
  };

  const IndexedMemProfRecord &IndexedRecord = Iter->second;
  GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
  Iter++;
  return Error::success();
}
} // namespace memprof
} // namespace llvm