1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <algorithm>
14 #include <cstdint>
15 #include <memory>
16 #include <type_traits>
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
22 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
23 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
24 #include "llvm/Object/Binary.h"
25 #include "llvm/Object/ELFObjectFile.h"
26 #include "llvm/Object/ObjectFile.h"
27 #include "llvm/ProfileData/InstrProf.h"
28 #include "llvm/ProfileData/MemProf.h"
29 #include "llvm/ProfileData/MemProfData.inc"
30 #include "llvm/ProfileData/RawMemProfReader.h"
31 #include "llvm/Support/Endian.h"
32 #include "llvm/Support/Path.h"
33 
34 #define DEBUG_TYPE "memprof"
35 
36 namespace llvm {
37 namespace memprof {
38 namespace {
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_trivially_copyable<T>::value,
                "Not a trivially copyable type.");
  assert(reinterpret_cast<uintptr_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}
44 
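// Sanity checks the buffer: it must be non-empty, start with the expected
// magic, be large enough for at least one header, and its chained headers
// must exactly account for the buffer size.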
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() < sizeof(Header))
    return make_error<InstrProfError>(instrprof_error::truncated);

  // The size of the buffer can be larger than a single serialized profile
  // since we allow repeated serialization of memprof profiles to the same
  // file.
58   uint64_t TotalSize = 0;
59   const char *Next = Buffer.getBufferStart();
60   while (Next < Buffer.getBufferEnd()) {
61     auto *H = reinterpret_cast<const Header *>(Next);
62     if (H->Version != MEMPROF_RAW_VERSION) {
63       return make_error<InstrProfError>(instrprof_error::unsupported_version);
64     }
65 
    // Guard against a zero-sized profile which would otherwise cause this
    // loop to never terminate.
    if (H->TotalSize == 0)
      return make_error<InstrProfError>(instrprof_error::malformed);

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
68   }
69 
70   if (Buffer.getBufferSize() != TotalSize) {
71     return make_error<InstrProfError>(instrprof_error::malformed);
72   }
73   return Error::success();
74 }
75 
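// Reads a serialized list of SegmentEntry records: a uint64_t count followed
// by that many fixed-size entries.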
76 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
77   using namespace support;
78 
79   const uint64_t NumItemsToRead =
80       endian::readNext<uint64_t, little, unaligned>(Ptr);
81   llvm::SmallVector<SegmentEntry> Items;
82   for (uint64_t I = 0; I < NumItemsToRead; I++) {
83     Items.push_back(*reinterpret_cast<const SegmentEntry *>(
84         Ptr + I * sizeof(SegmentEntry)));
85   }
86   return Items;
87 }
88 
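// Reads a serialized list of MemInfoBlock records: a uint64_t count followed
// by that many (uint64_t stack id, MemInfoBlock) pairs.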
89 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
90 readMemInfoBlocks(const char *Ptr) {
91   using namespace support;
92 
93   const uint64_t NumItemsToRead =
94       endian::readNext<uint64_t, little, unaligned>(Ptr);
95   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
96   for (uint64_t I = 0; I < NumItemsToRead; I++) {
97     const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
98     const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
99     Items.push_back({Id, MIB});
    // Only increment by the size of the MIB here since the readNext above
    // already advanced Ptr past the Id.
101     Ptr += sizeof(MemInfoBlock);
102   }
103   return Items;
104 }
105 
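// Reads the serialized callstacks: a uint64_t count followed by, for each
// entry, a stack id, the number of PCs in the stack and then the PC addresses
// themselves.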
106 CallStackMap readStackInfo(const char *Ptr) {
107   using namespace support;
108 
109   const uint64_t NumItemsToRead =
110       endian::readNext<uint64_t, little, unaligned>(Ptr);
111   CallStackMap Items;
112 
113   for (uint64_t I = 0; I < NumItemsToRead; I++) {
114     const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
115     const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
116 
117     SmallVector<uint64_t> CallStack;
118     for (uint64_t J = 0; J < NumPCs; J++) {
119       CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
120     }
121 
122     Items[StackId] = CallStack;
123   }
124   return Items;
125 }
126 
// Merges the contents of stack information from \p From into \p To. Returns
// true if any stack ids observed previously map to a different set of program
// counter addresses.
130 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
131   for (const auto &IdStack : From) {
132     auto I = To.find(IdStack.first);
133     if (I == To.end()) {
134       To[IdStack.first] = IdStack.second;
135     } else {
136       // Check that the PCs are the same (in order).
137       if (IdStack.second != I->second)
138         return true;
139     }
140   }
141   return false;
142 }
143 
144 Error report(Error E, const StringRef Context) {
145   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
146                     std::move(E));
147 }
148 
149 bool isRuntimePath(const StringRef Path) {
150   return StringRef(llvm::sys::path::convert_to_slash(Path))
151       .contains("memprof/memprof_");
152 }
153 } // namespace
154 
155 Expected<std::unique_ptr<RawMemProfReader>>
156 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
157                          bool KeepName) {
158   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
159   if (std::error_code EC = BufferOr.getError())
160     return report(errorCodeToError(EC), Path.getSingleStringRef());
161 
162   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
163   if (Error E = checkBuffer(*Buffer))
164     return report(std::move(E), Path.getSingleStringRef());
165 
166   if (ProfiledBinary.empty())
167     return report(
168         errorCodeToError(make_error_code(std::errc::invalid_argument)),
169         "Path to profiled binary is empty!");
170 
171   auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
172   if (!BinaryOr) {
173     return report(BinaryOr.takeError(), ProfiledBinary);
174   }
175 
  // Use new here since the constructor is private.
177   std::unique_ptr<RawMemProfReader> Reader(
178       new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
179   if (Error E = Reader->initialize(std::move(Buffer))) {
180     return std::move(E);
181   }
182   return std::move(Reader);
183 }
184 
185 bool RawMemProfReader::hasFormat(const StringRef Path) {
186   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
187   if (!BufferOr)
188     return false;
189 
190   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
191   return hasFormat(*Buffer);
192 }
193 
194 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
195   if (Buffer.getBufferSize() < sizeof(uint64_t))
196     return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8-byte alignment.
199   const uint64_t Magic = alignedRead(Buffer.getBufferStart());
200   return Magic == MEMPROF_RAW_MAGIC_64;
201 }
202 
203 void RawMemProfReader::printYAML(raw_ostream &OS) {
204   uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
205   for (const auto &KV : FunctionProfileData) {
206     const size_t NumAllocSites = KV.second.AllocSites.size();
207     if (NumAllocSites > 0) {
208       NumAllocFunctions++;
209       NumMibInfo += NumAllocSites;
210     }
211   }
212 
213   OS << "MemprofProfile:\n";
214   OS << "  Summary:\n";
215   OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
216   OS << "    NumSegments: " << SegmentInfo.size() << "\n";
217   OS << "    NumMibInfo: " << NumMibInfo << "\n";
218   OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
219   OS << "    NumStackOffsets: " << StackMap.size() << "\n";
220   // Print out the merged contents of the profiles.
221   OS << "  Records:\n";
222   for (const auto &Entry : *this) {
223     OS << "  -\n";
224     OS << "    FunctionGUID: " << Entry.first << "\n";
225     Entry.second.print(OS);
226   }
227 }
228 
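// Sets up a symbolizer for the profiled binary, then reads the raw profile
// and converts its contents into symbolized memprof records.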
229 Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
230   const StringRef FileName = Binary.getBinary()->getFileName();
231 
232   auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
233   if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file."),
235                                           inconvertibleErrorCode()),
236                   FileName);
237   }
238 
239   auto Triple = ElfObject->makeTriple();
240   if (!Triple.isX86())
241     return report(make_error<StringError>(Twine("Unsupported target: ") +
242                                               Triple.getArchName(),
243                                           inconvertibleErrorCode()),
244                   FileName);
245 
246   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
247   std::unique_ptr<DIContext> Context = DWARFContext::create(
248       *Object, DWARFContext::ProcessDebugRelocations::Process);
249 
250   auto SOFOr = symbolize::SymbolizableObjectFile::create(
251       Object, std::move(Context), /*UntagAddresses=*/false);
252   if (!SOFOr)
253     return report(SOFOr.takeError(), FileName);
254   Symbolizer = std::move(SOFOr.get());
255 
256   if (Error E = readRawProfile(std::move(DataBuffer)))
257     return E;
258 
259   if (Error E = symbolizeAndFilterStackFrames())
260     return E;
261 
262   return mapRawProfileToRecords();
263 }
264 
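// Converts the symbolized raw profile data into IndexedMemProfRecords keyed
// by the GUID of each function which allocates memory or appears in an
// allocation context.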
265 Error RawMemProfReader::mapRawProfileToRecords() {
266   // Hold a mapping from function to each callsite location we encounter within
267   // it that is part of some dynamic allocation context. The location is stored
268   // as a pointer to a symbolized list of inline frames.
269   using LocationPtr = const llvm::SmallVector<FrameId> *;
270   llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
271       PerFunctionCallSites;
272 
  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
275   for (const auto &Entry : CallstackProfileData) {
276     const uint64_t StackId = Entry.first;
277 
278     auto It = StackMap.find(StackId);
279     if (It == StackMap.end())
280       return make_error<InstrProfError>(
281           instrprof_error::malformed,
282           "memprof callstack record does not contain id: " + Twine(StackId));
283 
284     // Construct the symbolized callstack.
285     llvm::SmallVector<FrameId> Callstack;
286     Callstack.reserve(It->getSecond().size());
287 
288     llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
289     for (size_t I = 0; I < Addresses.size(); I++) {
290       const uint64_t Address = Addresses[I];
291       assert(SymbolizedFrame.count(Address) > 0 &&
292              "Address not found in SymbolizedFrame map");
293       const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];
294 
295       assert(!idToFrame(Frames.back()).IsInlineFrame &&
296              "The last frame should not be inlined");
297 
      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself, which is
      // recorded separately as an alloc site.
301       for (size_t J = 0; J < Frames.size(); J++) {
302         if (I == 0 && J == 0)
303           continue;
304         // We attach the entire bottom-up frame here for the callsite even
305         // though we only need the frames up to and including the frame for
306         // Frames[J].Function. This will enable better deduplication for
307         // compression in the future.
308         const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
309         PerFunctionCallSites[Guid].insert(&Frames);
310       }
311 
312       // Add all the frames to the current allocation callstack.
313       Callstack.append(Frames.begin(), Frames.end());
314     }
315 
316     // We attach the memprof record to each function bottom-up including the
317     // first non-inline frame.
318     for (size_t I = 0; /*Break out using the condition below*/; I++) {
319       const Frame &F = idToFrame(Callstack[I]);
320       auto Result =
321           FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
322       IndexedMemProfRecord &Record = Result.first->second;
323       Record.AllocSites.emplace_back(Callstack, Entry.second);
324 
325       if (!F.IsInlineFrame)
326         break;
327     }
328   }
329 
330   // Fill in the related callsites per function.
  for (const auto &Entry : PerFunctionCallSites) {
    const GlobalValue::GUID Id = Entry.first;
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Entry.second) {
      Record.CallSites.push_back(*Loc);
    }
  }
342 
343   return Error::success();
344 }
345 
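// Symbolizes every unique address in the stack map, caching the resulting
// frames per virtual address. Addresses which cannot be symbolized or which
// belong to the memprof runtime are dropped, and callstacks which become
// empty as a result are erased entirely.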
346 Error RawMemProfReader::symbolizeAndFilterStackFrames() {
347   // The specifier to use when symbolization is requested.
348   const DILineInfoSpecifier Specifier(
349       DILineInfoSpecifier::FileLineInfoKind::RawValue,
350       DILineInfoSpecifier::FunctionNameKind::LinkageName);
351 
352   // For entries where all PCs in the callstack are discarded, we erase the
353   // entry from the stack map.
354   llvm::SmallVector<uint64_t> EntriesToErase;
355   // We keep track of all prior discarded entries so that we can avoid invoking
356   // the symbolizer for such entries.
357   llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
358   for (auto &Entry : StackMap) {
359     for (const uint64_t VAddr : Entry.getSecond()) {
360       // Check if we have already symbolized and cached the result or if we
361       // don't want to attempt symbolization since we know this address is bad.
362       // In this case the address is also removed from the current callstack.
363       if (SymbolizedFrame.count(VAddr) > 0 ||
364           AllVAddrsToDiscard.contains(VAddr))
365         continue;
366 
367       Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
368           getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
369       if (!DIOr)
370         return DIOr.takeError();
371       DIInliningInfo DI = DIOr.get();
372 
373       // Drop frames which we can't symbolize or if they belong to the runtime.
374       if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
375           isRuntimePath(DI.getFrame(0).FileName)) {
376         AllVAddrsToDiscard.insert(VAddr);
377         continue;
378       }
379 
380       for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
381            I++) {
382         const auto &DIFrame = DI.getFrame(I);
383         const uint64_t Guid =
384             IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
385         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
386                       // Only the last entry is not an inlined location.
387                       I != NumFrames - 1);
388         // Here we retain a mapping from the GUID to symbol name instead of
389         // adding it to the frame object directly to reduce memory overhead.
390         // This is because there can be many unique frames, particularly for
391         // callsite frames.
392         if (KeepSymbolName)
393           GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
394 
395         const FrameId Hash = F.hash();
396         IdToFrame.insert({Hash, F});
397         SymbolizedFrame[VAddr].push_back(Hash);
398       }
399     }
400 
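    // Drop all the discarded addresses from the current callstack.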
401     auto &CallStack = Entry.getSecond();
402     CallStack.erase(std::remove_if(CallStack.begin(), CallStack.end(),
403                                    [&AllVAddrsToDiscard](const uint64_t A) {
404                                      return AllVAddrsToDiscard.contains(A);
405                                    }),
406                     CallStack.end());
407     if (CallStack.empty())
408       EntriesToErase.push_back(Entry.getFirst());
409   }
410 
411   // Drop the entries where the callstack is empty.
412   for (const uint64_t Id : EntriesToErase) {
413     StackMap.erase(Id);
414     CallstackProfileData.erase(Id);
415   }
416 
417   if (StackMap.empty())
418     return make_error<InstrProfError>(
419         instrprof_error::malformed,
420         "no entries in callstack map after symbolization");
421 
422   return Error::success();
423 }
424 
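// Reads and merges all the raw profiles serialized back to back in the data
// buffer.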
425 Error RawMemProfReader::readRawProfile(
426     std::unique_ptr<MemoryBuffer> DataBuffer) {
427   const char *Next = DataBuffer->getBufferStart();
428 
429   while (Next < DataBuffer->getBufferEnd()) {
430     auto *Header = reinterpret_cast<const memprof::Header *>(Next);
431 
    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
434     const llvm::SmallVector<SegmentEntry> Entries =
435         readSegmentEntries(Next + Header->SegmentOffset);
436     if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
440       return make_error<InstrProfError>(
441           instrprof_error::malformed,
442           "memprof raw profile has different segment information");
443     }
444     SegmentInfo.assign(Entries.begin(), Entries.end());
445 
    // Read in the MemInfoBlocks. Merge them based on stack id; we assume that
    // raw profiles in the same binary file are from the same process, so the
    // stack depot ids are the same.
449     for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
450       if (CallstackProfileData.count(Value.first)) {
451         CallstackProfileData[Value.first].Merge(Value.second);
452       } else {
453         CallstackProfileData[Value.first] = Value.second;
454       }
455     }
456 
    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect the callstack to be the same for a given id.
459     const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
460     if (StackMap.empty()) {
461       StackMap = CSM;
462     } else {
463       if (mergeStackMap(CSM, StackMap))
464         return make_error<InstrProfError>(
465             instrprof_error::malformed,
466             "memprof raw profile got different call stack for same id");
467     }
468 
469     Next += Header->TotalSize;
470   }
471 
472   return Error::success();
473 }
474 
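// Converts a virtual address from the profile into an address suitable for
// symbolization. In debug builds, also checks that the address lies within
// one of the recorded segments.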
475 object::SectionedAddress
476 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });
488 
  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non-PIE binaries.
491   return object::SectionedAddress{VirtualAddress};
492 }
493 
494 Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
495   if (FunctionProfileData.empty())
496     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
497 
498   if (Iter == FunctionProfileData.end())
499     return make_error<InstrProfError>(instrprof_error::eof);
500 
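  // Converts a frame id back into a Frame, attaching the symbol name if
  // names were retained during symbolization.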
501   auto IdToFrameCallback = [this](const FrameId Id) {
502     Frame F = this->idToFrame(Id);
503     if (!this->KeepSymbolName)
504       return F;
505     auto Iter = this->GuidToSymbolName.find(F.Function);
506     assert(Iter != this->GuidToSymbolName.end());
507     F.SymbolName = Iter->getSecond();
508     return F;
509   };
510 
511   const IndexedMemProfRecord &IndexedRecord = Iter->second;
512   GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
513   Iter++;
514   return Error::success();
515 }
516 } // namespace memprof
517 } // namespace llvm
518