1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <cstdint>
14 #include <type_traits>
15 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
21 #include "llvm/Object/Binary.h"
22 #include "llvm/Object/ELFObjectFile.h"
23 #include "llvm/Object/ObjectFile.h"
24 #include "llvm/ProfileData/InstrProf.h"
25 #include "llvm/ProfileData/MemProf.h"
26 #include "llvm/ProfileData/MemProfData.inc"
27 #include "llvm/ProfileData/RawMemProfReader.h"
28 #include "llvm/Support/MD5.h"
29 
30 namespace llvm {
31 namespace memprof {
32 namespace {
33 
// Per-dump summary statistics extracted from a raw profile header (see
// computeSummary).
struct Summary {
  // Raw profile format version (Header::Version).
  uint64_t Version;
  // Serialized size of this dump in bytes (Header::TotalSize).
  uint64_t TotalSizeBytes;
  // Element counts read from the start of each serialized section.
  uint64_t NumSegments;
  uint64_t NumMIBInfo;
  uint64_t NumStackOffsets;
};
41 
// Reads a single value of type T from \p Ptr, asserting that the pointer is
// suitably aligned for T. Used to read counts and magic values out of the
// memory-mapped profile buffer.
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  // is_trivially_copyable is the property this read actually requires
  // (std::is_pod is deprecated in C++20 and stricter than needed).
  static_assert(std::is_trivially_copyable<T>::value,
                "Not a trivially copyable type.");
  // uintptr_t (not size_t) is the integer type guaranteed to round-trip a
  // pointer; alignof(T) (not sizeof(T)) is T's actual alignment requirement.
  assert(reinterpret_cast<uintptr_t>(Ptr) % alignof(T) == 0 &&
         "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}
47 
// Builds a Summary for the raw profile dump beginning at \p Start by reading
// its header and the 8-byte element count that prefixes each section.
Summary computeSummary(const char *Start) {
  auto *H = reinterpret_cast<const Header *>(Start);

  // Check alignment while reading the number of items in each section.
  return Summary{
      H->Version,
      H->TotalSize,
      alignedRead(Start + H->SegmentOffset),
      alignedRead(Start + H->MIBOffset),
      alignedRead(Start + H->StackOffset),
  };
}
60 
61 Error checkBuffer(const MemoryBuffer &Buffer) {
62   if (!RawMemProfReader::hasFormat(Buffer))
63     return make_error<InstrProfError>(instrprof_error::bad_magic);
64 
65   if (Buffer.getBufferSize() == 0)
66     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
67 
68   if (Buffer.getBufferSize() < sizeof(Header)) {
69     return make_error<InstrProfError>(instrprof_error::truncated);
70   }
71 
72   // The size of the buffer can be > header total size since we allow repeated
73   // serialization of memprof profiles to the same file.
74   uint64_t TotalSize = 0;
75   const char *Next = Buffer.getBufferStart();
76   while (Next < Buffer.getBufferEnd()) {
77     auto *H = reinterpret_cast<const Header *>(Next);
78     if (H->Version != MEMPROF_RAW_VERSION) {
79       return make_error<InstrProfError>(instrprof_error::unsupported_version);
80     }
81 
82     TotalSize += H->TotalSize;
83     Next += H->TotalSize;
84   }
85 
86   if (Buffer.getBufferSize() != TotalSize) {
87     return make_error<InstrProfError>(instrprof_error::malformed);
88   }
89   return Error::success();
90 }
91 
92 // A generic method to read binary data for type T where the first 8b indicate
93 // the number of elements of type T to be read.
94 template <typename T> llvm::SmallVector<T, 16> readInfo(const char *Begin) {
95   const uint64_t NumItemsToRead = *reinterpret_cast<const uint64_t *>(Begin);
96   const char *Ptr = Begin + sizeof(uint64_t);
97   llvm::SmallVector<T, 16> Items;
98   for (uint64_t I = 0; I < NumItemsToRead; I++) {
99     Items.emplace_back(*reinterpret_cast<const T *>(Ptr + I * sizeof(T)));
100   }
101   return Items;
102 }
103 
104 CallStackMap readStackInfo(const char *Begin) {
105   const uint64_t NumItemsToRead = *reinterpret_cast<const uint64_t *>(Begin);
106   char *Ptr = const_cast<char *>(Begin) + sizeof(uint64_t);
107   CallStackMap Items;
108 
109   uint64_t Count = 0;
110   do {
111     const uint64_t StackId = alignedRead(Ptr);
112     Ptr += sizeof(uint64_t);
113 
114     const uint64_t NumPCs = alignedRead(Ptr);
115     Ptr += sizeof(uint64_t);
116 
117     SmallVector<uint64_t, 32> CallStack;
118     for (uint64_t I = 0; I < NumPCs; I++) {
119       CallStack.push_back(alignedRead(Ptr));
120       Ptr += sizeof(uint64_t);
121     }
122 
123     Items[StackId] = CallStack;
124   } while (++Count < NumItemsToRead);
125   return Items;
126 }
127 
128 // Merges the contents of stack information in \p From to \p To. Returns true if
129 // any stack ids observed previously map to a different set of program counter
130 // addresses.
131 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
132   for (const auto &IdStack : From) {
133     auto I = To.find(IdStack.first);
134     if (I == To.end()) {
135       To[IdStack.first] = IdStack.second;
136     } else {
137       // Check that the PCs are the same (in order).
138       if (IdStack.second != I->second)
139         return true;
140     }
141   }
142   return false;
143 }
144 
145 StringRef trimSuffix(const StringRef Name) {
146   const auto Pos = Name.find(".llvm.");
147   return Name.take_front(Pos);
148 }
149 
150 Error report(Error E, const StringRef Context) {
151   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
152                     std::move(E));
153 }
154 } // namespace
155 
156 Expected<std::unique_ptr<RawMemProfReader>>
157 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
158   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
159   if (std::error_code EC = BufferOr.getError())
160     return report(errorCodeToError(EC), Path.getSingleStringRef());
161 
162   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
163   if (Error E = checkBuffer(*Buffer))
164     return report(std::move(E), Path.getSingleStringRef());
165 
166   if (ProfiledBinary.empty())
167     return report(
168         errorCodeToError(make_error_code(std::errc::invalid_argument)),
169         "Path to profiled binary is empty!");
170 
171   auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
172   if (!BinaryOr) {
173     return report(BinaryOr.takeError(), ProfiledBinary);
174   }
175 
176   std::unique_ptr<RawMemProfReader> Reader(
177       new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get())));
178   if (Error E = Reader->initialize()) {
179     return std::move(E);
180   }
181   return std::move(Reader);
182 }
183 
184 bool RawMemProfReader::hasFormat(const StringRef Path) {
185   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
186   if (!BufferOr)
187     return false;
188 
189   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
190   return hasFormat(*Buffer);
191 }
192 
193 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
194   if (Buffer.getBufferSize() < sizeof(uint64_t))
195     return false;
196   // Aligned read to sanity check that the buffer was allocated with at least 8b
197   // alignment.
198   const uint64_t Magic = alignedRead(Buffer.getBufferStart());
199   return Magic == MEMPROF_RAW_MAGIC_64;
200 }
201 
// Prints the profile to \p OS in YAML form: one header summary per raw dump
// in the buffer, followed by the merged records.
void RawMemProfReader::printYAML(raw_ostream &OS) {
  OS << "MemprofProfile:\n";
  printSummaries(OS);
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  // Iterating the reader itself yields each merged MemProfRecord in turn
  // (via readNextRecord).
  for (const auto &Record : *this) {
    OS << "  -\n";
    Record.print(OS);
  }
}
212 
// Prints a YAML summary for each raw profile dump in the buffer. Dumps are
// laid end to end and chained by the size recorded in each header.
void RawMemProfReader::printSummaries(raw_ostream &OS) const {
  const char *Next = DataBuffer->getBufferStart();
  while (Next < DataBuffer->getBufferEnd()) {
    auto Summary = computeSummary(Next);
    OS << "  -\n";
    OS << "  Header:\n";
    OS << "    Version: " << Summary.Version << "\n";
    OS << "    TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
    OS << "    NumSegments: " << Summary.NumSegments << "\n";
    OS << "    NumMibInfo: " << Summary.NumMIBInfo << "\n";
    OS << "    NumStackOffsets: " << Summary.NumStackOffsets << "\n";
    // TODO: Print the build ids once we can record them using the
    // sanitizer_procmaps library for linux.

    // Advance to the next dump using the size recorded in this header.
    auto *H = reinterpret_cast<const Header *>(Next);
    Next += H->TotalSize;
  }
}
231 
232 Error RawMemProfReader::initialize() {
233   const StringRef FileName = Binary.getBinary()->getFileName();
234 
235   auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
236   if (!ElfObject) {
237     return report(make_error<StringError>(Twine("Not an ELF file: "),
238                                           inconvertibleErrorCode()),
239                   FileName);
240   }
241 
242   auto Triple = ElfObject->makeTriple();
243   if (!Triple.isX86())
244     return report(make_error<StringError>(Twine("Unsupported target: ") +
245                                               Triple.getArchName(),
246                                           inconvertibleErrorCode()),
247                   FileName);
248 
249   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
250   std::unique_ptr<DIContext> Context = DWARFContext::create(
251       *Object, DWARFContext::ProcessDebugRelocations::Process);
252 
253   auto SOFOr = symbolize::SymbolizableObjectFile::create(
254       Object, std::move(Context), /*UntagAddresses=*/false);
255   if (!SOFOr)
256     return report(SOFOr.takeError(), FileName);
257   Symbolizer = std::move(SOFOr.get());
258 
259   return readRawProfile();
260 }
261 
// Walks each raw profile dump in the data buffer (chained by Header::TotalSize,
// already validated by checkBuffer) and accumulates its contents into the
// reader's state: segment info (which must be identical across dumps),
// MemInfoBlocks (merged by stack id), and call stacks (which must agree for
// any given id). Returns instrprof_error::malformed on any inconsistency.
Error RawMemProfReader::readRawProfile() {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information, check whether its the same across all
    // profiles in this binary file.
    if (SegmentInfo.empty()) {
      SegmentInfo = readInfo<SegmentEntry>(Next + Header->SegmentOffset);
    } else {
      auto Info = readInfo<SegmentEntry>(Next + Header->SegmentOffset);
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumping.
      if (SegmentInfo != Info) {
        return make_error<InstrProfError>(instrprof_error::malformed);
      }
    }

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    // Each serialized entry is an 8b stack id immediately followed by the
    // MemInfoBlock; PACKED ensures the struct layout matches that wire
    // format with no padding between the two fields.
    PACKED(struct IDAndMIB {
      uint64_t Id;
      MemInfoBlock MIB;
    });
    for (const auto &Value : readInfo<IDAndMIB>(Next + Header->MIBOffset)) {
      if (ProfileData.count(Value.Id)) {
        ProfileData[Value.Id].Merge(Value.MIB);
      } else {
        ProfileData[Value.Id] = Value.MIB;
      }
    }

    // Read in the callstack for each ids. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(instrprof_error::malformed);
    }

    // Advance to the next dump using the size recorded in this header.
    Next += Header->TotalSize;
  }

  return Error::success();
}
312 
// Maps a profiled virtual address into a SectionedAddress suitable for the
// symbolizer. Currently the address is passed through unchanged (correct
// only for non-PIE binaries; see TODO below).
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  SegmentEntry *ContainingSegment = nullptr;
  for (auto &SE : SegmentInfo) {
    // NOTE(review): the interval here is (Start, End] — open at the low end.
    // Confirm this matches how SegmentEntry records its boundaries; an
    // address equal to SE.Start would not match. Also note the loop does not
    // break, so if segments overlapped the last match would win.
    if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
      ContainingSegment = &SE;
    }
  }

  // Ensure that the virtual address is valid.
  assert(ContainingSegment && "Could not find a segment entry");

  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non PIE binaries.
  return object::SectionedAddress{VirtualAddress};
}
329 
// Symbolizes the call stack recorded for \p Id and fills \p Record with the
// resulting frames plus the MemInfoBlock \p MIB. Returns any symbolization
// error encountered.
Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
                                   MemProfRecord &Record) {
  // NOTE(review): operator[] default-constructs an empty call stack when Id
  // is absent from StackMap, silently producing a record with no frames —
  // confirm every MIB id always has a stack entry.
  auto &CallStack = StackMap[Id];
  // Raw file/line values with linkage (mangled) names, since the frame is
  // identified by a hash of the linkage name below.
  DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);
  for (const uint64_t Address : CallStack) {
    Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
        getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false);

    if (!DIOr)
      return DIOr.takeError();
    DIInliningInfo DI = DIOr.get();

    // One PC may expand to several frames when calls were inlined.
    for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
      const auto &Frame = DI.getFrame(I);
      Record.CallStack.emplace_back(
          // Frames are identified by an MD5 hash of the function name with
          // any ".llvm." suffix stripped, not by the name itself.
          std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))),
          // Line offset within the function; assumes Line >= StartLine —
          // TODO confirm this subtraction cannot underflow.
          Frame.Line - Frame.StartLine, Frame.Column,
          // Only the first entry is not an inlined location.
          I != 0);
    }
  }
  Record.Info = MIB;
  return Error::success();
}
356 
357 Error RawMemProfReader::readNextRecord(MemProfRecord &Record) {
358   if (ProfileData.empty())
359     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
360 
361   if (Iter == ProfileData.end())
362     return make_error<InstrProfError>(instrprof_error::eof);
363 
364   Record.clear();
365   if (Error E = fillRecord(Iter->first, Iter->second, Record)) {
366     return E;
367   }
368   Iter++;
369   return Error::success();
370 }
371 
372 } // namespace memprof
373 } // namespace llvm
374