1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <cstdint>
14 #include <type_traits>
15 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
21 #include "llvm/Object/Binary.h"
22 #include "llvm/Object/ELFObjectFile.h"
23 #include "llvm/Object/ObjectFile.h"
24 #include "llvm/ProfileData/InstrProf.h"
25 #include "llvm/ProfileData/MemProf.h"
26 #include "llvm/ProfileData/MemProfData.inc"
27 #include "llvm/ProfileData/RawMemProfReader.h"
28 #include "llvm/Support/Endian.h"
29 #include "llvm/Support/MD5.h"
30 
31 namespace llvm {
32 namespace memprof {
33 namespace {
34 
// Lightweight per-dump summary of a raw memprof profile, extracted from the
// serialized Header by computeSummary() and printed by printSummaries().
struct Summary {
  // Raw profile format version recorded in the header.
  uint64_t Version;
  // Total size in bytes of this profile dump, including its header.
  uint64_t TotalSizeBytes;
  // Item counts read from the start of the segment, MIB and stack sections.
  uint64_t NumSegments;
  uint64_t NumMIBInfo;
  uint64_t NumStackOffsets;
};
42 
// Read a value of type \p T (uint64_t by default) from \p Ptr, asserting that
// the pointer is suitably aligned for T. Used to sanity-check section offsets
// while reading counts out of the raw profile buffer.
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  // std::is_pod is deprecated since C++20; trivial copyability is the actual
  // precondition for reading the object representation through a cast.
  static_assert(std::is_trivially_copyable<T>::value,
                "Not a trivially copyable type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}
48 
49 Summary computeSummary(const char *Start) {
50   auto *H = reinterpret_cast<const Header *>(Start);
51 
52   // Check alignment while reading the number of items in each section.
53   return Summary{
54       H->Version,
55       H->TotalSize,
56       alignedRead(Start + H->SegmentOffset),
57       alignedRead(Start + H->MIBOffset),
58       alignedRead(Start + H->StackOffset),
59   };
60 }
61 
62 Error checkBuffer(const MemoryBuffer &Buffer) {
63   if (!RawMemProfReader::hasFormat(Buffer))
64     return make_error<InstrProfError>(instrprof_error::bad_magic);
65 
66   if (Buffer.getBufferSize() == 0)
67     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
68 
69   if (Buffer.getBufferSize() < sizeof(Header)) {
70     return make_error<InstrProfError>(instrprof_error::truncated);
71   }
72 
73   // The size of the buffer can be > header total size since we allow repeated
74   // serialization of memprof profiles to the same file.
75   uint64_t TotalSize = 0;
76   const char *Next = Buffer.getBufferStart();
77   while (Next < Buffer.getBufferEnd()) {
78     auto *H = reinterpret_cast<const Header *>(Next);
79     if (H->Version != MEMPROF_RAW_VERSION) {
80       return make_error<InstrProfError>(instrprof_error::unsupported_version);
81     }
82 
83     TotalSize += H->TotalSize;
84     Next += H->TotalSize;
85   }
86 
87   if (Buffer.getBufferSize() != TotalSize) {
88     return make_error<InstrProfError>(instrprof_error::malformed);
89   }
90   return Error::success();
91 }
92 
93 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
94   using namespace support;
95 
96   const uint64_t NumItemsToRead =
97       endian::readNext<uint64_t, little, unaligned>(Ptr);
98   llvm::SmallVector<SegmentEntry> Items;
99   for (uint64_t I = 0; I < NumItemsToRead; I++) {
100     Items.push_back(*reinterpret_cast<const SegmentEntry *>(
101         Ptr + I * sizeof(SegmentEntry)));
102   }
103   return Items;
104 }
105 
106 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
107 readMemInfoBlocks(const char *Ptr) {
108   using namespace support;
109 
110   const uint64_t NumItemsToRead =
111       endian::readNext<uint64_t, little, unaligned>(Ptr);
112   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
113   for (uint64_t I = 0; I < NumItemsToRead; I++) {
114     const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
115     const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
116     Items.push_back({Id, MIB});
117     // Only increment by size of MIB since readNext implicitly increments.
118     Ptr += sizeof(MemInfoBlock);
119   }
120   return Items;
121 }
122 
123 CallStackMap readStackInfo(const char *Ptr) {
124   using namespace support;
125 
126   const uint64_t NumItemsToRead =
127       endian::readNext<uint64_t, little, unaligned>(Ptr);
128   CallStackMap Items;
129 
130   for (uint64_t I = 0; I < NumItemsToRead; I++) {
131     const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
132     const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
133 
134     SmallVector<uint64_t, 32> CallStack;
135     for (uint64_t J = 0; J < NumPCs; J++) {
136       CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
137     }
138 
139     Items[StackId] = CallStack;
140   }
141   return Items;
142 }
143 
144 // Merges the contents of stack information in \p From to \p To. Returns true if
145 // any stack ids observed previously map to a different set of program counter
146 // addresses.
147 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
148   for (const auto &IdStack : From) {
149     auto I = To.find(IdStack.first);
150     if (I == To.end()) {
151       To[IdStack.first] = IdStack.second;
152     } else {
153       // Check that the PCs are the same (in order).
154       if (IdStack.second != I->second)
155         return true;
156     }
157   }
158   return false;
159 }
160 
161 StringRef trimSuffix(const StringRef Name) {
162   const auto Pos = Name.find(".llvm.");
163   return Name.take_front(Pos);
164 }
165 
166 Error report(Error E, const StringRef Context) {
167   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
168                     std::move(E));
169 }
170 } // namespace
171 
172 Expected<std::unique_ptr<RawMemProfReader>>
173 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
174   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
175   if (std::error_code EC = BufferOr.getError())
176     return report(errorCodeToError(EC), Path.getSingleStringRef());
177 
178   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
179   if (Error E = checkBuffer(*Buffer))
180     return report(std::move(E), Path.getSingleStringRef());
181 
182   if (ProfiledBinary.empty())
183     return report(
184         errorCodeToError(make_error_code(std::errc::invalid_argument)),
185         "Path to profiled binary is empty!");
186 
187   auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
188   if (!BinaryOr) {
189     return report(BinaryOr.takeError(), ProfiledBinary);
190   }
191 
192   std::unique_ptr<RawMemProfReader> Reader(
193       new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get())));
194   if (Error E = Reader->initialize()) {
195     return std::move(E);
196   }
197   return std::move(Reader);
198 }
199 
200 bool RawMemProfReader::hasFormat(const StringRef Path) {
201   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
202   if (!BufferOr)
203     return false;
204 
205   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
206   return hasFormat(*Buffer);
207 }
208 
209 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
210   if (Buffer.getBufferSize() < sizeof(uint64_t))
211     return false;
212   // Aligned read to sanity check that the buffer was allocated with at least 8b
213   // alignment.
214   const uint64_t Magic = alignedRead(Buffer.getBufferStart());
215   return Magic == MEMPROF_RAW_MAGIC_64;
216 }
217 
218 void RawMemProfReader::printYAML(raw_ostream &OS) {
219   OS << "MemprofProfile:\n";
220   printSummaries(OS);
221   // Print out the merged contents of the profiles.
222   OS << "  Records:\n";
223   for (const auto &Record : *this) {
224     OS << "  -\n";
225     Record.print(OS);
226   }
227 }
228 
229 void RawMemProfReader::printSummaries(raw_ostream &OS) const {
230   const char *Next = DataBuffer->getBufferStart();
231   while (Next < DataBuffer->getBufferEnd()) {
232     auto Summary = computeSummary(Next);
233     OS << "  -\n";
234     OS << "  Header:\n";
235     OS << "    Version: " << Summary.Version << "\n";
236     OS << "    TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
237     OS << "    NumSegments: " << Summary.NumSegments << "\n";
238     OS << "    NumMibInfo: " << Summary.NumMIBInfo << "\n";
239     OS << "    NumStackOffsets: " << Summary.NumStackOffsets << "\n";
240     // TODO: Print the build ids once we can record them using the
241     // sanitizer_procmaps library for linux.
242 
243     auto *H = reinterpret_cast<const Header *>(Next);
244     Next += H->TotalSize;
245   }
246 }
247 
248 Error RawMemProfReader::initialize() {
249   const StringRef FileName = Binary.getBinary()->getFileName();
250 
251   auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
252   if (!ElfObject) {
253     return report(make_error<StringError>(Twine("Not an ELF file: "),
254                                           inconvertibleErrorCode()),
255                   FileName);
256   }
257 
258   auto Triple = ElfObject->makeTriple();
259   if (!Triple.isX86())
260     return report(make_error<StringError>(Twine("Unsupported target: ") +
261                                               Triple.getArchName(),
262                                           inconvertibleErrorCode()),
263                   FileName);
264 
265   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
266   std::unique_ptr<DIContext> Context = DWARFContext::create(
267       *Object, DWARFContext::ProcessDebugRelocations::Process);
268 
269   auto SOFOr = symbolize::SymbolizableObjectFile::create(
270       Object, std::move(Context), /*UntagAddresses=*/false);
271   if (!SOFOr)
272     return report(SOFOr.takeError(), FileName);
273   Symbolizer = std::move(SOFOr.get());
274 
275   return readRawProfile();
276 }
277 
// Deserialize every back-to-back profile dump in DataBuffer, merging them
// into the reader's SegmentInfo, ProfileData and StackMap members. Returns an
// error if repeated dumps disagree on segments or callstacks.
Error RawMemProfReader::readRawProfile() {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information, check whether its the same across all
    // profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumping.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (ProfileData.count(Value.first)) {
        // Repeated id: fold the new MIB's counters into the existing record.
        ProfileData[Value.first].Merge(Value.second);
      } else {
        ProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each ids. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      // mergeStackMap returns true on an id -> callstack mismatch.
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    // Advance to the next dump; TotalSize covers the header and all sections.
    Next += Header->TotalSize;
  }

  return Error::success();
}
326 
327 object::SectionedAddress
328 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
329   SegmentEntry *ContainingSegment = nullptr;
330   for (auto &SE : SegmentInfo) {
331     if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
332       ContainingSegment = &SE;
333     }
334   }
335 
336   // Ensure that the virtual address is valid.
337   assert(ContainingSegment && "Could not find a segment entry");
338 
339   // TODO: Compute the file offset based on the maps and program headers. For
340   // now this only works for non PIE binaries.
341   return object::SectionedAddress{VirtualAddress};
342 }
343 
// Populate \p Record with the symbolized callstack for stack id \p Id and the
// MemInfoBlock \p MIB. Returns any error raised during symbolization.
Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
                                   MemProfRecord &Record) {
  // NOTE(review): operator[] default-constructs an (empty) callstack if Id is
  // absent from StackMap — presumably ids always come from the same profile;
  // confirm no unknown ids can reach here.
  auto &CallStack = StackMap[Id];
  DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);
  for (const uint64_t Address : CallStack) {
    // Symbolize with inlining info so one PC can expand to several frames.
    Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
        getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false);

    if (!DIOr)
      return DIOr.takeError();
    DIInliningInfo DI = DIOr.get();

    for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
      const auto &Frame = DI.getFrame(I);
      // Frames are recorded as (function-name hash, line offset, column,
      // is-inlined). The ".llvm." suffix is stripped before hashing so clones
      // hash identically to the original function.
      // NOTE(review): Frame.Line - Frame.StartLine assumes Line >= StartLine;
      // verify the symbolizer guarantees this for the RawValue kind.
      Record.CallStack.emplace_back(
          std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))),
          Frame.Line - Frame.StartLine, Frame.Column,
          // Only the first entry is not an inlined location.
          I != 0);
    }
  }
  Record.Info = MIB;
  return Error::success();
}
370 
371 Error RawMemProfReader::readNextRecord(MemProfRecord &Record) {
372   if (ProfileData.empty())
373     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
374 
375   if (Iter == ProfileData.end())
376     return make_error<InstrProfError>(instrprof_error::eof);
377 
378   Record.clear();
379   if (Error E = fillRecord(Iter->first, Iter->second, Record)) {
380     return E;
381   }
382   Iter++;
383   return Error::success();
384 }
385 } // namespace memprof
386 } // namespace llvm
387