1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <cstdint>
14 #include <type_traits>
15 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
21 #include "llvm/Object/Binary.h"
22 #include "llvm/Object/ELFObjectFile.h"
23 #include "llvm/Object/ObjectFile.h"
24 #include "llvm/ProfileData/InstrProf.h"
25 #include "llvm/ProfileData/MemProf.h"
26 #include "llvm/ProfileData/MemProfData.inc"
27 #include "llvm/ProfileData/RawMemProfReader.h"
28 #include "llvm/Support/Endian.h"
29 #include "llvm/Support/MD5.h"
30 
31 #define DEBUG_TYPE "memprof"
32 
33 namespace llvm {
34 namespace memprof {
35 namespace {
36 
37 struct Summary {
38   uint64_t Version;
39   uint64_t TotalSizeBytes;
40   uint64_t NumSegments;
41   uint64_t NumMIBInfo;
42   uint64_t NumStackOffsets;
43 };
44 
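// Aligned load of a POD value of type T from Ptr. Asserts that Ptr satisfies
// the natural alignment of T before dereferencing it.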
45 template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
46   static_assert(std::is_pod<T>::value, "Not a pod type.");
47   assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
48   return *reinterpret_cast<const T *>(Ptr);
49 }
50 
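// Summarize the raw profile dump starting at Start. The item count for each
// section is the first 8-byte word at the offset recorded in the header.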
51 Summary computeSummary(const char *Start) {
52   auto *H = reinterpret_cast<const Header *>(Start);
53 
54   // Check alignment while reading the number of items in each section.
55   return Summary{
56       H->Version,
57       H->TotalSize,
58       alignedRead(Start + H->SegmentOffset),
59       alignedRead(Start + H->MIBOffset),
60       alignedRead(Start + H->StackOffset),
61   };
62 }
63 
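// Validate the raw profile buffer: check the magic, ensure the buffer holds at
// least one header, verify that every dump uses a supported version, and check
// that the per-dump TotalSize fields add up to the buffer size. A single raw
// profile file may contain several dumps laid out back to back, each starting
// with its own header.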
64 Error checkBuffer(const MemoryBuffer &Buffer) {
65   if (!RawMemProfReader::hasFormat(Buffer))
66     return make_error<InstrProfError>(instrprof_error::bad_magic);
67 
68   if (Buffer.getBufferSize() == 0)
69     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
70 
71   if (Buffer.getBufferSize() < sizeof(Header)) {
72     return make_error<InstrProfError>(instrprof_error::truncated);
73   }
74 
  // The buffer can be larger than the total size recorded in a single header
  // since we allow repeated serialization of memprof profiles to the same
  // file.
77   uint64_t TotalSize = 0;
78   const char *Next = Buffer.getBufferStart();
79   while (Next < Buffer.getBufferEnd()) {
80     auto *H = reinterpret_cast<const Header *>(Next);
81     if (H->Version != MEMPROF_RAW_VERSION) {
82       return make_error<InstrProfError>(instrprof_error::unsupported_version);
83     }
84 
85     TotalSize += H->TotalSize;
86     Next += H->TotalSize;
87   }
88 
89   if (Buffer.getBufferSize() != TotalSize) {
90     return make_error<InstrProfError>(instrprof_error::malformed);
91   }
92   return Error::success();
93 }
94 
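// Read the segment section: a little-endian count followed by that many
// SegmentEntry structs.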
95 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
96   using namespace support;
97 
98   const uint64_t NumItemsToRead =
99       endian::readNext<uint64_t, little, unaligned>(Ptr);
100   llvm::SmallVector<SegmentEntry> Items;
101   for (uint64_t I = 0; I < NumItemsToRead; I++) {
102     Items.push_back(*reinterpret_cast<const SegmentEntry *>(
103         Ptr + I * sizeof(SegmentEntry)));
104   }
105   return Items;
106 }
107 
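// Read the MIB section: a little-endian count followed by (stack id,
// MemInfoBlock) pairs.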
108 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
109 readMemInfoBlocks(const char *Ptr) {
110   using namespace support;
111 
112   const uint64_t NumItemsToRead =
113       endian::readNext<uint64_t, little, unaligned>(Ptr);
114   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
115   for (uint64_t I = 0; I < NumItemsToRead; I++) {
116     const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
117     const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
118     Items.push_back({Id, MIB});
    // Only advance Ptr by the size of the MIB here since readNext has already
    // advanced it past the id.
120     Ptr += sizeof(MemInfoBlock);
121   }
122   return Items;
123 }
124 
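// Read the stack section: a little-endian count of call stacks, each encoded
// as a stack id, a PC count, and then the PC addresses.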
125 CallStackMap readStackInfo(const char *Ptr) {
126   using namespace support;
127 
128   const uint64_t NumItemsToRead =
129       endian::readNext<uint64_t, little, unaligned>(Ptr);
130   CallStackMap Items;
131 
132   for (uint64_t I = 0; I < NumItemsToRead; I++) {
133     const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
134     const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
135 
136     SmallVector<uint64_t, 32> CallStack;
137     for (uint64_t J = 0; J < NumPCs; J++) {
138       CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
139     }
140 
141     Items[StackId] = CallStack;
142   }
143   return Items;
144 }
145 
// Merges the stack information in \p From into \p To. Returns true if any
// stack ids observed previously map to a different set of program counter
// addresses.
149 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
150   for (const auto &IdStack : From) {
151     auto I = To.find(IdStack.first);
152     if (I == To.end()) {
153       To[IdStack.first] = IdStack.second;
154     } else {
155       // Check that the PCs are the same (in order).
156       if (IdStack.second != I->second)
157         return true;
158     }
159   }
160   return false;
161 }
162 
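// Strip a ".llvm." suffix, and everything after it, from the function name
// before hashing.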
163 StringRef trimSuffix(const StringRef Name) {
164   const auto Pos = Name.find(".llvm.");
165   return Name.take_front(Pos);
166 }
167 
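// Wrap the error with a string describing the context in which it occurred,
// e.g. the file being read.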
168 Error report(Error E, const StringRef Context) {
169   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
170                     std::move(E));
171 }
172 } // namespace
173 
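// An illustrative usage sketch for the reader; the profile and binary paths
// below are placeholders and error handling is elided:
//
//   auto ReaderOr = RawMemProfReader::create("memprof.profraw", "a.out");
//   if (ReaderOr) {
//     for (const MemProfRecord &Record : **ReaderOr)
//       Record.print(llvm::outs());
//   }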
174 Expected<std::unique_ptr<RawMemProfReader>>
175 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
176   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
177   if (std::error_code EC = BufferOr.getError())
178     return report(errorCodeToError(EC), Path.getSingleStringRef());
179 
180   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
181   if (Error E = checkBuffer(*Buffer))
182     return report(std::move(E), Path.getSingleStringRef());
183 
184   if (ProfiledBinary.empty())
185     return report(
186         errorCodeToError(make_error_code(std::errc::invalid_argument)),
187         "Path to profiled binary is empty!");
188 
189   auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
190   if (!BinaryOr) {
191     return report(BinaryOr.takeError(), ProfiledBinary);
192   }
193 
194   std::unique_ptr<RawMemProfReader> Reader(
195       new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get())));
196   if (Error E = Reader->initialize()) {
197     return std::move(E);
198   }
199   return std::move(Reader);
200 }
201 
202 bool RawMemProfReader::hasFormat(const StringRef Path) {
203   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
204   if (!BufferOr)
205     return false;
206 
207   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
208   return hasFormat(*Buffer);
209 }
210 
211 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
212   if (Buffer.getBufferSize() < sizeof(uint64_t))
213     return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8-byte alignment.
216   const uint64_t Magic = alignedRead(Buffer.getBufferStart());
217   return Magic == MEMPROF_RAW_MAGIC_64;
218 }
219 
220 void RawMemProfReader::printYAML(raw_ostream &OS) {
221   OS << "MemprofProfile:\n";
222   printSummaries(OS);
223   // Print out the merged contents of the profiles.
224   OS << "  Records:\n";
225   for (const auto &Record : *this) {
226     OS << "  -\n";
227     Record.print(OS);
228   }
229 }
230 
231 void RawMemProfReader::printSummaries(raw_ostream &OS) const {
232   const char *Next = DataBuffer->getBufferStart();
233   while (Next < DataBuffer->getBufferEnd()) {
234     auto Summary = computeSummary(Next);
235     OS << "  -\n";
236     OS << "  Header:\n";
237     OS << "    Version: " << Summary.Version << "\n";
238     OS << "    TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
239     OS << "    NumSegments: " << Summary.NumSegments << "\n";
240     OS << "    NumMibInfo: " << Summary.NumMIBInfo << "\n";
241     OS << "    NumStackOffsets: " << Summary.NumStackOffsets << "\n";
    // TODO: Print the build ids once we can record them using the
    // sanitizer_procmaps library for Linux.
244 
245     auto *H = reinterpret_cast<const Header *>(Next);
246     Next += H->TotalSize;
247   }
248 }
249 
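// Set up symbolization for the profiled binary: require an x86 ELF object,
// build a SymbolizableObjectFile over its DWARF debug info, and then read the
// raw profile contents.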
250 Error RawMemProfReader::initialize() {
251   const StringRef FileName = Binary.getBinary()->getFileName();
252 
253   auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
254   if (!ElfObject) {
255     return report(make_error<StringError>(Twine("Not an ELF file: "),
256                                           inconvertibleErrorCode()),
257                   FileName);
258   }
259 
260   auto Triple = ElfObject->makeTriple();
261   if (!Triple.isX86())
262     return report(make_error<StringError>(Twine("Unsupported target: ") +
263                                               Triple.getArchName(),
264                                           inconvertibleErrorCode()),
265                   FileName);
266 
267   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
268   std::unique_ptr<DIContext> Context = DWARFContext::create(
269       *Object, DWARFContext::ProcessDebugRelocations::Process);
270 
271   auto SOFOr = symbolize::SymbolizableObjectFile::create(
272       Object, std::move(Context), /*UntagAddresses=*/false);
273   if (!SOFOr)
274     return report(SOFOr.takeError(), FileName);
275   Symbolizer = std::move(SOFOr.get());
276 
277   return readRawProfile();
278 }
279 
280 Error RawMemProfReader::readRawProfile() {
281   const char *Next = DataBuffer->getBufferStart();
282 
283   while (Next < DataBuffer->getBufferEnd()) {
284     auto *Header = reinterpret_cast<const memprof::Header *>(Next);
285 
    // Read in the segment information and check whether it is the same across
    // all profiles in this raw profile file.
288     const llvm::SmallVector<SegmentEntry> Entries =
289         readSegmentEntries(Next + Header->SegmentOffset);
290     if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same raw profile file. This can happen if dynamic libraries are
      // loaded or unloaded between profile dumps.
294       return make_error<InstrProfError>(
295           instrprof_error::malformed,
296           "memprof raw profile has different segment information");
297     }
298     SegmentInfo.assign(Entries.begin(), Entries.end());
299 
    // Read in the MemInfoBlocks and merge them by stack id. We assume that raw
    // profiles in the same file come from the same process, so the stackdepot
    // ids are identical across dumps.
303     for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
304       if (ProfileData.count(Value.first)) {
305         ProfileData[Value.first].Merge(Value.second);
306       } else {
307         ProfileData[Value.first] = Value.second;
308       }
309     }
310 
    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect the callstack to be identical for a given id.
313     const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
314     if (StackMap.empty()) {
315       StackMap = CSM;
316     } else {
317       if (mergeStackMap(CSM, StackMap))
318         return make_error<InstrProfError>(
319             instrprof_error::malformed,
320             "memprof raw profile got different call stack for same id");
321     }
322 
323     Next += Header->TotalSize;
324   }
325 
326   return Error::success();
327 }
328 
329 object::SectionedAddress
330 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });
342 
  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non-PIE binaries.
345   return object::SectionedAddress{VirtualAddress};
346 }
347 
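// Build the MemProfRecord for the MIB with the given stack id: symbolize every
// PC in its callstack into (hashed function name, line offset from function
// start, column, is-inlined) frames and attach the MemInfoBlock.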
348 Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
349                                    MemProfRecord &Record) {
350   auto &CallStack = StackMap[Id];
351   DILineInfoSpecifier Specifier(
352       DILineInfoSpecifier::FileLineInfoKind::RawValue,
353       DILineInfoSpecifier::FunctionNameKind::LinkageName);
354   for (const uint64_t Address : CallStack) {
355     Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
356         getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false);
357 
358     if (!DIOr)
359       return DIOr.takeError();
360     DIInliningInfo DI = DIOr.get();
361 
362     for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
363       const auto &Frame = DI.getFrame(I);
364       Record.CallStack.emplace_back(
365           std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))),
366           Frame.Line - Frame.StartLine, Frame.Column,
367           // Only the first entry is not an inlined location.
368           I != 0);
369     }
370   }
371   Record.Info = MIB;
372   return Error::success();
373 }
374 
375 Error RawMemProfReader::readNextRecord(MemProfRecord &Record) {
376   if (ProfileData.empty())
377     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
378 
379   if (Iter == ProfileData.end())
380     return make_error<InstrProfError>(instrprof_error::eof);
381 
382   Record.clear();
383   if (Error E = fillRecord(Iter->first, Iter->second, Record)) {
384     return E;
385   }
386   Iter++;
387   return Error::success();
388 }
389 } // namespace memprof
390 } // namespace llvm
391